In [1]:
# Importing required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

# Hand-built toy classification dataset: one tuple per customer, laid out as
# (Age, Salary (k), Credit Score, Purchased).
# Purchased is the target variable: 1 = Purchased, 0 = Not Purchased
data = pd.DataFrame(
    [
        (22, 35, 600, 0),
        (25, 40, 650, 0),
        (47, 60, 710, 1),
        (52, 80, 800, 1),
        (46, 50, 780, 1),
        (56, 90, 850, 1),
        (21, 30, 580, 0),
        (23, 38, 610, 0),
        (50, 70, 770, 1),
        (30, 45, 670, 0),
    ],
    columns=['Age', 'Salary (k)', 'Credit Score', 'Purchased'],
)

# Show the header line followed by the full table
print("Raw Dataset:", data, sep="\n")

# Splitting the dataset into features (X) and target (y)
X = data[['Age', 'Salary (k)', 'Credit Score']]
y = data['Purchased']

# Splitting the data into training and testing sets BEFORE scaling.
# Fitting the scaler on the full dataset would let the test rows influence the
# mean/std used for preprocessing (data leakage) and make the reported test
# scores optimistic. The same random_state keeps the same rows in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Data preprocessing: standardizing the features using statistics learned from
# the training rows only, then applying that same transform to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-fitted scaler. Kept so that any
# code still referring to X_scaled continues to work.
X_scaled = scaler.transform(X)

# Classification models to compare, keyed by the display name used in the report.
# Decision Tree and Random Forest involve randomness while fitting, so they are
# seeded (same seed as the train/test split) to give identical results on every
# Restart & Run All.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Support Vector Machine (SVM)': SVC(),
    'Naive Bayes': GaussianNB()
}

# Fit every candidate classifier on the training split and report its
# performance on the held-out test split
for model_name, model in models.items():
    # fit() hands back the fitted estimator, so prediction is chained onto it
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Fraction of test rows whose predicted label matches the true label
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

    # Per-model summary: name, accuracy, then the per-class metrics table
    print(f"\nModel: {model_name}")
    print(f"Accuracy: {accuracy:.2f}")
    print("Classification Report:")
    print(classification_report(y_true=y_test, y_pred=y_pred))

# Classify a single unseen customer with the already-fitted Logistic Regression
new_input = pd.DataFrame([{'Age': 28, 'Salary (k)': 50, 'Credit Score': 700}])
logistic_model = models['Logistic Regression']  # fitted in the evaluation loop

# Reuse the fitted scaler so the new row is on the same scale as the training data
new_input_scaled = scaler.transform(new_input)
predicted_class = logistic_model.predict(new_input_scaled)

print(f"\nPredicted Class for new input {new_input.values.tolist()}: {predicted_class[0]} (1 = Purchased, 0 = Not Purchased)")


Raw Dataset:
   Age  Salary (k)  Credit Score  Purchased
0   22          35           600          0
1   25          40           650          0
2   47          60           710          1
3   52          80           800          1
4   46          50           780          1
5   56          90           850          1
6   21          30           580          0
7   23          38           610          0
8   50          70           770          1
9   30          45           670          0

Model: Logistic Regression
Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


Model: Decision Tree
Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   supp