In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

# Create sample dataset
# Each tuple below is one row; values follow the order given in column_names.
column_names = ['Age', 'Salary', 'Family_Size', 'Vehicle_Type', 'Purchased']
records = [
    (25, 40000, 1, 0, 0),
    (32, 75000, 2, 1, 0),
    (47, 125000, 4, 1, 1),
    (51, 110000, 3, 2, 1),
    (23, 35000, 1, 0, 0),
    (45, 95000, 3, 1, 1),
    (55, 140000, 2, 2, 1),
    (38, 80000, 4, 1, 1),
    (28, 45000, 1, 0, 0),
    (40, 85000, 3, 2, 1),
]

# Pivot the row-wise records into a column-name -> list-of-values mapping.
data = {name: list(values) for name, values in zip(column_names, zip(*records))}

# Ten rows, five integer columns.
df = pd.DataFrame(data)
print("Sample Dataset:")
print(df)

Sample Dataset:
   Age  Salary  Family_Size  Vehicle_Type  Purchased
0   25   40000            1             0          0
1   32   75000            2             1          0
2   47  125000            4             1          1
3   51  110000            3             2          1
4   23   35000            1             0          0
5   45   95000            3             1          1
6   55  140000            2             2          1
7   38   80000            4             1          1
8   28   45000            1             0          0
9   40   85000            3             2          1


In [4]:
# Step 1: Separate the label column from the predictors
y = df['Purchased']                 # Target variable
X = df.drop(columns='Purchased')    # Features: every remaining column

# Step 2: Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 3: Standardise the features.
# The scaler is fitted on the training rows only and then applied to both
# partitions, so no statistics from the test rows reach the model.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 4: Fit the logistic regression classifier on the scaled training rows
logreg = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)

# Step 5: Predict labels for the held-out rows
y_pred = logreg.predict(X_test_scaled)

# Step 6: Evaluate the model on the held-out rows.
# The test split holds only a few rows, so a class can be absent from y_test
# or y_pred. Passing the classes the model was trained on keeps the confusion
# matrix and the report the same shape whichever rows were drawn, and
# zero_division=0 reports 0.0 (instead of warning) for a metric whose
# denominator is zero.
class_labels = logreg.classes_

print("\nModel Evaluation:")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("\nConfusion Matrix:")
# Rows are true classes, columns are predicted classes, both in class_labels order.
print(confusion_matrix(y_test, y_pred, labels=class_labels))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=class_labels, zero_division=0))

# Step 7: Interpret the model
# The classifier was fitted on standardised inputs, so each coefficient is
# expressed per one standard deviation of its feature rather than per raw unit.
print("\nModel Coefficients:")
coefficients = dict(zip(X.columns, logreg.coef_[0]))  # feature name -> weight
for feature, coef in coefficients.items():
    print(f"{feature}: {coef:.4f}")

# Step 8: Score a single unseen record
# Column names and order match the feature columns the scaler was fitted on.
new_data = pd.DataFrame(
    [[30, 60000, 2, 1]],
    columns=['Age', 'Salary', 'Family_Size', 'Vehicle_Type'],
)

# Reuse the already-fitted scaler (no refitting), then classify.
new_data_scaled = scaler.transform(new_data)
prediction = logreg.predict(new_data_scaled)
prediction_prob = logreg.predict_proba(new_data_scaled)

if prediction[0] == 1:
    outcome = 'Will purchase'
else:
    outcome = 'Will not purchase'

print(f"\nNew Prediction: {outcome}")
# Column index 1 of predict_proba is taken as the probability of label 1
# (assumes the fitted classes are ordered [0, 1]).
print(f"Probability: {prediction_prob[0, 1]:.2%} chance of purchasing")


Model Evaluation:
Accuracy: 0.67

Confusion Matrix:
[[1 1]
 [0 1]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.50      1.00      0.67         1

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3


Model Coefficients:
Age: 0.5469
Salary: 0.5252
Family_Size: 0.6588
Vehicle_Type: 0.5889

New Prediction: Will purchase
Probability: 55.78% chance of purchasing
