In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score



In [3]:
# Location of the customer CSV (point this at your own file if it lives elsewhere)
csv_path = 'Customers - Customers.csv'
df = pd.read_csv(csv_path)

# Tally the null entries per column and print the summary
missing_per_column = df.isnull().sum()
print("Missing values:\n", missing_per_column)

Missing values:
 CustomerID                 0
Gender                     0
Age                        0
Annual Income ($)          0
Spending Score (1-100)     0
Profession                35
Work Experience            0
Family Size                0
dtype: int64


In [4]:
# Simulate a binary 'Response' target from spending habits and income.
# Assumption: customers with a Spending Score > 50 AND an Annual Income > $50,000
# are labelled as responders (1); everyone else is a non-responder (0).
# The comparison is vectorized over whole columns instead of a row-wise apply,
# which yields the same labels without a Python call per row.
df['Response'] = (
    (df['Spending Score (1-100)'] > 50) & (df['Annual Income ($)'] > 50000)
).astype(int)

# 'Profession' contains missing entries (the missing-value report shows 35).
# Give them an explicit category before encoding: feeding NaN to LabelEncoder
# either fails or silently becomes an unnamed class, depending on the
# scikit-learn version in use.
df['Profession'] = df['Profession'].fillna('Unknown')

# Encode categorical variables (Gender and Profession) as integer codes.
# The fitted encoders are kept so the codes can be mapped back to labels
# via le_gender.classes_ / le_prof.classes_.
le_gender = LabelEncoder()
df['Gender'] = le_gender.fit_transform(df['Gender'])

le_prof = LabelEncoder()
df['Profession'] = le_prof.fit_transform(df['Profession'])

In [5]:
# Discard columns that must not reach the model:
#   - 'CustomerID' is only a row identifier
#   - 'Spending Score (1-100)' is used to build 'Response', so keeping it would
#     hand the model part of the labelling rule
# NOTE(review): 'Annual Income ($)' also feeds the 'Response' rule and is kept —
# confirm this is intended.
df = df.drop(['CustomerID', 'Spending Score (1-100)'], axis=1)

# Target (y) is the simulated 'Response'; features (X) are every other column
y = df['Response']
X = df[[col for col in df.columns if col != 'Response']]

In [6]:
# Split into training and testing sets BEFORE any fitting so that the held-out
# rows cannot influence preprocessing statistics. The fixed random_state and
# stratify=y keep the 70/30 partition reproducible and class-balanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Standardize features: learn mean/std from the training fold only, then apply
# the same transformation to the test fold (fitting on all of X would leak
# test-set information into training). X itself is left unscaled.
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train), columns=X.columns, index=X_train.index
)
X_test = pd.DataFrame(
    scaler.transform(X_test), columns=X.columns, index=X_test.index
)

# Train a Random Forest model (100 trees, depth capped at 5, seeded for reproducibility)
model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
model.fit(X_train, y_train)

# Predictions on the held-out test fold
y_pred = model.predict(X_test)

In [7]:
# Confusion matrix of true labels vs. predictions on the test fold
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Evaluate the four summary scores in one pass, each with the same
# (y_test, y_pred) arguments
acc, precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report each score rounded to two decimals
print(f"Accuracy: {acc:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

Confusion Matrix:
 [[182 142]
 [153 123]]
Accuracy: 0.51
Precision: 0.46
Recall: 0.45
F1-Score: 0.45
