In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global RNG so every draw below is reproducible.
np.random.seed(42)

# Build the synthetic customer table one column at a time.
# The order of the draws matters: each call consumes the seeded global
# random stream, so reordering them would change the generated values.
num_samples = 1000
data = {}
data["age"] = np.random.randint(low=18, high=65, size=num_samples)
data["gender"] = np.random.choice(["Male", "Female"], size=num_samples)
data["income"] = np.random.randint(low=20000, high=150000, size=num_samples)
data["education"] = np.random.choice(
    ["Graduate", "High School", "Post-Graduate"], size=num_samples
)
data["marital_status"] = np.random.choice(["Single", "Married"], size=num_samples)
# Target label: drawn independently of the other columns, 60% zeros / 40% ones.
data["insurance_bought"] = np.random.choice([0, 1], size=num_samples, p=[0.6, 0.4])

# Assemble the frame, persist it without the index column, and preview it.
df = pd.DataFrame(data)
df.to_csv("insurance_customers.csv", index=False)
df.head()


Unnamed: 0,age,gender,income,education,marital_status,insurance_bought
0,56,Male,49241,High School,Married,0
1,46,Female,116214,Post-Graduate,Married,1
2,32,Female,64569,Post-Graduate,Married,0
3,60,Female,31745,Post-Graduate,Single,1
4,25,Male,46029,Post-Graduate,Married,1


In [3]:
# Imports for this cell, grouped up front so its dependencies are visible at a glance
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load dataset
df = pd.read_csv("insurance_customers.csv")

# Preprocessing: integer-encode each categorical column with its own encoder.
# Keeping one fitted encoder per column (instead of refitting a single shared
# instance) preserves every column's category <-> code mapping, so encoded
# values can still be decoded later via encoders[col].inverse_transform(...).
encoders = {}
for col in ["gender", "education", "marital_status"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le
# After the loop `le` is the fitted "marital_status" encoder.

# Features and target
X = df.drop("insurance_bought", axis=1)
y = df["insurance_bought"]

# Split data (80% train / 20% test, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
# random_state fixes the forest's internal randomness (bootstrap samples and
# per-split feature sampling) so that re-running the notebook reproduces the
# same metrics; without it every run reports different numbers.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out test set
# NOTE: in this notebook the target column is generated independently of the
# features (see the synthetic-data cell), so scores near chance are expected.
y_pred = model.predict(X_test)

print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       0.58      0.59      0.59       125
           1       0.30      0.29      0.30        75

    accuracy                           0.48       200
   macro avg       0.44      0.44      0.44       200
weighted avg       0.48      0.48      0.48       200

Confusion Matrix:
 [[74 51]
 [53 22]]
