In [5]:
# Cell 1
!pip install pandas numpy scikit-learn matplotlib seaborn



In [6]:
# Cell 2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import pickle

In [7]:
# Cell 3
# Sample dataset simulation
#
# Every column is drawn from a single seeded generator, so a fresh
# "Restart Kernel -> Run All" reproduces the same rows and therefore the same
# downstream split and metrics.
SEED = 42
N_SAMPLES = 500
rng = np.random.default_rng(SEED)

data = pd.DataFrame({
    'age': rng.integers(18, 60, N_SAMPLES),                # integers in 18..59 (upper bound exclusive)
    'gender': rng.choice(['Male', 'Female'], N_SAMPLES),
    'browsing_time': rng.uniform(2, 30, N_SAMPLES),        # floats in [2, 30)
    'pages_visited': rng.integers(1, 15, N_SAMPLES),       # integers in 1..14
    'device': rng.choice(['Mobile', 'Desktop', 'Tablet'], N_SAMPLES),
    'previous_purchases': rng.integers(0, 5, N_SAMPLES),   # integers in 0..4
    # NOTE: the label is drawn independently of every feature column, so a
    # classifier trained on this frame is expected to score around chance.
    'interested': rng.choice([0, 1], N_SAMPLES)  # 0: Not Buy, 1: Buy
})

data.head()

Unnamed: 0,age,gender,browsing_time,pages_visited,device,previous_purchases,interested
0,47,Female,4.274099,1,Desktop,0,0
1,26,Female,21.009947,8,Mobile,4,0
2,35,Male,14.372045,9,Desktop,4,1
3,58,Male,28.031437,14,Mobile,1,0
4,33,Female,4.382651,2,Tablet,2,0


In [8]:
# Cell 4
# Preprocess the simulated frame: encode the categoricals, split into
# train/test, then scale. Works on a copy so `data` stays untouched and the
# cell can be re-run safely.
df = data.copy()

# Encode categorical columns as integer codes. One encoder per column is kept
# so each mapping can be reused (they are pickled in a later cell).
le_gender = LabelEncoder()
le_device = LabelEncoder()

df['gender'] = le_gender.fit_transform(df['gender'])
df['device'] = le_device.fit_transform(df['device'])

# Split features and label
X = df.drop('interested', axis=1)
y = df['interested']

# Train-test split BEFORE scaling, so the held-out rows never influence the
# scaler's mean/variance (fitting on all rows leaks test-set statistics into
# preprocessing). The same random_state on the same number of rows selects
# the same rows as splitting the scaled matrix would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features: fit on the training rows only, then apply to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any later cell that references `X_scaled` still works; it is the
# full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

In [9]:
# Cell 5
# Fit a 100-tree random forest on the training split (fixed random_state).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out split and print accuracy plus the per-class report.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)

Accuracy: 0.53

Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.54      0.55        54
           1       0.49      0.52      0.51        46

    accuracy                           0.53       100
   macro avg       0.53      0.53      0.53       100
weighted avg       0.53      0.53      0.53       100



In [10]:
# Cell 6
# Persist the fitted model, both label encoders and the scaler, one pickle
# file each, written relative to the current working directory.
artifacts = {
    'model.pkl': model,
    'le_gender.pkl': le_gender,
    'le_device.pkl': le_device,
    'scaler.pkl': scaler,
}

for filename, fitted_obj in artifacts.items():
    with open(filename, 'wb') as f:
        pickle.dump(fitted_obj, f)