In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load datasets
df_purchase = pd.read_csv("User_product_purchase_details_p2.csv")
df_user = pd.read_csv("user_demographics.csv")

# Merge on User_ID
df = pd.merge(df_purchase, df_user, on="User_ID", how="left")

# Create binary target
df["High_Value_Purchase"] = (df["Purchase"] >= 10000).astype(int)

# Drop unnecessary columns
df = df.drop(["Product_ID"], axis=1)

# Handle missing values
df = df.fillna(0)

# Encode categorical variables
df = pd.get_dummies(df, columns=["Gender", "Age", "City_Category", 
                                 "Stay_In_Current_City_Years", 
                                 "Product_Category_1", 
                                 "Product_Category_2", 
                                 "Product_Category_3"], drop_first=True)

# Features and target
X = df.drop(["High_Value_Purchase", "Purchase"], axis=1)
y = df["High_Value_Purchase"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Train Logistic Regression
log = LogisticRegression(max_iter=2000)
log.fit(X_train_scaled, y_train)

# Predictions
pred_lr = log.predict(X_test_scaled)

# Evaluation
print("LR Accuracy:", accuracy_score(y_test, pred_lr))
print("Confusion Matrix:\n", confusion_matrix(y_test, pred_lr))

#Precision
from sklearn.metrics import precision_score
precision = precision_score(y_test, pred_lr)
print("Precision:", precision)

# Decision Tree
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train_scaled, y_train)
pred_dt = dt.predict(X_test_scaled)
print("DT Accuracy:", accuracy_score(y_test, pred_dt))

#Decision Tree Precision
precision_dt = precision_score(y_test, pred_dt)
print("DT Precision:", precision_dt)



LR Accuracy: 0.8989855836529896
Confusion Matrix:
 [[62889  9217]
 [ 1896 36012]]
Precision: 0.7962148179265516
DT Accuracy: 0.8712800189066846
DT Precision: 0.827156751990742


In [12]:
from tensorflow.keras import models, layers

# Build MLP
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Compile
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train
history = model.fit(X_train_scaled, y_train, epochs=15, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate
loss, acc = model.evaluate(X_test_scaled, y_test)
print("MLP Accuracy:", acc)

# Precision for MLP
y_pred_mlp = (model.predict(X_test_scaled) > 0.5).astype("int32")
precision_mlp = precision_score(y_test, y_pred_mlp)
print("MLP Precision:", precision_mlp)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/15
[1m11002/11002[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 982us/step - accuracy: 0.8965 - loss: 0.2536 - val_accuracy: 0.9009 - val_loss: 0.2455
Epoch 2/15
[1m11002/11002[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 922us/step - accuracy: 0.8995 - loss: 0.2466 - val_accuracy: 0.9013 - val_loss: 0.2444
Epoch 3/15
[1m11002/11002[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 965us/step - accuracy: 0.8998 - loss: 0.2454 - val_accuracy: 0.9007 - val_loss: 0.2442
Epoch 4/15
[1m11002/11002[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 986us/step - accuracy: 0.9003 - loss: 0.2447 - val_accuracy: 0.9007 - val_loss: 0.2440
Epoch 5/15
[1m11002/11002[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 894us/step - accuracy: 0.9005 - loss: 0.2441 - val_accuracy: 0.9013 - val_loss: 0.2433
Epoch 6/15
[1m11002/11002[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 886us/step - accuracy: 0.9005 - loss: 0.2437 - val_accuracy: 0.9007 - val