# Importing Libraries

In [66]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score


# Importing Dataset

In [67]:
data = pd.read_csv('most-cleaned-in-vehicle-coupon-recommendation.csv')
data.drop(columns=['Unnamed: 0'], inplace=True)
target_col = data.pop('Y')  
data.insert(len(data.columns), 'Y', target_col)
data.head()

Unnamed: 0,temperature,expiration,gender,has_children,education,occupation,income,Bar,CoffeeHouse,CarryAway,...,age_41,age_46,age_50plus,age_below21,maritalStatus_Divorced,maritalStatus_Married partner,maritalStatus_Single,maritalStatus_Unmarried partner,maritalStatus_Widowed,Y
0,0.375,0,0,1,5,24,3,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1
1,0.625,1,0,1,5,24,3,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0
2,0.625,1,0,1,5,24,3,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1
3,0.625,1,0,1,5,24,3,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0
4,0.625,0,0,1,5,24,3,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0


In [68]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12610 entries, 0 to 12609
Data columns (total 49 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   temperature                      12610 non-null  float64
 1   expiration                       12610 non-null  int64  
 2   gender                           12610 non-null  int64  
 3   has_children                     12610 non-null  int64  
 4   education                        12610 non-null  int64  
 5   occupation                       12610 non-null  int64  
 6   income                           12610 non-null  int64  
 7   Bar                              12610 non-null  int64  
 8   CoffeeHouse                      12610 non-null  int64  
 9   CarryAway                        12610 non-null  int64  
 10  RestaurantLessThan20             12610 non-null  int64  
 11  Restaurant20To50                 12610 non-null  int64  
 12  toCoupon_GEQ15min 

In [69]:
data.rename(columns={"coupon_Restaurant(<20)": "coupon_Restaurant(lessthan_20)"}, inplace=True)

# Spliting data for training models

In [70]:
X = data.drop("Y", axis=1)     
y = data["Y"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


# Training on Different models

### Logistic Regression

In [71]:
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)

accuracy = accuracy_score(y_test, lr_pred)
precision = precision_score(y_test, lr_pred)
recall = recall_score(y_test, lr_pred)
f1 = f1_score(y_test, lr_pred)

# Print the results
print("Accuracy :", accuracy)
print("Precision:", precision)
print("Recall   :", recall)
print("F1 Score :", f1)
print("\nClassification Report:")
print(classification_report(y_test, lr_pred))

Accuracy : 0.6782976473698124
Precision: 0.6932203389830508
Recall   : 0.7684358853922029
F1 Score : 0.7288928491869013

Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.56      0.60      1654
           1       0.69      0.77      0.73      2129

    accuracy                           0.68      3783
   macro avg       0.67      0.67      0.67      3783
weighted avg       0.68      0.68      0.67      3783



### Random Forest

In [72]:
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)

accuracy = accuracy_score(y_test, rf_pred)
precision = precision_score(y_test, rf_pred)
recall = recall_score(y_test, rf_pred)
f1 = f1_score(y_test, rf_pred)

# Print the results
print("Accuracy :", accuracy)
print("Precision:", precision)
print("Recall   :", recall)
print("F1 Score :", f1)
print("\nClassification Report:")
print(classification_report(y_test, lr_pred))

Accuracy : 0.7483478720592123
Precision: 0.7575492341356674
Recall   : 0.8130577736026303
F1 Score : 0.784322609877662

Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.56      0.60      1654
           1       0.69      0.77      0.73      2129

    accuracy                           0.68      3783
   macro avg       0.67      0.67      0.67      3783
weighted avg       0.68      0.68      0.67      3783



### XGBoost

In [73]:
xgb = XGBClassifier(eval_metric='logloss', random_state=42)
xgb.fit(X_train, y_train)
xgb_pred = xgb.predict(X_test)

accuracy = accuracy_score(y_test, xgb_pred)
precision = precision_score(y_test, xgb_pred)
recall = recall_score(y_test, xgb_pred)
f1 = f1_score(y_test, xgb_pred)

# Print the results
print("Accuracy :", accuracy)
print("Precision:", precision)
print("Recall   :", recall)
print("F1 Score :", f1)
print("\nClassification Report:")
print(classification_report(y_test, lr_pred))

Accuracy : 0.7613005551149881
Precision: 0.7695690413368513
Recall   : 0.8219821512447159
F1 Score : 0.794912559618442

Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.56      0.60      1654
           1       0.69      0.77      0.73      2129

    accuracy                           0.68      3783
   macro avg       0.67      0.67      0.67      3783
weighted avg       0.68      0.68      0.67      3783

