<a href="https://colab.research.google.com/github/hanieranjbar/snappfood_task/blob/main/ChurnPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from imblearn.over_sampling import SMOTE
from datetime import datetime

churn_data = pd.read_csv("dataset_churned.csv")


def calculate_days_from_start(row, reference_date):
    start_date = datetime.strptime(row['subscription_start'], '%m/%d/%Y')
    ref_date = datetime.strptime(reference_date, '%m/%d/%Y')
    return (start_date - ref_date).days

reference_date = "1/1/2024"
churn_data['days_from_start'] = churn_data.apply(calculate_days_from_start, axis=1, reference_date=reference_date)

le = LabelEncoder()
churn_data['subscription_status'] = le.fit_transform(churn_data['subscription_status'])


scaler = StandardScaler()
churn_data[['total_orders', 'total_order_value', 'days_since_last_order', 'total_pro_orders', 'total_non_pro_orders']] = scaler.fit_transform(
    churn_data[['total_orders', 'total_order_value', 'days_since_last_order', 'total_pro_orders', 'total_non_pro_orders']])

X_churn = churn_data[['total_orders', 'total_order_value', 'days_since_last_order', 'total_pro_orders', 'total_non_pro_orders']]
y_churn = churn_data['churned']


X_train, X_test, y_train, y_test = train_test_split(X_churn, y_churn, test_size=0.2, random_state=42)


smote = SMOTE()
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

logreg_model = LogisticRegression(class_weight='balanced')
logreg_model.fit(X_train_resampled, y_train_resampled)

y_pred_logreg = logreg_model.predict(X_test)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
rf_model.fit(X_train_resampled, y_train_resampled)

y_pred_rf = rf_model.predict(X_test)

print("Churn Model Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_logreg))
print("Churn Logistic Regression Classification Report:\n", classification_report(y_test, y_pred_logreg))
print("Churn Logistic Regression Confusion Matrix:\n", confusion_matrix(y_test, y_pred_logreg))

print("Churn Model Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Churn Random Forest Classification Report:\n", classification_report(y_test, y_pred_rf))
print("Churn Random Forest Confusion Matrix:\n", confusion_matrix(y_test, y_pred_rf))




Churn Model Logistic Regression Accuracy: 0.57
Churn Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.58      0.71       182
           1       0.10      0.44      0.16        18

    accuracy                           0.57       200
   macro avg       0.50      0.51      0.43       200
weighted avg       0.84      0.57      0.66       200

Churn Logistic Regression Confusion Matrix:
 [[106  76]
 [ 10   8]]
Churn Model Random Forest Accuracy: 0.81
Churn Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.87      0.89       182
           1       0.12      0.17      0.14        18

    accuracy                           0.81       200
   macro avg       0.51      0.52      0.51       200
weighted avg       0.84      0.81      0.83       200

Churn Random Forest Confusion Matrix:
 [[159  23]
 [ 15   3]]
