# Customer Churn Classification
## Alfido Tech Internship Project


### Import Libraries

In [ ]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


### Load Dataset

In [ ]:
# Read the churn dataset from the project's data directory (path is relative
# to the notebook's working directory).
data = pd.read_csv(filepath_or_buffer='../data/churn.csv')
# Preview the first five rows as the cell output.
data.head(n=5)

### Preprocessing & Split

In [ ]:
# Separate the target column ('Churn') from the feature columns.
y = data['Churn']
X = data.drop(columns='Churn')

# Hold out 20% of the rows for testing. Stratifying on the target keeps the
# class proportions the same in the train and test splits, so the
# precision/recall/F1 numbers computed later are not distorted by a test set
# that happens to contain unusually few (or many) positive rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training split only and reuse its statistics for the
# test split, so no information from the test rows leaks into preprocessing.
# NOTE(review): StandardScaler requires numeric input -- assumes every feature
# column in the CSV is already numeric; confirm against the dataset.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Logistic Regression

In [ ]:
# Train a logistic-regression baseline with default hyperparameters
# (fit() returns the estimator itself, so construction and fitting chain).
lr = LogisticRegression().fit(X_train, y_train)

# Hard class predictions for the held-out rows.
lr_pred = lr.predict(X_test)
# Column 1 of predict_proba is the probability of the second entry in
# lr.classes_ (presumably the "churned" class -- confirm the label encoding).
lr_prob = lr.predict_proba(X_test)[:, 1]

### Random Forest

In [ ]:
# Train a random forest with default hyperparameters; the fixed seed makes
# the fitted trees reproducible between runs.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Hard class predictions for the held-out rows.
rf_pred = rf.predict(X_test)
# Column 1 of predict_proba is the probability of the second entry in
# rf.classes_ (presumably the "churned" class -- confirm the label encoding).
rf_prob = rf.predict_proba(X_test)[:, 1]

### Evaluation

In [ ]:
def metrics(y, p, prob):
    """Return the evaluation scores for one classifier as a list.

    Args:
        y: True labels.
        p: Predicted labels, compared against ``y`` by the label-based scorers.
        prob: Scores passed to ``roc_auc_score`` together with ``y``.

    Returns:
        ``[accuracy, precision, recall, f1, roc_auc]`` in that order.
    """
    # The first four scorers all take (true labels, predicted labels).
    label_scorers = (accuracy_score, precision_score, recall_score, f1_score)
    scores = [scorer(y, p) for scorer in label_scorers]
    # ROC-AUC is computed from the scores in `prob`, not the hard predictions.
    scores.append(roc_auc_score(y, prob))
    return scores

# One row per model (row label -> list of scores), one column per metric.
# The column order must match the order of the list returned by metrics().
results = pd.DataFrame.from_dict(
    {
        'Logistic Regression': metrics(y_test, lr_pred, lr_prob),
        'Random Forest': metrics(y_test, rf_pred, rf_prob),
    },
    orient='index',
    columns=['Accuracy', 'Precision', 'Recall', 'F1', 'ROC-AUC'],
)
# Display the comparison table as the cell output.
results

### Cross Validation

In [ ]:
# Mean F1 of the random forest over 5 cross-validation folds of the
# (already scaled) training split; shown as the cell output.
np.mean(
    cross_val_score(estimator=rf, X=X_train, y=y_train, scoring='f1', cv=5)
)