In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix
from sklearn.pipeline import Pipeline

churn_df = pd.read_csv("./data/Telco-Customer-Churn.csv").rename(columns={"Unnamed: 0": "customerID"})


churn_df = churn_df.drop(["customerID", "TotalCharges"], axis=1)
churn_df["Churn"] = churn_df["Churn"].apply(lambda x: 1 if x=="Yes" else 0)

non_numeric_columns = churn_df.select_dtypes(include="object").columns.tolist()

encoder = LabelEncoder()

for col in non_numeric_columns:
    churn_df[col] = encoder.fit_transform(churn_df[col])


majority_class = churn_df[churn_df['Churn'] == 0]
minority_class = churn_df[churn_df['Churn'] == 1]


majority_class_undersampled = majority_class.sample(n=minority_class.shape[0], random_state=42)
balanced_churn_df = pd.concat([majority_class_undersampled, minority_class])
balanced_churn_df = balanced_churn_df.sample(frac=1, random_state=42).reset_index(drop=True)


X = balanced_churn_df.drop("Churn", axis=1)
y = balanced_churn_df["Churn"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

models = [LogisticRegression(class_weight='balanced'), LinearSVC(C=1, loss="hinge")]

for model in models:
        
    clf = Pipeline([
        ("scaler", StandardScaler()),
        ("model", model)
    ])
   
    clf.fit(X_train, y_train)
    print("Model: ", model.__class__.__name__)
    y_pred = clf.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    pr_score = precision_score(y_test, y_pred)
    rc_score = recall_score(y_test, y_pred)
    f_score = f1_score(y_test, y_pred)
    cf_matrix = confusion_matrix(y_test, y_pred)   

    print("Accuracy: ", accuracy)
    print("Persition score: ", pr_score)
    print("Recall score: ", rc_score)
    print("F1 Score: ", f_score)
    print("-----------------------------")
    print("Confusion Matrix",cf_matrix)

Model:  LogisticRegression
Accuracy:  0.7839572192513369
Persition score:  0.7520325203252033
Recall score:  0.8222222222222222
F1 Score:  0.7855626326963907
-----------------------------
Confusion Matrix [[363 122]
 [ 80 370]]
Model:  LinearSVC
Accuracy:  0.7839572192513369
Persition score:  0.7412451361867705
Recall score:  0.8466666666666667
F1 Score:  0.7904564315352697
-----------------------------
Confusion Matrix [[352 133]
 [ 69 381]]


