In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load data
int_billing = pd.read_csv('/content/data_international_billing.csv')
local_billing = pd.read_csv('/content/data_local_billing.csv')
churn_target = pd.read_csv('/content/data_churn_target.csv')

# Combine the data
churn_data = int_billing.merge(local_billing, on='Phone').merge(churn_target, on='Phone')

# Preprocessing
churn_data.fillna(0, inplace=True)
churn_data.replace({'Churn?': {'False.': 0, 'True.': 1}, 'VMail Plan': {'no': 0, 'yes': 1}, "Int'l Plan": {'no': 0, 'yes': 1}}, inplace=True)
churn_data.drop(['Phone', 'State', 'Area Code'], axis=1, inplace=True)

# Data splitting
X = churn_data.iloc[:, 1:17]
Y = churn_data['Churn?']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

# Model building
KNN = KNeighborsClassifier(n_neighbors=5)
KNN.fit(X_train, Y_train)
KNN_predict = KNN.predict(X_test)

RFC = RandomForestClassifier()
RFC.fit(X_train, Y_train)
RFC_predict = RFC.predict(X_test)

# Evaluation
KNN_accuracy = accuracy_score(Y_test, KNN_predict)
RFC_accuracy = accuracy_score(Y_test, RFC_predict)

KNN_clf_report = classification_report(Y_test, KNN_predict)
RFC_clf_report = classification_report(Y_test, RFC_predict)

print('KNN_accuracy =', KNN_accuracy, 'RFC_accuracy =', RFC_accuracy)
print('KNN_clf_report:', KNN_clf_report)
print('RFC_clf_report:', RFC_clf_report)


KNN_accuracy = 0.8875502008032129 RFC_accuracy = 0.927710843373494
KNN_clf_report:               precision    recall  f1-score   support

           0       0.90      0.97      0.94       872
           1       0.61      0.27      0.38       124

    accuracy                           0.89       996
   macro avg       0.76      0.62      0.66       996
weighted avg       0.87      0.89      0.87       996

RFC_clf_report:               precision    recall  f1-score   support

           0       0.93      0.99      0.96       872
           1       0.85      0.51      0.64       124

    accuracy                           0.93       996
   macro avg       0.89      0.75      0.80       996
weighted avg       0.92      0.93      0.92       996

