In [10]:
import pandas as pd
import numpy as np

In [11]:
df = pd.read_csv("telco_train.csv", index_col=0)
df_test = pd.read_csv("telco_test.csv", index_col=0)

In [12]:
X = df.drop(["customer_id", "churn"], axis=1)
y = df.churn

X_test = df_test.drop(["customer_id", "churn"], axis=1)
y_test = df_test.churn

In [13]:
X = X.astype(np.float64)
X_test = X_test.astype(np.float64)

In [14]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X = scaler.fit_transform(X)
X_test = scaler.fit_transform(X_test)

### LDA, QDA

In [15]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis

lda = LinearDiscriminantAnalysis()
lda.fit(X, y)
print("Accuracy of LDA on training set: {:.2f}".format(lda.score(X, y)))
print("Accuracy of LDA on test set: {:.2f}".format(lda.score(X_test, y_test)))

qda = QuadraticDiscriminantAnalysis()
qda.fit(X, y)
print("Accuracy of QDA on traning set: {:.2f}".format(qda.score(X, y)))
print("Accuracy of QDA on test set: {:.2f}".format(qda.score(X_test, y_test)))

Accuracy of LDA on training set: 0.80
Accuracy of LDA on test set: 0.82
Accuracy of QDA on traning set: 0.77
Accuracy of QDA on test set: 0.77


## Decision Tree

In [16]:
from sklearn.tree import DecisionTreeClassifier

dtc = DecisionTreeClassifier().fit(X, y)

print("Accuracy of Decision Tree classifier on training set: {:.2f}".format(dtc.score(X, y)))
print("Accuracy of Decision Tree classifier on test set: {:.2f}".format(dtc.score(X_test, y_test)))

Accuracy of Decision Tree classifier on training set: 1.00
Accuracy of Decision Tree classifier on test set: 0.72


## K-Nearest Neighbors

In [17]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier().fit(X, y)

print("Accuracy of KNN classifier on training set: {:.2f}".format(knn.score(X, y)))
print("Accuracy of KNN classifier on test set: {:.2f}".format(knn.score(X_test, y_test)))

Accuracy of KNN classifier on training set: 0.83
Accuracy of KNN classifier on test set: 0.77


## Gaussian Naive Bayes

In [18]:
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB().fit(X, y)

print("Accuracy of GNB classifier on training set: {:.2f}".format(gnb.score(X, y)))
print("Accuracy of GNB classifier on test set: {:.2f}".format(gnb.score(X_test, y_test)))

Accuracy of GNB classifier on training set: 0.76
Accuracy of GNB classifier on test set: 0.77


## Support Vector Machine

In [19]:
from sklearn.svm import SVC 

svm = SVC().fit(X, y)

print("Accuracy of SVM classifier on training set: {:.2f}".format(svm.score(X, y)))
print("Accuracy of SVM classifier on test set: {:.2f}".format(svm.score(X_test, y_test)))

Accuracy of SVM classifier on training set: 0.82
Accuracy of SVM classifier on test set: 0.81


## RBF SVM

In [23]:
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

rbf = GaussianProcessClassifier(1.0 * RBF(1.0))
rbf.fit(X, y)

print("Accuracy of RBF SVM classifier on training set: {:.2f}".format(rbf.score(X, y)))
print("Accuracy of RBF SVM classifier on test set: {:.2f}".format(rbf.score(X_test, y_test)))

Accuracy of RBF SVM classifier on training set: 0.80
Accuracy of RBF SVM classifier on test set: 0.82


## Random Forests

In [25]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

rfc = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
rfc.fit(X, y)

print("Accuracy of Random Forests classifier on training set: {:.2f}".format(rfc.score(X, y)))
print("Accuracy of Random Forests classifier on test set: {:.2f}".format(rfc.score(X_test, y_test)))

Accuracy of Random Forests classifier on training set: 0.78
Accuracy of Random Forests classifier on test set: 0.78


## AdaBoost

In [26]:
ada = AdaBoostClassifier()
ada.fit(X, y)

print("Accuracy of AdaBoost classifier on training set: {:.2f}".format(ada.score(X, y)))
print("Accuracy of AdaBoost classifier on test set: {:.2f}".format(ada.score(X_test, y_test)))

Accuracy of AdaBoost classifier on training set: 0.80
Accuracy of AdaBoost classifier on test set: 0.81


## Neural Net

In [27]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(alpha=1)
mlp.fit(X, y)

print("Accuracy of MLP classifier on training set: {:.2f}".format(mlp.score(X, y)))
print("Accuracy of MLP classifier on test set: {:.2f}".format(mlp.score(X_test, y_test)))

Accuracy of MLP classifier on training set: 0.81
Accuracy of MLP classifier on test set: 0.82
