In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [2]:
# Load the dataset from Datasets/PCAdata.csv and discard its first column
# (presumably a row index that was saved alongside the data -- TODO confirm).
# The drop is done without in-place mutation so the raw frame stays available.
pca_raw = pd.read_csv("Datasets/PCAdata.csv")
df = pca_raw.drop(columns=pca_raw.columns[0])

# Preview the first rows of the remaining columns.
df.head()

Unnamed: 0,PC1,PC2,PC3,Churn
0,-1.901289,-1.677362,-3.782398,0
1,-0.663606,-0.818212,0.208648,0
2,-1.197334,-2.291034,0.481391,1
3,-0.448424,-0.080102,-4.02062,0
4,-2.759017,-2.170904,0.897915,1


In [3]:
# Prepare data for training/testing: every column except the last one forms
# the feature matrix X, and the last column is the target vector y.
X, y = df.iloc[:, :-1], df.iloc[:, -1]


In [4]:
# Sanity check: dimensions of the feature matrix as (rows, columns).
X.shape

(7043, 3)

In [5]:
# Sanity check: the target vector should have one entry per row of X.
y.shape

(7043,)

In [6]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore the split) identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=21,
)

In [7]:
# KNN model: fit a k-nearest-neighbours classifier (k=20) on the training
# split and evaluate it on the held-out test split.
knn = KNeighborsClassifier(n_neighbors=20)
knn.fit(X_train, y_train)
yknn = knn.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
# symmetric, so the value is unchanged, but the call now matches the
# documented signature and the other two metric calls.
knnacc = accuracy_score(y_test, yknn)
knnmatrix = confusion_matrix(y_test, yknn)  # rows: true class, columns: predicted class
knnreport = classification_report(y_test, yknn, output_dict=True)  # dict instead of text table

print(knnacc)
print(knnmatrix)
print(knnreport)

0.7764371894960965
[[935 100]
 [215 159]]
{'0': {'precision': 0.8130434782608695, 'recall': 0.9033816425120773, 'f1-score': 0.8558352402745996, 'support': 1035.0}, '1': {'precision': 0.6138996138996139, 'recall': 0.42513368983957217, 'f1-score': 0.5023696682464455, 'support': 374.0}, 'accuracy': 0.7764371894960965, 'macro avg': {'precision': 0.7134715460802417, 'recall': 0.6642576661758247, 'f1-score': 0.6791024542605225, 'support': 1409.0}, 'weighted avg': {'precision': 0.7601834319364482, 'recall': 0.7764371894960965, 'f1-score': 0.7620125831145359, 'support': 1409.0}}


In [8]:
# SVC model: fit a support vector classifier with scikit-learn's default
# settings on the training split and evaluate it on the held-out test split.
svc = SVC()
svc.fit(X_train, y_train)
ysvc = svc.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
# symmetric, so the value is unchanged, but the call now matches the
# documented signature and the other two metric calls.
svcacc = accuracy_score(y_test, ysvc)
svcmatrix = confusion_matrix(y_test, ysvc)  # rows: true class, columns: predicted class
svcreport = classification_report(y_test, ysvc, output_dict=True)  # dict instead of text table

print(svcacc)
print(svcmatrix)
print(svcreport)

0.7792760823278921
[[937  98]
 [213 161]]
{'0': {'precision': 0.8147826086956522, 'recall': 0.9053140096618357, 'f1-score': 0.8576659038901602, 'support': 1035.0}, '1': {'precision': 0.6216216216216216, 'recall': 0.4304812834224599, 'f1-score': 0.5086887835703001, 'support': 374.0}, 'accuracy': 0.7792760823278921, 'macro avg': {'precision': 0.7182021151586369, 'recall': 0.6678976465421478, 'f1-score': 0.6831773437302302, 'support': 1409.0}, 'weighted avg': {'precision': 0.7635106362572652, 'recall': 0.7792760823278921, 'f1-score': 0.7650346455511767, 'support': 1409.0}}


In [9]:
# Logistic regression model: fit on the training split and evaluate on the
# held-out test split. max_iter is raised to 1000 to give the solver more
# iterations than the library default.
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train, y_train)
ylogreg = logreg.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
# symmetric, so the value is unchanged, but the call now matches the
# documented signature and the other two metric calls.
logregacc = accuracy_score(y_test, ylogreg)
logregmatrix = confusion_matrix(y_test, ylogreg)  # rows: true class, columns: predicted class
logregreport = classification_report(y_test, ylogreg, output_dict=True)  # dict instead of text table

print(logregacc)
print(logregmatrix)
print(logregreport)

0.765791341376863
[[924 111]
 [219 155]]
{'0': {'precision': 0.8083989501312336, 'recall': 0.8927536231884058, 'f1-score': 0.8484848484848485, 'support': 1035.0}, '1': {'precision': 0.5827067669172933, 'recall': 0.4144385026737968, 'f1-score': 0.484375, 'support': 374.0}, 'accuracy': 0.765791341376863, 'macro avg': {'precision': 0.6955528585242634, 'recall': 0.6535960629311013, 'f1-score': 0.6664299242424243, 'support': 1409.0}, 'weighted avg': {'precision': 0.7484920115066676, 'recall': 0.765791341376863, 'f1-score': 0.7518368120523905, 'support': 1409.0}}


In [10]:
# Decision tree model: fit on the training split and evaluate on the held-out
# test split.
# random_state is fixed (same seed as the train/test split) because the tree
# builder permutes candidate features at each split; without a seed the fitted
# tree, and therefore the scores, can change from run to run.
# NOTE: the output recorded beneath this cell came from an unseeded run, so a
# re-run with the seed may print slightly different numbers.
tree = DecisionTreeClassifier(random_state=21)
tree.fit(X_train, y_train)
ytree = tree.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
# symmetric, but the call now matches the documented signature and the other
# two metric calls.
treeacc = accuracy_score(y_test, ytree)
treematrix = confusion_matrix(y_test, ytree)  # rows: true class, columns: predicted class
treereport = classification_report(y_test, ytree, output_dict=True)  # dict instead of text table

print(treeacc)
print(treematrix)
print(treereport)

0.7224982256919801
[[857 178]
 [213 161]]
{'0': {'precision': 0.8009345794392523, 'recall': 0.8280193236714976, 'f1-score': 0.8142517814726841, 'support': 1035.0}, '1': {'precision': 0.4749262536873156, 'recall': 0.4304812834224599, 'f1-score': 0.45161290322580644, 'support': 374.0}, 'accuracy': 0.7224982256919801, 'macro avg': {'precision': 0.637930416563284, 'recall': 0.6292503035469788, 'f1-score': 0.6329323423492452, 'support': 1409.0}, 'weighted avg': {'precision': 0.7144000770750051, 'recall': 0.7224982256919801, 'f1-score': 0.7179941942020437, 'support': 1409.0}}


In [11]:
# Random forest model: fit on the training split and evaluate on the held-out
# test split.
# random_state is fixed (same seed as the train/test split) because the forest
# relies on bootstrap sampling and random feature selection; without a seed
# the scores change on every run.
# NOTE: the output recorded beneath this cell came from an unseeded run, so a
# re-run with the seed may print slightly different numbers.
forest = RandomForestClassifier(random_state=21)
forest.fit(X_train, y_train)
yforest = forest.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
# symmetric, but the call now matches the documented signature and the other
# two metric calls.
forestacc = accuracy_score(y_test, yforest)
forestmatrix = confusion_matrix(y_test, yforest)  # rows: true class, columns: predicted class
forestreport = classification_report(y_test, yforest, output_dict=True)  # dict instead of text table

print(forestacc)
print(forestmatrix)
print(forestreport)

0.7650816181689141
[[919 116]
 [215 159]]
{'0': {'precision': 0.810405643738977, 'recall': 0.8879227053140096, 'f1-score': 0.8473951129552789, 'support': 1035.0}, '1': {'precision': 0.5781818181818181, 'recall': 0.42513368983957217, 'f1-score': 0.4899845916795069, 'support': 374.0}, 'accuracy': 0.7650816181689141, 'macro avg': {'precision': 0.6942937309603976, 'recall': 0.6565281975767909, 'f1-score': 0.668689852317393, 'support': 1409.0}, 'weighted avg': {'precision': 0.7487649689636914, 'recall': 0.7650816181689141, 'f1-score': 0.7525253223540449, 'support': 1409.0}}
