In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [2]:
#Training Models on Basic Data
# Read the dataset from disk, then discard its first column before modelling.
# NOTE(review): the first column is presumably a row index saved along with the
# CSV - confirm against the notebook that wrote "standarddata.csv".
df=pd.read_csv("Datasets/standarddata.csv")
df=df.drop(columns=df.columns[0])
# Preview the first rows to confirm that the load and the column drop worked.
df.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,1.009559,-0.439916,1.03453,-0.654012,-1.277445,-3.05401,2.110535,-0.286223,-0.903589,0.284912,-0.998016,-0.909172,-1.071457,-1.07821,-0.828207,0.829798,-1.145198,-1.160323,-0.992611,0
1,-0.990532,-0.439916,-0.966622,-0.654012,0.066327,0.327438,-0.93828,-0.286223,0.351386,-0.999747,0.286059,-0.909172,-1.071457,-1.07821,0.371271,-1.205113,-0.274744,-0.259629,-0.172165,0
2,-0.990532,-0.439916,-0.966622,-0.654012,-1.236724,0.327438,-0.93828,-0.286223,0.351386,0.284912,-0.998016,-0.909172,-1.071457,-1.07821,-0.828207,0.829798,-0.274744,-0.36266,-0.958066,1
3,-0.990532,-0.439916,-0.966622,-0.654012,0.514251,-3.05401,2.110535,-0.286223,0.351386,-0.999747,0.286059,0.347362,-1.071457,-1.07821,0.371271,-1.205113,0.595711,-0.746535,-0.193672,0
4,1.009559,-0.439916,-0.966622,-0.654012,-1.236724,0.327438,-0.93828,0.997769,-0.903589,-0.999747,-0.998016,-0.909172,-1.071457,-1.07821,-0.828207,0.829798,-1.145198,0.197365,-0.938874,1


In [3]:
#prepare Data for training/testing
# The label lives in the last column; every column before it is a feature.
label_pos=df.shape[1]-1
X=df.iloc[:,:label_pos]   # feature matrix
y=df.iloc[:,label_pos]    # target vector ("Churn" in the preview above)


In [4]:
# Sanity check: (number of rows, number of feature columns) of the feature matrix.
X.shape

(7043, 19)

In [5]:
# Sanity check: the target vector must have one entry per row of X.
y.shape

(7043,)

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# identical on every run, so all models below are scored on the same rows.
split_parts=train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=21,
)
X_train,X_test,y_train,y_test=split_parts

In [7]:
#KNN Model
# Fit a k-nearest-neighbours classifier (k=20) on the training split and
# evaluate it on the held-out split.
knn=KNeighborsClassifier(n_neighbors=20)
knn.fit(X_train,y_train)
yknn=knn.predict(X_test)
# scikit-learn metrics take (y_true, y_pred). Accuracy is symmetric, so the
# value is unaffected, but keeping one order for all three calls avoids a
# silent error if a non-symmetric metric is substituted later.
knnacc=accuracy_score(y_test,yknn)
# Rows are the true class, columns the predicted class.
knnmatrix=confusion_matrix(y_test,yknn)
# output_dict=True returns a nested dict so the averages can be looked up by key.
knnreport=classification_report(y_test,yknn,output_dict=True)
print(knnacc)
print(knnmatrix)
print(knnreport)

0.7735982966643009
[[911 124]
 [195 179]]
{'0': {'precision': 0.8236889692585895, 'recall': 0.8801932367149758, 'f1-score': 0.8510042036431574, 'support': 1035.0}, '1': {'precision': 0.5907590759075908, 'recall': 0.4786096256684492, 'f1-score': 0.5288035450516987, 'support': 374.0}, 'accuracy': 0.7735982966643009, 'macro avg': {'precision': 0.7072240225830901, 'recall': 0.6794014311917125, 'f1-score': 0.6899038743474281, 'support': 1409.0}, 'weighted avg': {'precision': 0.761860878333626, 'recall': 0.7735982966643009, 'f1-score': 0.7654803950461343, 'support': 1409.0}}


In [8]:
#SVC model
# Fit a support-vector classifier with scikit-learn's default settings on the
# training split and evaluate it on the held-out split.
svc=SVC()
svc.fit(X_train,y_train)
ysvc=svc.predict(X_test)
# scikit-learn metrics take (y_true, y_pred). Accuracy is symmetric, so the
# value is unaffected, but keeping one order for all three calls avoids a
# silent error if a non-symmetric metric is substituted later.
svcacc=accuracy_score(y_test,ysvc)
# Rows are the true class, columns the predicted class.
svcmatrix=confusion_matrix(y_test,ysvc)
# output_dict=True returns a nested dict so the averages can be looked up by key.
svcreport=classification_report(y_test,ysvc,output_dict=True)
print(svcacc)
print(svcmatrix)
print(svcreport)

0.7970191625266146
[[938  97]
 [189 185]]
{'0': {'precision': 0.8322981366459627, 'recall': 0.9062801932367149, 'f1-score': 0.8677150786308974, 'support': 1035.0}, '1': {'precision': 0.6560283687943262, 'recall': 0.4946524064171123, 'f1-score': 0.5640243902439024, 'support': 374.0}, 'accuracy': 0.7970191625266146, 'macro avg': {'precision': 0.7441632527201445, 'recall': 0.7004662998269136, 'f1-score': 0.7158697344373999, 'support': 1409.0}, 'weighted avg': {'precision': 0.7855097099770401, 'recall': 0.7970191625266146, 'f1-score': 0.7871044913656481, 'support': 1409.0}}


In [9]:
#Logistic Regression Model
# Fit a logistic-regression classifier on the training split and evaluate it
# on the held-out split. max_iter is raised to 1000 to give the solver more
# iterations to converge.
logreg=LogisticRegression(max_iter=1000)
logreg.fit(X_train,y_train)
ylogreg=logreg.predict(X_test)
# scikit-learn metrics take (y_true, y_pred). Accuracy is symmetric, so the
# value is unaffected, but keeping one order for all three calls avoids a
# silent error if a non-symmetric metric is substituted later.
logregacc=accuracy_score(y_test,ylogreg)
# Rows are the true class, columns the predicted class.
logregmatrix=confusion_matrix(y_test,ylogreg)
# output_dict=True returns a nested dict so the averages can be looked up by key.
logregreport=classification_report(y_test,ylogreg,output_dict=True)
print(logregacc)
print(logregmatrix)
print(logregreport)

0.7885024840312278
[[917 118]
 [180 194]]
{'0': {'precision': 0.8359161349134002, 'recall': 0.8859903381642512, 'f1-score': 0.8602251407129456, 'support': 1035.0}, '1': {'precision': 0.6217948717948718, 'recall': 0.5187165775401069, 'f1-score': 0.565597667638484, 'support': 374.0}, 'accuracy': 0.7885024840312278, 'macro avg': {'precision': 0.728855503354136, 'recall': 0.7023534578521791, 'f1-score': 0.7129114041757147, 'support': 1409.0}, 'weighted avg': {'precision': 0.7790805405866935, 'recall': 0.7885024840312278, 'f1-score': 0.782020261415679, 'support': 1409.0}}


In [10]:
#Tree Model
# Fit a decision tree on the training split and evaluate it on the held-out split.
# random_state is fixed (same seed as the train/test split) because tree
# construction is randomised; without it the scores change on every
# Restart & Run All and the saved metrics are not reproducible.
tree=DecisionTreeClassifier(random_state=21)
tree.fit(X_train,y_train)
ytree=tree.predict(X_test)
# scikit-learn metrics take (y_true, y_pred). Accuracy is symmetric, so the
# value is unaffected, but keeping one order for all three calls avoids a
# silent error if a non-symmetric metric is substituted later.
treeacc=accuracy_score(y_test,ytree)
# Rows are the true class, columns the predicted class.
treematrix=confusion_matrix(y_test,ytree)
# output_dict=True returns a nested dict so the averages can be looked up by key.
treereport=classification_report(y_test,ytree,output_dict=True)
print(treeacc)
print(treematrix)
print(treereport)

0.7196593328601846
[[834 201]
 [194 180]]
{'0': {'precision': 0.811284046692607, 'recall': 0.8057971014492754, 'f1-score': 0.8085312651478429, 'support': 1035.0}, '1': {'precision': 0.47244094488188976, 'recall': 0.48128342245989303, 'f1-score': 0.4768211920529801, 'support': 374.0}, 'accuracy': 0.7196593328601846, 'macro avg': {'precision': 0.6418624957872484, 'recall': 0.6435402619545842, 'f1-score': 0.6426762286004115, 'support': 1409.0}, 'weighted avg': {'precision': 0.7213427265526438, 'recall': 0.7196593328601846, 'f1-score': 0.7204833110403349, 'support': 1409.0}}


In [11]:
#Forest Model
# Fit a random forest on the training split and evaluate it on the held-out split.
# random_state is fixed (same seed as the train/test split) because the forest
# is built from random resamples and random feature subsets; without it the
# scores change on every Restart & Run All and the saved metrics are not
# reproducible.
forest=RandomForestClassifier(random_state=21)
forest.fit(X_train,y_train)
yforest=forest.predict(X_test)
# scikit-learn metrics take (y_true, y_pred). Accuracy is symmetric, so the
# value is unaffected, but keeping one order for all three calls avoids a
# silent error if a non-symmetric metric is substituted later.
forestacc=accuracy_score(y_test,yforest)
# Rows are the true class, columns the predicted class.
forestmatrix=confusion_matrix(y_test,yforest)
# output_dict=True returns a nested dict so the averages can be looked up by key.
forestreport=classification_report(y_test,yforest,output_dict=True)
print(forestacc)
print(forestmatrix)
print(forestreport)

0.7806955287437899
[[929 106]
 [203 171]]
{'0': {'precision': 0.8206713780918727, 'recall': 0.8975845410628019, 'f1-score': 0.8574065528380249, 'support': 1035.0}, '1': {'precision': 0.6173285198555957, 'recall': 0.4572192513368984, 'f1-score': 0.5253456221198156, 'support': 374.0}, 'accuracy': 0.7806955287437899, 'macro avg': {'precision': 0.7189999489737342, 'recall': 0.6774018961998501, 'f1-score': 0.6913760874789203, 'support': 1409.0}, 'weighted avg': {'precision': 0.7666967656146778, 'recall': 0.7806955287437899, 'f1-score': 0.7692654683180744, 'support': 1409.0}}


In [12]:
# Gather the support-weighted average precision / recall / F1 of each model
# into a single comparison table, one row per model.
reports={
    "stdknn":knnreport,
    "stdsvc":svcreport,
    "stdLogReg":logregreport,
    "stdTree":treereport,
    "stdForest":forestreport,
}
# Only the "weighted avg" entry of every classification report is used.
weighted=[report["weighted avg"] for report in reports.values()]
metrics=pd.DataFrame({
    "model":list(reports),
    "precision":[avg["precision"] for avg in weighted],
    "recall":[avg["recall"] for avg in weighted],
    "f1":[avg["f1-score"] for avg in weighted],
})
metrics.head()

Unnamed: 0,model,precision,recall,f1
0,stdknn,0.761861,0.773598,0.76548
1,stdsvc,0.78551,0.797019,0.787104
2,stdLogReg,0.779081,0.788502,0.78202
3,stdTree,0.721343,0.719659,0.720483
4,stdForest,0.766697,0.780696,0.769265


In [13]:
# Load the previously saved metrics table and append this notebook's rows.
# NOTE(review): "Metrics.csv" is presumably produced by earlier notebooks
# (the base*/normal* rows) - confirm where it is first written.
errors=pd.read_csv("Metrics.csv",index_col=0)
# This cell rebinds `metrics` from itself, so re-running it (or re-running the
# notebook after the table has been saved) would stack duplicate rows.
# Keeping only the most recent row per model name makes the cell idempotent.
metrics=(
    pd.concat([errors,metrics],ignore_index=True)
    .drop_duplicates(subset="model",keep="last")
    .reset_index(drop=True)
)
metrics

Unnamed: 0,model,precision,recall,f1
0,baseknn,0.780759,0.792051,0.765596
1,basesvc,0.539584,0.734564,0.622155
2,baseLogReg,0.786733,0.79489,0.789405
3,baseTree,0.725576,0.723918,0.724729
4,baseForest,0.772494,0.786373,0.774088
5,normalknn,0.756183,0.772179,0.759077
6,normalsvc,0.781897,0.79418,0.783316
7,normalLogReg,0.780784,0.790632,0.783614
8,normalTree,0.724664,0.721789,0.723174
9,normalForest,0.769291,0.782115,0.77217


In [14]:
# Write the combined table back to the file it was loaded from. The name must
# match "Metrics.csv" exactly: on case-sensitive filesystems "metrics.csv" is a
# different file, and the next run would read the stale table.
metrics.to_csv("Metrics.csv")