In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.metrics import confusion_matrix
import sklearn

#Load the churn dataset; every later cell reads from or adds columns to df_churn
df_churn = pd.read_csv(filepath_or_buffer='churn_clean.csv')

In [2]:
#Fill missing InternetService entries with the most frequent category (mode imputation)
#NOTE(review): confirm the missing entries are truly missing and not a literal
#"None" category that was parsed as NA when the CSV was read
internet_service_mode = df_churn['InternetService'].mode()[0]
df_churn['InternetService'] = df_churn['InternetService'].fillna(internet_service_mode)

In [3]:
#Re-express dependent variable, Churn, as numeric (Yes -> 1, No -> 0)

#Find unique values of variable
print(df_churn["Churn"].unique())

#Create dictionary to store numeric values for variable
dict_churn = {"Churn":
                    {"Yes":1,
                     "No":0
                    }
                }

#Map the labels to their codes on the Churn column only.
#Values without a dictionary entry pass through unchanged, so re-running this
#cell on already-encoded data is a no-op.
churn_codes = dict_churn["Churn"]
churn_encoded = df_churn["Churn"].map(lambda label: churn_codes.get(label, label))

#Cast explicitly to int for compatibility with logistic regression, rather than
#relying on replace() to downcast the column (deprecated in pandas 2.2).
#The cast raises if a non-numeric label is left over.
df_churn["Churn"] = churn_encoded.astype(int)

#Confirm categorical values have been replaced
print(df_churn["Churn"].unique())

['No' 'Yes']
[0 1]


In [4]:
#One-hot encode Contract

#Build one float-valued indicator column per Contract category, each prefixed "dummy"
df_contract = pd.get_dummies(df_churn["Contract"], prefix="dummy", dtype=float)

#Only the One year and Two Year indicators are attached to df_churn;
#the original author's note designates Month-to-month as the base category
contract_indicators = df_contract.loc[:, ["dummy_One year", "dummy_Two Year"]]
df_churn = df_churn.join(contract_indicators)

In [5]:
#One-hot encode InternetService

#Build one float-valued indicator column per InternetService category, each prefixed "dummy"
df_internet = pd.get_dummies(df_churn["InternetService"], prefix="dummy", dtype=float)

#Attach only the Fiber Optic indicator to df_churn
fiber_optic_indicator = df_internet["dummy_Fiber Optic"]
df_churn = df_churn.join(fiber_optic_indicator)

In [6]:
#Re-express TechSupport as numeric (Yes -> 1, No -> 0)

#Find unique values of  variable
print(df_churn["TechSupport"].unique())

#Create dictionary to store numeric values for variable
dict_techsupport = {"TechSupport":
                    {"Yes":1,
                     "No":0,
                    }
                }

#Map the labels to their codes on the TechSupport column only.
#Values without a dictionary entry pass through unchanged, so re-running this
#cell on already-encoded data is a no-op.
techsupport_codes = dict_techsupport["TechSupport"]
techsupport_encoded = df_churn["TechSupport"].map(lambda label: techsupport_codes.get(label, label))

#Cast explicitly to int (as is done for Churn) so the column is guaranteed numeric
#for the VIF and Logit cells, rather than relying on replace() to downcast an
#object column (deprecated in pandas 2.2). The cast raises if a non-numeric label is left over.
df_churn["TechSupport"] = techsupport_encoded.astype(int)

#Confirm categorical values have been replaced
print(df_churn["TechSupport"].unique())

['No' 'Yes']
[0 1]


In [7]:
#Give the Item1-Item4 columns of df_churn descriptive names
item_column_names = {
    'Item1': 'Responses',
    'Item2': 'Fixes',
    'Item3': 'Replacements',
    'Item4': 'Reliability',
}
df_churn = df_churn.rename(columns=item_column_names)

## Initial Logistic Regression Model

In [9]:
#Fit the initial logistic regression of Churn on all candidate predictors
#[In-text citation: (LaRose et al, 2019)]
initial_predictors = [
    "Outage_sec_perweek",
    "Contacts",
    "Yearly_equip_failure",
    "Tenure",
    "MonthlyCharge",
    "dummy_One year",
    "dummy_Two Year",
    "dummy_Fiber Optic",
    "TechSupport",
    "Responses",
    "Fixes",
    "Replacements",
    "Reliability",
]

#Design matrix: the predictors plus an intercept column added by statsmodels
X = sm.add_constant(df_churn[initial_predictors])

#Response kept as a one-column DataFrame
y = df_churn[["Churn"]]

initial_logit = sm.Logit(y, X)
mdl_initial = initial_logit.fit()

mdl_initial.summary2()

Optimization terminated successfully.
         Current function value: 0.246969
         Iterations 8


0,1,2,3
Model:,Logit,Method:,MLE
Dependent Variable:,Churn,Pseudo R-squared:,0.573
Date:,2024-02-05 17:20,AIC:,4967.3750
No. Observations:,10000,BIC:,5068.3198
Df Model:,13,Log-Likelihood:,-2469.7
Df Residuals:,9986,LL-Null:,-5782.2
Converged:,1.0000,LLR p-value:,0.0000
No. Iterations:,8.0000,Scale:,1.0000

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
const,-4.9120,0.2916,-16.8432,0.0000,-5.4836,-4.3404
Outage_sec_perweek,-0.0012,0.0121,-0.0958,0.9237,-0.0250,0.0226
Contacts,0.0493,0.0360,1.3689,0.1710,-0.0213,0.1200
Yearly_equip_failure,-0.0219,0.0567,-0.3861,0.6994,-0.1331,0.0893
Tenure,-0.1019,0.0025,-40.5148,0.0000,-0.1069,-0.0970
MonthlyCharge,0.0492,0.0013,38.4982,0.0000,0.0467,0.0517
dummy_One year,-2.9832,0.1143,-26.0934,0.0000,-3.2072,-2.7591
dummy_Two Year,-3.0552,0.1109,-27.5538,0.0000,-3.2726,-2.8379
dummy_Fiber Optic,-1.6949,0.0821,-20.6383,0.0000,-1.8559,-1.5339


### Model Reduction using Variance Inflation Factor

In [11]:
#Variance inflation factors for the full predictor set [In-text citation: GeeksforGeeks]
#NOTE(review): X carries no intercept column in this cell (sm.add_constant is not applied);
#VIFs computed without a constant may come out larger than with one - confirm before
#using them to drop predictors
vif_predictors = [
    "Outage_sec_perweek",
    "Contacts",
    "Yearly_equip_failure",
    "Tenure",
    "MonthlyCharge",
    "dummy_One year",
    "dummy_Two Year",
    "dummy_Fiber Optic",
    "TechSupport",
    "Responses",
    "Fixes",
    "Replacements",
    "Reliability",
]
X = df_churn[vif_predictors]

#One row per predictor: its name and its VIF against all other columns of X
vif_data = pd.DataFrame({
    "feature": X.columns,
    "VIF": [variance_inflation_factor(X.values, col_idx) for col_idx in range(X.shape[1])],
})

#print VIF data
print(vif_data)

                 feature        VIF
0     Outage_sec_perweek  10.062425
1               Contacts   1.981046
2   Yearly_equip_failure   1.382617
3                 Tenure   2.630068
4          MonthlyCharge  13.493832
5         dummy_One year   1.376736
6         dummy_Two Year   1.438965
7      dummy_Fiber Optic   2.872565
8            TechSupport   1.626659
9              Responses  25.102549
10                 Fixes  22.969036
11          Replacements  18.970597
12           Reliability  10.141270


In [11]:
#Variance inflation factors for the full predictor set [In-text citation: GeeksforGeeks]
#NOTE(review): this cell repeats the preceding VIF computation on the same 13 predictors
X = df_churn.loc[:, ["Outage_sec_perweek","Contacts","Yearly_equip_failure","Tenure","MonthlyCharge","dummy_One year","dummy_Two Year","dummy_Fiber Optic","TechSupport","Responses","Fixes","Replacements","Reliability"]]

#Start with the predictor names, then attach the VIF of each column
vif_data = pd.DataFrame()
vif_data["feature"] = X.columns

design_values = X.values
vif_scores = []
for position, _ in enumerate(X.columns):
    vif_scores.append(variance_inflation_factor(design_values, position))
vif_data["VIF"] = vif_scores

#print VIF data
print(vif_data)

                 feature        VIF
0     Outage_sec_perweek  10.062425
1               Contacts   1.981046
2   Yearly_equip_failure   1.382617
3                 Tenure   2.630068
4          MonthlyCharge  13.493832
5         dummy_One year   1.376736
6         dummy_Two Year   1.438965
7      dummy_Fiber Optic   2.872565
8            TechSupport   1.626659
9              Responses  25.102549
10                 Fixes  22.969036
11          Replacements  18.970597
12           Reliability  10.141270


In [12]:
#Recompute VIF with Responses removed from the predictor set [In-text citation: GeeksforGeeks]
predictors_without_responses = [
    "Outage_sec_perweek", "Contacts", "Yearly_equip_failure", "Tenure", "MonthlyCharge",
    "dummy_One year", "dummy_Two Year", "dummy_Fiber Optic", "TechSupport",
    "Fixes", "Replacements", "Reliability",
]
X = df_churn[predictors_without_responses]

#VIF of each remaining predictor, in column order
vif_values = [variance_inflation_factor(X.values, i) for i in range(len(X.columns))]
vif_data = pd.DataFrame({"feature": X.columns, "VIF": vif_values})

#print VIF data
print(vif_data)

                 feature        VIF
0     Outage_sec_perweek  10.052907
1               Contacts   1.980515
2   Yearly_equip_failure   1.381201
3                 Tenure   2.629971
4          MonthlyCharge  13.421418
5         dummy_One year   1.376498
6         dummy_Two Year   1.438877
7      dummy_Fiber Optic   2.869853
8            TechSupport   1.625990
9                  Fixes  16.150396
10          Replacements  15.982121
11           Reliability  10.114242


In [13]:
#Recompute VIF with Responses and Fixes removed [In-text citation: GeeksforGeeks]
X = df_churn[[
    "Outage_sec_perweek",
    "Contacts",
    "Yearly_equip_failure",
    "Tenure",
    "MonthlyCharge",
    "dummy_One year",
    "dummy_Two Year",
    "dummy_Fiber Optic",
    "TechSupport",
    "Replacements",
    "Reliability",
]]

#Table of predictor names, then the VIF of each one
vif_data = pd.DataFrame()
vif_data["feature"] = X.columns
n_predictors = len(X.columns)
vif_data["VIF"] = [variance_inflation_factor(X.values, k) for k in range(n_predictors)]

#print VIF data
print(vif_data)

                 feature        VIF
0     Outage_sec_perweek   9.908351
1               Contacts   1.977029
2   Yearly_equip_failure   1.380827
3                 Tenure   2.624860
4          MonthlyCharge  13.131709
5         dummy_One year   1.376427
6         dummy_Two Year   1.438423
7      dummy_Fiber Optic   2.868109
8            TechSupport   1.625615
9           Replacements   9.795162
10           Reliability   9.956283


In [14]:
#Recompute VIF with Responses, Fixes and MonthlyCharge removed [In-text citation: GeeksforGeeks]
vif_final_predictors = ["Outage_sec_perweek","Contacts","Yearly_equip_failure","Tenure","dummy_One year","dummy_Two Year","dummy_Fiber Optic","TechSupport","Replacements","Reliability"]
X = df_churn.loc[:, vif_final_predictors]

#Collect the VIF of every column of X
predictor_matrix = X.values
vif_by_column = [
    variance_inflation_factor(predictor_matrix, column_number)
    for column_number in range(predictor_matrix.shape[1])
]

#Pair each predictor name with its VIF
vif_data = pd.DataFrame()
vif_data["feature"] = X.columns
vif_data["VIF"] = vif_by_column

#print VIF data
print(vif_data)

                feature       VIF
0    Outage_sec_perweek  8.928276
1              Contacts  1.962045
2  Yearly_equip_failure  1.378012
3                Tenure  2.598159
4        dummy_One year  1.370356
5        dummy_Two Year  1.433700
6     dummy_Fiber Optic  2.762826
7           TechSupport  1.582278
8          Replacements  8.952135
9           Reliability  9.065586


## Backwards Stepwise Regression

In [16]:
#Refit the logistic regression without Responses, Fixes, and MonthlyCharge (VIF > 10)
predictors_after_vif = [
    "Outage_sec_perweek",
    "Contacts",
    "Yearly_equip_failure",
    "Tenure",
    "dummy_One year",
    "dummy_Two Year",
    "dummy_Fiber Optic",
    "TechSupport",
    "Replacements",
    "Reliability",
]

#Predictors plus intercept column
X = sm.add_constant(df_churn.loc[:, predictors_after_vif])

#Response as a one-column DataFrame
y = df_churn.loc[:, ["Churn"]]

mdl_initial = sm.Logit(y, X).fit()

mdl_initial.summary2()

Optimization terminated successfully.
         Current function value: 0.387693
         Iterations 7


0,1,2,3
Model:,Logit,Method:,MLE
Dependent Variable:,Churn,Pseudo R-squared:,0.330
Date:,2024-02-05 17:20,AIC:,7775.8533
No. Observations:,10000,BIC:,7855.1670
Df Model:,10,Log-Likelihood:,-3876.9
Df Residuals:,9989,LL-Null:,-5782.2
Converged:,1.0000,LLR p-value:,0.0000
No. Iterations:,7.0000,Scale:,1.0000

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
const,1.7702,0.1834,9.6498,0.0000,1.4107,2.1298
Outage_sec_perweek,0.0053,0.0095,0.5618,0.5743,-0.0133,0.0239
Contacts,0.0387,0.0286,1.3522,0.1763,-0.0174,0.0947
Yearly_equip_failure,-0.0398,0.0450,-0.8839,0.3767,-0.1279,0.0484
Tenure,-0.0624,0.0015,-42.0431,0.0000,-0.0654,-0.0595
dummy_One year,-1.7213,0.0783,-21.9968,0.0000,-1.8746,-1.5679
dummy_Two Year,-1.8604,0.0770,-24.1537,0.0000,-2.0114,-1.7095
dummy_Fiber Optic,-0.7526,0.0597,-12.6072,0.0000,-0.8696,-0.6356
TechSupport,0.0954,0.0584,1.6337,0.1023,-0.0191,0.2099


In [17]:
#Backward elimination step: refit without Outage_sec_perweek (p-value > 0.05)
previous_predictors = ["Outage_sec_perweek","Contacts","Yearly_equip_failure","Tenure","dummy_One year","dummy_Two Year","dummy_Fiber Optic","TechSupport","Replacements","Reliability"]
eliminated = {"Outage_sec_perweek"}
step_predictors = [name for name in previous_predictors if name not in eliminated]

X = df_churn[step_predictors]
X = sm.add_constant(X)

y = df_churn[["Churn"]]

step_logit = sm.Logit(y, X)
mdl_initial = step_logit.fit()

mdl_initial.summary2()

Optimization terminated successfully.
         Current function value: 0.387708
         Iterations 7


0,1,2,3
Model:,Logit,Method:,MLE
Dependent Variable:,Churn,Pseudo R-squared:,0.329
Date:,2024-02-05 17:20,AIC:,7774.1689
No. Observations:,10000,BIC:,7846.2723
Df Model:,9,Log-Likelihood:,-3877.1
Df Residuals:,9990,LL-Null:,-5782.2
Converged:,1.0000,LLR p-value:,0.0000
No. Iterations:,7.0000,Scale:,1.0000

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
const,1.8238,0.1568,11.6305,0.0000,1.5164,2.1311
Contacts,0.0389,0.0286,1.3610,0.1735,-0.0171,0.0950
Yearly_equip_failure,-0.0394,0.0450,-0.8758,0.3811,-0.1275,0.0487
Tenure,-0.0624,0.0015,-42.0431,0.0000,-0.0654,-0.0595
dummy_One year,-1.7210,0.0782,-21.9940,0.0000,-1.8744,-1.5676
dummy_Two Year,-1.8600,0.0770,-24.1520,0.0000,-2.0110,-1.7091
dummy_Fiber Optic,-0.7522,0.0597,-12.6012,0.0000,-0.8692,-0.6352
TechSupport,0.0952,0.0584,1.6296,0.1032,-0.0193,0.2097
Replacements,-0.0191,0.0275,-0.6940,0.4877,-0.0731,0.0349


In [18]:
#Backward elimination step: refit without Replacements (p-value > 0.05)
X = sm.add_constant(
    df_churn[[
        "Contacts",
        "Yearly_equip_failure",
        "Tenure",
        "dummy_One year",
        "dummy_Two Year",
        "dummy_Fiber Optic",
        "TechSupport",
        "Reliability",
    ]]
)

y = df_churn[["Churn"]]

mdl_initial = sm.Logit(y, X).fit()

mdl_initial.summary2()

Optimization terminated successfully.
         Current function value: 0.387733
         Iterations 7


0,1,2,3
Model:,Logit,Method:,MLE
Dependent Variable:,Churn,Pseudo R-squared:,0.329
Date:,2024-02-05 17:20,AIC:,7772.6507
No. Observations:,10000,BIC:,7837.5437
Df Model:,8,Log-Likelihood:,-3877.3
Df Residuals:,9991,LL-Null:,-5782.2
Converged:,1.0000,LLR p-value:,0.0000
No. Iterations:,7.0000,Scale:,1.0000

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
const,1.7566,0.1232,14.2568,0.0000,1.5151,1.9981
Contacts,0.0391,0.0286,1.3689,0.1710,-0.0169,0.0952
Yearly_equip_failure,-0.0390,0.0450,-0.8677,0.3856,-0.1271,0.0491
Tenure,-0.0624,0.0015,-42.0456,0.0000,-0.0654,-0.0595
dummy_One year,-1.7224,0.0782,-22.0153,0.0000,-1.8757,-1.5690
dummy_Two Year,-1.8601,0.0770,-24.1560,0.0000,-2.0110,-1.7092
dummy_Fiber Optic,-0.7522,0.0597,-12.6020,0.0000,-0.8692,-0.6352
TechSupport,0.0946,0.0584,1.6199,0.1053,-0.0199,0.2091
Reliability,-0.0249,0.0276,-0.9037,0.3662,-0.0790,0.0291


In [19]:
#Backward elimination step: refit without Yearly_equip_failure (p-value > 0.05)
response_column = ["Churn"]
predictor_columns = ["Contacts","Tenure","dummy_One year","dummy_Two Year","dummy_Fiber Optic","TechSupport","Reliability"]

#Response first, then predictors with the intercept column added
y = df_churn[response_column]
X = sm.add_constant(df_churn[predictor_columns])

mdl_initial = sm.Logit(y, X).fit()

mdl_initial.summary2()

Optimization terminated successfully.
         Current function value: 0.387770
         Iterations 7


0,1,2,3
Model:,Logit,Method:,MLE
Dependent Variable:,Churn,Pseudo R-squared:,0.329
Date:,2024-02-05 17:20,AIC:,7771.4055
No. Observations:,10000,BIC:,7829.0882
Df Model:,7,Log-Likelihood:,-3877.7
Df Residuals:,9992,LL-Null:,-5782.2
Converged:,1.0000,LLR p-value:,0.0000
No. Iterations:,7.0000,Scale:,1.0000

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
const,1.7423,0.1221,14.2740,0.0000,1.5030,1.9815
Contacts,0.0392,0.0286,1.3703,0.1706,-0.0169,0.0952
Tenure,-0.0625,0.0015,-42.0539,0.0000,-0.0654,-0.0595
dummy_One year,-1.7232,0.0782,-22.0267,0.0000,-1.8765,-1.5699
dummy_Two Year,-1.8600,0.0770,-24.1580,0.0000,-2.0109,-1.7091
dummy_Fiber Optic,-0.7523,0.0597,-12.6039,0.0000,-0.8693,-0.6353
TechSupport,0.0935,0.0584,1.6013,0.1093,-0.0209,0.2079
Reliability,-0.0250,0.0276,-0.9061,0.3649,-0.0790,0.0291


In [20]:
#Backward elimination step: refit without Reliability
#(the predictor dropped relative to the previous iteration)
remaining_predictors = [
    "Contacts",
    "Tenure",
    "dummy_One year",
    "dummy_Two Year",
    "dummy_Fiber Optic",
    "TechSupport",
]
X = df_churn.loc[:, remaining_predictors]
X = sm.add_constant(X)

y = df_churn.loc[:, ["Churn"]]

mdl_initial = sm.Logit(y, X).fit()

mdl_initial.summary2()

Optimization terminated successfully.
         Current function value: 0.387811
         Iterations 7


0,1,2,3
Model:,Logit,Method:,MLE
Dependent Variable:,Churn,Pseudo R-squared:,0.329
Date:,2024-02-05 17:20,AIC:,7770.2266
No. Observations:,10000,BIC:,7820.6990
Df Model:,6,Log-Likelihood:,-3878.1
Df Residuals:,9993,LL-Null:,-5782.2
Converged:,1.0000,LLR p-value:,0.0000
No. Iterations:,7.0000,Scale:,1.0000

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
const,1.6550,0.0746,22.1819,0.0000,1.5087,1.8012
Contacts,0.0391,0.0286,1.3678,0.1714,-0.0169,0.0951
Tenure,-0.0625,0.0015,-42.0491,0.0000,-0.0654,-0.0595
dummy_One year,-1.7221,0.0782,-22.0201,0.0000,-1.8754,-1.5688
dummy_Two Year,-1.8598,0.0770,-24.1570,0.0000,-2.0107,-1.7089
dummy_Fiber Optic,-0.7524,0.0597,-12.6067,0.0000,-0.8694,-0.6355
TechSupport,0.0926,0.0584,1.5866,0.1126,-0.0218,0.2070


In [21]:
#Backward elimination step: refit without Contacts (p-value > 0.05)
prior_step_predictors = ["Contacts","Tenure","dummy_One year","dummy_Two Year","dummy_Fiber Optic","TechSupport"]
current_predictors = [col for col in prior_step_predictors if col != "Contacts"]

X = sm.add_constant(df_churn[current_predictors])

y = df_churn[["Churn"]]

logit_without_contacts = sm.Logit(y, X)
mdl_initial = logit_without_contacts.fit()

mdl_initial.summary2()

Optimization terminated successfully.
         Current function value: 0.387905
         Iterations 7


0,1,2,3
Model:,Logit,Method:,MLE
Dependent Variable:,Churn,Pseudo R-squared:,0.329
Date:,2024-02-05 17:20,AIC:,7770.0934
No. Observations:,10000,BIC:,7813.3555
Df Model:,5,Log-Likelihood:,-3879.0
Df Residuals:,9994,LL-Null:,-5782.2
Converged:,1.0000,LLR p-value:,0.0000
No. Iterations:,7.0000,Scale:,1.0000

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
const,1.6938,0.0691,24.5100,0.0000,1.5584,1.8293
Tenure,-0.0624,0.0015,-42.0467,0.0000,-0.0653,-0.0595
dummy_One year,-1.7199,0.0782,-22.0043,0.0000,-1.8731,-1.5667
dummy_Two Year,-1.8592,0.0770,-24.1535,0.0000,-2.0101,-1.7083
dummy_Fiber Optic,-0.7534,0.0597,-12.6253,0.0000,-0.8703,-0.6364
TechSupport,0.0916,0.0584,1.5703,0.1163,-0.0227,0.2060


In [22]:
#Backward elimination step: refit without TechSupport (p-value > 0.05)
X = df_churn[[
    "Tenure",
    "dummy_One year",
    "dummy_Two Year",
    "dummy_Fiber Optic",
]]
X = sm.add_constant(X)

y = df_churn[["Churn"]]

mdl_initial = sm.Logit(y, X).fit()

mdl_initial.summary2()

Optimization terminated successfully.
         Current function value: 0.388028
         Iterations 7


0,1,2,3
Model:,Logit,Method:,MLE
Dependent Variable:,Churn,Pseudo R-squared:,0.329
Date:,2024-02-05 17:20,AIC:,7770.5569
No. Observations:,10000,BIC:,7806.6086
Df Model:,4,Log-Likelihood:,-3880.3
Df Residuals:,9995,LL-Null:,-5782.2
Converged:,1.0000,LLR p-value:,0.0000
No. Iterations:,7.0000,Scale:,1.0000

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
const,1.7298,0.0653,26.4813,0.0000,1.6018,1.8579
Tenure,-0.0624,0.0015,-42.0470,0.0000,-0.0653,-0.0595
dummy_One year,-1.7200,0.0781,-22.0110,0.0000,-1.8732,-1.5669
dummy_Two Year,-1.8592,0.0770,-24.1583,0.0000,-2.0101,-1.7084
dummy_Fiber Optic,-0.7557,0.0596,-12.6686,0.0000,-0.8726,-0.6387


## Reduced Logistic Regression Model

In [24]:
#Fit the reduced logistic regression model on the predictors that survived
#the VIF screening and backward elimination
reduced_predictors = ["Tenure","dummy_One year","dummy_Two Year","dummy_Fiber Optic"]

#Predictors plus intercept column
X = sm.add_constant(df_churn.loc[:, reduced_predictors])

#Response as a one-column DataFrame
y = df_churn.loc[:, ["Churn"]]

reduced_logit = sm.Logit(y, X)
mdl_reduced = reduced_logit.fit()

mdl_reduced.summary2()

Optimization terminated successfully.
         Current function value: 0.388028
         Iterations 7


0,1,2,3
Model:,Logit,Method:,MLE
Dependent Variable:,Churn,Pseudo R-squared:,0.329
Date:,2024-02-05 17:20,AIC:,7770.5569
No. Observations:,10000,BIC:,7806.6086
Df Model:,4,Log-Likelihood:,-3880.3
Df Residuals:,9995,LL-Null:,-5782.2
Converged:,1.0000,LLR p-value:,0.0000
No. Iterations:,7.0000,Scale:,1.0000

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
const,1.7298,0.0653,26.4813,0.0000,1.6018,1.8579
Tenure,-0.0624,0.0015,-42.0470,0.0000,-0.0653,-0.0595
dummy_One year,-1.7200,0.0781,-22.0110,0.0000,-1.8732,-1.5669
dummy_Two Year,-1.8592,0.0770,-24.1583,0.0000,-2.0101,-1.7084
dummy_Fiber Optic,-0.7557,0.0596,-12.6686,0.0000,-0.8726,-0.6387


## Confusion Matrix

In [25]:
#Confusion matrix of the reduced model at the default 0.5 probability cutoff
#(per statsmodels, entry [i, j] counts cases observed as class i and predicted as class j)
mdl_reduced.pred_table(threshold=0.5)

array([[6521.,  829.],
       [ 985., 1665.]])

## Accuracy Calculation

In [55]:
#Calculate Accuracy Score

#Take the counts from the fitted model's confusion matrix rather than
#hand-copying them from printed output, so the score tracks the data and model
confusion_counts = mdl_reduced.pred_table()

#Calculate numerator: the diagonal holds the correct predictions
#(true negatives + true positives)
TP_TN = np.trace(confusion_counts)

#Calculate denominator: every prediction in the table
All_predictions = confusion_counts.sum()

#Divide true positive + true negative by total number of predictions
accuracy_score = TP_TN / All_predictions
print(accuracy_score)

0.8186
