# **With Stratify**

In [None]:
import pandas as pd
import numpy as np
# Load the Telco churn data; "?" is treated as a missing-value marker.
telco = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv", na_values="?")
# Drop incomplete rows and renumber the index. reset_index() returns a new
# frame, so its result has to be assigned for the reset to take effect.
telco = telco.dropna().reset_index(drop=True)
# Drop the customer identifier and the tenure/charge columns so they are
# not used as features.
telco = telco.drop(columns=["customerID", "tenure", "MonthlyCharges", "TotalCharges"])
# Encode the target as integers: "No" -> 0, "Yes" -> 1.
telco = telco.replace({
    "Churn": {
        "No": 0,
        "Yes": 1
    }
})
# Features are every column except the target; the target is Churn.
data_x = telco.loc[:, telco.columns != "Churn"]
data_y = telco.loc[:, "Churn"]

telco.head(10)

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,Churn
0,Female,0,Yes,No,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,0
1,Male,0,No,No,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,0
2,Male,0,No,No,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,1
3,Male,0,No,No,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),0
4,Female,0,No,No,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,1
5,Female,0,No,No,Yes,Yes,Fiber optic,No,No,Yes,No,Yes,Yes,Month-to-month,Yes,Electronic check,1
6,Male,0,No,Yes,Yes,Yes,Fiber optic,No,Yes,No,No,Yes,No,Month-to-month,Yes,Credit card (automatic),0
7,Female,0,No,No,No,No phone service,DSL,Yes,No,No,No,No,No,Month-to-month,No,Mailed check,0
8,Female,0,Yes,No,Yes,Yes,Fiber optic,No,No,Yes,Yes,Yes,Yes,Month-to-month,Yes,Electronic check,1
9,Male,0,No,Yes,Yes,No,DSL,Yes,Yes,No,No,No,No,One year,No,Bank transfer (automatic),0


**In this part we split the dataset into a training set and a test set.**

In [None]:
from sklearn.model_selection import train_test_split
# 80/20 split, stratified on the target; random_state makes it repeatable.
train_x, test_x, train_y, test_y = train_test_split(
    data_x,
    data_y,
    train_size=0.8,
    stratify=data_y,
    random_state=911,
)
# Plain list so that list.count() can be used on the labels.
train_y = list(train_y)
# Class priors: fraction of training labels equal to 0 and to 1.
P_0, P_1 = (train_y.count(cls) / len(train_y) for cls in (0, 1))

**In this part we build the likelihood table and store it in a dictionary.**

In [None]:
# Likelihood table, filled by fit(): probability[feature][category]['0' | '1'].
probability = dict()

def fit(feature, label):
  """Fill ``probability[feature]`` with one entry per category of a column.

  Reads the column ``train_x[feature]`` from the module-level training frame,
  pairs every value with the matching entry of ``label`` and, for each
  distinct category, stores ``{'0': ..., '1': ...}`` where the two values are
  what ``calculate_probability`` returns for ``(category, 0)`` and
  ``(category, 1)``. Categories that already have an entry are left as is.

  Args:
    feature: name of a column in ``train_x``.
    label: class labels (0/1), in the same row order as ``train_x``.
  """
  column = train_x[feature]
  pairs = list(zip(column, label))
  table = probability.setdefault(feature, {})
  for category in column.unique():
    likelihood_0 = calculate_probability((category, 0), pairs)
    likelihood_1 = calculate_probability((category, 1), pairs)
    if category in table:
      # Keep the entry that is already stored for this category.
      continue
    table[category] = {'0': likelihood_0, '1': likelihood_1}

def calculate_probability(t1, merge):
  """Estimate the likelihood of a value given a class from paired samples.

  Args:
    t1: ``(value, class_label)`` tuple to look up.
    merge: list of ``(value, class_label)`` tuples, one per training row.

  Returns:
    The number of pairs equal to ``t1`` divided by the number of pairs in
    ``merge`` whose class label is ``t1[1]``; ``0.0`` when ``merge`` holds
    no pair of that class.
  """
  target_class = t1[1]
  # Count the class from the pairs themselves so the denominator always
  # matches the labels that were actually paired with the values, instead
  # of relying on a module-level label list.
  class_total = sum(1 for _, cls in merge if cls == target_class)
  if class_total == 0:
    # No sample of this class: avoid dividing by zero.
    return 0.0
  return merge.count(t1) / class_total

# Build the likelihood entries for every feature column, then show the table.
for column_name in train_x.columns:
  fit(column_name, train_y)
print(probability)

{'gender': {'Female': {'0': 0.4996375936216477, '1': 0.4983277591973244}, 'Male': {'0': 0.5003624063783523, '1': 0.5016722408026756}}, 'SeniorCitizen': {1: {'0': 0.1253926069098816, '1': 0.2568561872909699}, 0: {'0': 0.8746073930901184, '1': 0.7431438127090301}}, 'Partner': {'Yes': {'0': 0.5257308528630104, '1': 0.35451505016722407}, 'No': {'0': 0.4742691471369896, '1': 0.6454849498327759}}, 'Dependents': {'No': {'0': 0.6569219618265282, '1': 0.831438127090301}, 'Yes': {'0': 0.3430780381734719, '1': 0.16856187290969898}}, 'PhoneService': {'Yes': {'0': 0.9021502778448901, '1': 0.9036789297658863}, 'No': {'0': 0.09784972215510992, '1': 0.09632107023411371}}, 'MultipleLines': {'Yes': {'0': 0.40589514375453006, '1': 0.45953177257525085}, 'No': {'0': 0.49625513409036, '1': 0.44414715719063547}, 'No phone service': {'0': 0.09784972215510992, '1': 0.09632107023411371}}, 'InternetService': {'Fiber optic': {'0': 0.35153418700169126, '1': 0.6923076923076923}, 'DSL': {'0': 0.37424498671176615, '1

**In this part we compute the class probabilities for each test row and predict its label.**

In [None]:
# Predicted labels (0/1), one per test row, appended by predict().
predict_result = list()
def predict(feature):
  """Classify one row and append the predicted label to ``predict_result``.

  Args:
    feature: pandas Series for a single row; its index holds the feature
      names and its values the categories looked up in ``probability``.

  Returns:
    The module-level ``predict_result`` list, after appending 0 or 1.

  Raises:
    KeyError: if a feature name or category is missing from ``probability``.
  """
  # Start from the class priors and multiply in every feature's likelihood.
  p_0 = P_0
  p_1 = P_1
  for name, value in feature.items():
    likelihoods = probability[name][value]
    p_0 *= likelihoods['0']
    p_1 *= likelihoods['1']

  evidence = p_0 + p_1
  if evidence == 0:
    # The stored likelihoods can be exactly 0, so both products may vanish;
    # fall back to the priors instead of dividing by zero.
    probability_no, probability_yes = P_0, P_1
  else:
    probability_no = p_0 / evidence
    probability_yes = p_1 / evidence

  # Ties are resolved in favour of class 1.
  predict_result.append(0 if probability_no > probability_yes else 1)
  return predict_result

# Classify every test row; predict() appends to and returns predict_result.
for _, row in test_x.iterrows():
  predict_result = predict(row)
print(predict_result)

[0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 

**In this part we will calculate the accuracy of this algorithm.**

In [None]:
# Accuracy: percentage of test labels that equal the prediction at the
# same position.
test_y = list(test_y)
count = sum(
  1
  for position, actual in enumerate(test_y)
  if actual == predict_result[position]
)
accuracy = count * 100 / len(test_y)
print("The accuracy of this algorithm is:{}%".format(accuracy))

The accuracy of this algorithm is:72.53371185237758%


**In this part we will calculate precision, recall and f1_score.**



In [None]:
# (predicted, actual) pairs that identify each confusion-matrix cell.
tp = (1, 1)
tn = (0, 0)
fp = (1, 0)
fn = (0, 1)

# Pair every prediction with its true label and count each kind of outcome.
mer = list(zip(predict_result, test_y))
true_positive = mer.count(tp)
true_negative = mer.count(tn)
false_positive = mer.count(fp)
false_negative = mer.count(fn)

# Each ratio is reported as 0.0 when its denominator is zero (no predicted
# positives, no actual positives, or precision + recall == 0) instead of
# raising ZeroDivisionError.
predicted_positive = true_positive + false_positive
actual_positive = true_positive + false_negative
precission = true_positive / predicted_positive if predicted_positive else 0.0
recall = true_positive / actual_positive if actual_positive else 0.0
if precission + recall:
  f1_score = (2*precission*recall)/(precission + recall)
else:
  f1_score = 0.0

print("The precission value of this algorithm is: {}".format(precission))
print("The recall value of this algorithm is: {}".format(recall))
print("F1 score of this algorithm is: {}".format(f1_score))

The precission value of this algorithm is: 0.4895330112721417
The recall value of this algorithm is: 0.8128342245989305
F1 score of this algorithm is: 0.6110552763819096


# **Without Stratify**

In [None]:
# 80/20 split without stratification; random_state makes it repeatable.
train_x, test_x, train_y, test_y = train_test_split(
    data_x,
    data_y,
    train_size=0.8,
    random_state=911,
)
# Plain list so that list.count() can be used on the labels.
train_y = list(train_y)
# Class priors: fraction of training labels equal to 0 and to 1.
P_0, P_1 = (train_y.count(cls) / len(train_y) for cls in (0, 1))

**In this part we build the likelihood table and store it in a dictionary.**

In [None]:
# Likelihood table for the unstratified split, filled by fit_ws():
# probability_ws[feature][category]['0' | '1'].
probability_ws = dict()

def fit_ws(feature, label):
  """Fill ``probability_ws[feature]`` with one entry per category of a column.

  Reads the column ``train_x[feature]`` from the module-level training frame,
  pairs every value with the matching entry of ``label`` and, for each
  distinct category, stores ``{'0': ..., '1': ...}`` where the two values are
  what ``calculate_probability_ws`` returns for ``(category, 0)`` and
  ``(category, 1)``. Categories that already have an entry are left as is.

  Args:
    feature: name of a column in ``train_x``.
    label: class labels (0/1), in the same row order as ``train_x``.
  """
  column = train_x[feature]
  pairs = list(zip(column, label))
  table = probability_ws.setdefault(feature, {})
  for category in column.unique():
    likelihood_0 = calculate_probability_ws((category, 0), pairs)
    likelihood_1 = calculate_probability_ws((category, 1), pairs)
    if category in table:
      # Keep the entry that is already stored for this category.
      continue
    table[category] = {'0': likelihood_0, '1': likelihood_1}

def calculate_probability_ws(t1, merge):
  """Estimate the likelihood of a value given a class from paired samples.

  Args:
    t1: ``(value, class_label)`` tuple to look up.
    merge: list of ``(value, class_label)`` tuples, one per training row.

  Returns:
    The number of pairs equal to ``t1`` divided by the number of pairs in
    ``merge`` whose class label is ``t1[1]``; ``0.0`` when ``merge`` holds
    no pair of that class.
  """
  target_class = t1[1]
  # Count the class from the pairs themselves so the denominator always
  # matches the labels that were actually paired with the values, instead
  # of relying on a module-level label list.
  class_total = sum(1 for _, cls in merge if cls == target_class)
  if class_total == 0:
    # No sample of this class: avoid dividing by zero.
    return 0.0
  return merge.count(t1) / class_total

# Build the likelihood entries for every feature column, then show the table.
for column_name in train_x.columns:
  fit_ws(column_name, train_y)
print(probability_ws)

{'gender': {'Female': {'0': 0.4905797101449275, '1': 0.49732262382864795}, 'Male': {'0': 0.5094202898550725, '1': 0.5026773761713521}}, 'SeniorCitizen': {0: {'0': 0.8705314009661835, '1': 0.7516733601070951}, 1: {'0': 0.1294685990338164, '1': 0.24832663989290496}}, 'Partner': {'Yes': {'0': 0.5294685990338164, '1': 0.35609103078982596}, 'No': {'0': 0.47053140096618357, '1': 0.643908969210174}}, 'Dependents': {'No': {'0': 0.658695652173913, '1': 0.8232931726907631}, 'Yes': {'0': 0.34130434782608693, '1': 0.17670682730923695}}, 'PhoneService': {'Yes': {'0': 0.9026570048309178, '1': 0.9036144578313253}, 'No': {'0': 0.09734299516908212, '1': 0.0963855421686747}}, 'MultipleLines': {'Yes': {'0': 0.4103864734299517, '1': 0.4497991967871486}, 'No': {'0': 0.49227053140096616, '1': 0.4538152610441767}, 'No phone service': {'0': 0.09734299516908212, '1': 0.0963855421686747}}, 'InternetService': {'No': {'0': 0.2746376811594203, '1': 0.060240963855421686}, 'Fiber optic': {'0': 0.34347826086956523, '

**In this part we compute the class probabilities for each test row and predict its label.**

In [None]:
# Predicted labels (0/1), one per test row, appended by predict_ws().
predict_result_ws = list()
def predict_ws(feature):
  """Classify one row and append the predicted label to ``predict_result_ws``.

  Args:
    feature: pandas Series for a single row; its index holds the feature
      names and its values the categories looked up in ``probability_ws``.

  Returns:
    The module-level ``predict_result_ws`` list, after appending 0 or 1.

  Raises:
    KeyError: if a feature name or category is missing from
      ``probability_ws``.
  """
  # Start from the class priors and multiply in every feature's likelihood.
  p_0 = P_0
  p_1 = P_1
  for name, value in feature.items():
    likelihoods = probability_ws[name][value]
    p_0 *= likelihoods['0']
    p_1 *= likelihoods['1']

  evidence = p_0 + p_1
  if evidence == 0:
    # The stored likelihoods can be exactly 0, so both products may vanish;
    # fall back to the priors instead of dividing by zero.
    probability_no, probability_yes = P_0, P_1
  else:
    probability_no = p_0 / evidence
    probability_yes = p_1 / evidence

  # Ties are resolved in favour of class 1.
  predict_result_ws.append(0 if probability_no > probability_yes else 1)
  return predict_result_ws

# Classify every test row; predict_ws() appends to and returns
# predict_result_ws.
for _, row in test_x.iterrows():
  predict_result_ws = predict_ws(row)
print(predict_result_ws)

[1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 

**In this part we will calculate accuracy, precision, recall and f1_score.**

In [None]:
# Accuracy: percentage of test labels that equal the prediction at the
# same position.
test_y = list(test_y)
count = sum(
  1
  for position, actual in enumerate(test_y)
  if actual == predict_result_ws[position]
)
accuracy = count * 100/ len(test_y)
print("The accuracy of this algorithm is:{}%".format(accuracy))

# Pair every prediction with its true label and count each kind of outcome.
# tp / tn / fp / fn are the (predicted, actual) tuples defined earlier in
# the notebook.
mer = list(zip(predict_result_ws, test_y))
true_positive = mer.count(tp)
true_negative = mer.count(tn)
false_positive = mer.count(fp)
false_negative = mer.count(fn)

# Each ratio is reported as 0.0 when its denominator is zero (no predicted
# positives, no actual positives, or precision + recall == 0) instead of
# raising ZeroDivisionError.
predicted_positive = true_positive + false_positive
actual_positive = true_positive + false_negative
precission = true_positive / predicted_positive if predicted_positive else 0.0
recall = true_positive / actual_positive if actual_positive else 0.0
if precission + recall:
  f1_score = (2*precission*recall)/(precission + recall)
else:
  f1_score = 0.0

print("The precission value of this algorithm is: {}".format(precission))
print("The recall value of this algorithm is: {}".format(recall))
print("F1 score of this algorithm is: {}".format(f1_score))

The accuracy of this algorithm is:70.75940383250533%
The precission value of this algorithm is: 0.4715821812596006
The recall value of this algorithm is: 0.8186666666666667
F1 score of this algorithm is: 0.5984405458089668
