In [3]:
from sklearn.linear_model import LogisticRegression 
import numpy as np
import pandas as pd

In [38]:
# Load Preprocessed Data

telco = pd.read_table("Telco_data_preproc.csv", sep=",")

telco.drop("Unnamed: 0", inplace=True, axis=1)

print(telco)

      customerID  gender_proc  partner_proc  dependents_proc  \
0     7590-VHVEG            1             1                0   
1     5575-GNVDE            0             0                0   
2     3668-QPYBK            0             0                0   
3     7795-CFOCW            0             0                0   
4     9237-HQITU            1             0                0   
5     9305-CDSKC            1             0                0   
6     1452-KIOVK            0             0                1   
7     6713-OKOMC            1             0                0   
8     7892-POOKP            1             1                0   
9     6388-TABGU            0             0                1   
10    9763-GRSKD            0             1                1   
11    7469-LKBCI            0             0                0   
12    8091-TTVAX            0             1                0   
13    0280-XJGEX            0             0                0   
14    5129-JLPIS            0           

In [43]:
# Test Logistic Regression with Gender

x = np.array(telco["gender_proc"]).reshape(-1,1)
y = telco["churn_proc"]

model_1 = LogisticRegression(solver="lbfgs", random_state=0).fit(x,y)

model_1.score(x,y)


0.7346301292063041

In [62]:
# Test with multiple columns


x = np.array(telco[["gender_proc", "partner_proc", "dependents_proc", "phone_service_proc", 
                        "churn_proc", "paperless_billing_proc", "multiple_lines_proc", "internet_service_proc", 
                        "online_security_proc", "device_protection_proc", "tech_support_proc", "streaming_tv_proc",
                        "streaming_movies_proc", "tenure", "contract_proc", "payment_method_proc", "MonthlyCharges", 
                        "TotalCharges", "SeniorCitizen", "online_backup_proc"]])
y = telco["churn_proc"]

model_full_1 = LogisticRegression(random_state=0, solver="lbfgs", max_iter=1000).fit(x,y)

print("Score: ",model_full_1.score(x,y))
print("\nDecision: ",model_full_1.decision_function(x))
print("\nCoeffs: ",model_full_1.coef_)
print("\nIntercept: ",model_full_1.intercept_)

Score:  1.0

Decision:  [-5.55186334 -6.84058969  5.15904682 ... -6.00698866  5.80872449
 -7.0091275 ]

Coeffs:  [[-3.91518816e-02 -7.35180521e-02 -1.37967601e-01 -1.88987434e-01
   1.09190572e+01  1.02763912e-01  8.22212190e-02  3.47492190e-01
  -4.89729498e-02  2.34229311e-02 -5.85307255e-02  7.02479571e-02
   7.76544308e-02 -1.82286742e-02 -4.71699689e-01 -1.05645698e-01
   3.62681561e-03  3.76387088e-05  1.15706871e-01  2.39673943e-02]]

Intercept:  [-5.28118394]




In [60]:
# Test with multiple columns with Ridge


x = np.array(telco[["gender_proc", "partner_proc", "dependents_proc", "phone_service_proc", 
                        "churn_proc", "paperless_billing_proc", "multiple_lines_proc", "internet_service_proc", 
                        "online_security_proc", "device_protection_proc", "tech_support_proc", "streaming_tv_proc",
                        "streaming_movies_proc", "tenure", "contract_proc", "payment_method_proc", "MonthlyCharges", 
                        "TotalCharges", "SeniorCitizen", "online_backup_proc"]])
y = telco["churn_proc"]

model_full_ridge_1 = LogisticRegression(random_state=0, solver="lbfgs", max_iter=1000, penalty="l2").fit(x,y)

print("Score: ",model_full_ridge_1.score(x,y))
print("\nDecision: ",model_full_ridge_1.decision_function(x))
print("\nCoeffs: ",model_full_ridge_1.coef_)
print("\nIntercept: ",model_full_ridge_1.intercept_)

Score:  1.0

Decision:  [-5.55186334 -6.84058969  5.15904682 ... -6.00698866  5.80872449
 -7.0091275 ]

Coeffs:  [[-3.91518816e-02 -7.35180521e-02 -1.37967601e-01 -1.88987434e-01
   1.09190572e+01  1.02763912e-01  8.22212190e-02  3.47492190e-01
  -4.89729498e-02  2.34229311e-02 -5.85307255e-02  7.02479571e-02
   7.76544308e-02 -1.82286742e-02 -4.71699689e-01 -1.05645698e-01
   3.62681561e-03  3.76387088e-05  1.15706871e-01  2.39673943e-02]]

Intercept:  [-5.28118394]




In [59]:
# Test with multiple columns with Lasso


x = np.array(telco[["gender_proc", "partner_proc", "dependents_proc", "phone_service_proc", 
                        "churn_proc", "paperless_billing_proc", "multiple_lines_proc", "internet_service_proc", 
                        "online_security_proc", "device_protection_proc", "tech_support_proc", "streaming_tv_proc",
                        "streaming_movies_proc", "tenure", "contract_proc", "payment_method_proc", "MonthlyCharges", 
                        "TotalCharges", "SeniorCitizen", "online_backup_proc"]])
y = telco["churn_proc"]

model_full_lasso_1 = LogisticRegression(random_state=0, solver="saga", max_iter=1000, penalty="l1").fit(x,y)

print("Score: ",model_full_lasso_1.score(x,y))
print("\nDecision: ",model_full_lasso_1.decision_function(x))
print("\nCoeffs: ",model_full_lasso_1.coef_)
print("\nIntercept: ",model_full_lasso_1.intercept_)

Score:  0.7982393866250177

Decision:  [ 0.09043863 -2.02810979  0.26820056 ... -0.79359334  0.41970047
 -1.77957424]

Coeffs:  [[-4.24266686e-03 -4.07907721e-03 -7.74455941e-03 -1.02381472e-02
   5.45234789e-02  4.82338664e-03  1.69053608e-02 -5.38128430e-05
  -5.70715744e-03  5.18366123e-03 -6.39466958e-03  1.39753511e-02
   1.41328830e-02 -1.05809268e-01 -2.86069217e-02 -3.79491856e-02
   9.58249134e-03  6.43011947e-04  5.46274721e-03  3.98118459e-03]]

Intercept:  [-0.00476086]




In [17]:
import pandas as pd
import numpy as np

In [18]:
df = pd.read_csv("telco_preprocessed.csv", index_col = 0)