In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
pd.set_option('display.max_columns', None)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler
import warnings 
warnings.filterwarnings("ignore")

In [2]:
churnData = pd.read_csv('Customer-Churn.csv')
churnData.dtypes
churnData.shape

(7043, 21)

In [3]:
churnData.head(5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [4]:
X = churnData[['tenure', 'SeniorCitizen','MonthlyCharges']]
y = pd.DataFrame(data=churnData, columns=['Churn'])
X.head()

Unnamed: 0,tenure,SeniorCitizen,MonthlyCharges
0,1,0,29.85
1,34,0,56.95
2,2,0,53.85
3,45,0,42.3
4,2,0,70.7


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [6]:
X_train.head()

Unnamed: 0,tenure,SeniorCitizen,MonthlyCharges
4304,49,0,19.0
2162,28,0,96.6
5557,5,0,80.2
877,51,0,60.15
3239,16,0,49.45


In [9]:
X_train['SeniorCitizen'].value_counts()

0    4147
1     783
Name: SeniorCitizen, dtype: int64

In [7]:
y_train.head()

Unnamed: 0,Churn
4304,No
2162,No
5557,No
877,No
3239,No


In [10]:

scaler = Normalizer().fit(X_train)

train_scaled = scaler.transform(X_train)
test_scaled = scaler.transform(X_test)

In [None]:
# scaler2 = Normalizer().fit(X_test)
# test_scaled = scaler2.transform(X_test)

In [11]:
model = LogisticRegression(solver='lbfgs',multi_class='ovr')
model = model.fit(train_scaled, y_train)

In [None]:
# SIGMOID FUNCTION

In [12]:
model.score(test_scaled, y_test)

0.7827733080927591

In [13]:
y_predictions = model.predict(test_scaled)
y_predictions[:10]

array(['No', 'No', 'Yes', 'No', 'No', 'No', 'No', 'No', 'No', 'No'],
      dtype=object)

In [16]:
y_test[:10]

Unnamed: 0,Churn
2895,No
5056,No
4325,No
4507,No
2031,No
4227,No
4182,Yes
1855,No
2765,No
1454,No


In [15]:
model.predict_proba(test_scaled)[:10]

array([[0.97447326, 0.02552674],
       [0.91548629, 0.08451371],
       [0.43378878, 0.56621122],
       [0.50656244, 0.49343756],
       [0.58401769, 0.41598231],
       [0.91640747, 0.08359253],
       [0.66678747, 0.33321253],
       [0.97946957, 0.02053043],
       [0.57020903, 0.42979097],
       [0.96282236, 0.03717764]])

In [17]:
churnData['Churn'].value_counts()

No     5174
Yes    1869
Name: Churn, dtype: int64

In [19]:
5174/(1869+5174)

0.7346301292063041

In [20]:
5174/(189+5174)

0.9647585306731307