In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix, classification_report, jaccard_score, log_loss
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

In [2]:
# Load the raw customer table; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer='data/churn_data.csv')

In [3]:
# Columns kept from the raw frame: the seven columns later used as model
# inputs, two extra columns that stay in the frame but are not fed to the
# model, and the 'churn' label.
columns = [
    'tenure', 'age', 'address', 'income', 'ed', 'employ', 'equip',
    'callcard', 'wireless',
    'churn',
]

In [4]:
# Independent frame restricted to the selected columns, with the 'churn'
# label cast to integers (astype returns a new frame, so `df` is untouched).
churn = df[columns].astype({'churn': 'int'})

In [5]:
# Per-column standardiser (centre on the mean, scale to unit variance);
# both options are the library defaults, spelled out for the reader.
scaler = StandardScaler(with_mean=True, with_std=True)

In [6]:
# Rebinds `columns` to the model's input features only: the earlier, wider
# list without 'callcard', 'wireless' and the 'churn' label.
columns = [
    'tenure', 'age', 'address', 'income',
    'ed', 'employ', 'equip',
]

In [7]:
X = np.asarray(churn[columns])
X = scaler.fit_transform(X)

In [8]:
y = np.asarray(churn['churn'])

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

In [10]:
# Fit the classifier on the training split; `fit` returns the estimator
# itself, so `lr` is the trained model.
lr = LogisticRegression(
    solver='liblinear',
    C=0.01,
).fit(X_train, y_train)

In [11]:
# Per-class probabilities and hard class labels for the held-out rows.
y_hat_prob = lr.predict_proba(X_test)
y_hat = lr.predict(X_test)

In [13]:
# Rows are the true classes, columns the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_hat)

array([[24,  1],
       [ 9,  6]])

In [14]:
# The report is a pre-formatted string, so it is printed rather than displayed.
print(classification_report(y_true=y_test, y_pred=y_hat))

              precision    recall  f1-score   support

           0       0.73      0.96      0.83        25
           1       0.86      0.40      0.55        15

    accuracy                           0.75        40
   macro avg       0.79      0.68      0.69        40
weighted avg       0.78      0.75      0.72        40



### Log loss

Now, let's try **log loss** for evaluation. In logistic regression, the output can be the probability that customer churn is yes (i.e., equals 1). This probability is a value between 0 and 1.
Log loss (logarithmic loss) measures the performance of a classifier whose predicted output is a probability value between 0 and 1; lower values are better.


In [15]:
# Pass the model's own class ordering so each probability column is matched
# to the right label even if the held-out fold happens to lack one class
# (otherwise the labels are inferred from y_test alone).
log_loss(y_test, y_hat_prob, labels=lr.classes_)

0.6017092478101185