### Training the model

In [1]:
import os

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, KFold
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

In [3]:
df = pd.read_csv("../03-classification/data/telco-customer-churn.csv")

# Normalise column names: lowercase, spaces replaced by underscores.
df.columns = df.columns.str.lower().str.replace(" ", "_")

# Apply the same normalisation to the values of every object-dtype column.
cat_cols = df.dtypes[df.dtypes == "object"].index
for c in cat_cols:
    df[c] = df[c].str.lower().str.replace(" ", "_")

# Entries that cannot be parsed as numbers become NaN and are then set to 0.
df["totalcharges"] = pd.to_numeric(df["totalcharges"], errors="coerce").fillna(0)

# Binary target: 1 where churn == "yes", 0 otherwise.
df["churn"] = df["churn"].eq("yes").astype(int)

In [4]:
# Hold out 20% of the rows as the final test set; the fixed seed keeps the
# split reproducible across runs.
df_full_train, df_test = train_test_split(
    df,
    test_size=0.2,
    random_state=1,
)

In [7]:
# Feature columns fed to the model. `train` and `predict` select exactly
# `num_vars + cat_vars` from the frame, so no other column reaches the model.
num_vars = ["tenure", "monthlycharges", "totalcharges"]

cat_vars = [
    # customer profile
    "gender",
    "seniorcitizen",
    "partner",
    "dependents",
    # subscribed services
    "phoneservice",
    "multiplelines",
    "internetservice",
    "onlinesecurity",
    "onlinebackup",
    "deviceprotection",
    "techsupport",
    "streamingtv",
    "streamingmovies",
    # contract and billing
    "contract",
    "paperlessbilling",
    "paymentmethod",
]

In [12]:
def train(df, y, C=1.0):
    """Fit a DictVectorizer and a logistic regression on `df`.

    Only the columns listed in `num_vars + cat_vars` are used as features.

    Parameters
    ----------
    df : DataFrame containing at least the feature columns.
    y : target values, one per row of `df`.
    C : value passed through as `LogisticRegression(C=...)`.

    Returns
    -------
    (dv, model) : the fitted DictVectorizer and the fitted LogisticRegression.
    """
    records = df[num_vars + cat_vars].to_dict(orient="records")

    # sparse=False makes the vectorizer return a dense array.
    dv = DictVectorizer(sparse=False)

    model = LogisticRegression(C=C, max_iter=3000)
    model.fit(dv.fit_transform(records), y)

    return dv, model

In [15]:
def predict(df, dv, model):
    """Return the model's probability for the second class for each row of `df`.

    Parameters
    ----------
    df : DataFrame containing at least the columns in `num_vars + cat_vars`.
    dv : an already fitted vectorizer (only `transform` is called on it).
    model : a fitted classifier exposing `predict_proba`.

    Returns
    -------
    Column 1 of `model.predict_proba(...)`; with the 0/1 churn target used in
    this notebook that is the churn probability.
    """
    records = df[num_vars + cat_vars].to_dict(orient="records")
    return model.predict_proba(dv.transform(records))[:, 1]

In [16]:
# K-fold cross-validation on the full-train part only; the test set is not
# touched here.
C = 1.0
n_splits = 5

kf = KFold(n_splits=n_splits, shuffle=True, random_state=1)
scores = []

for train_idx, val_idx in kf.split(df_full_train):
    # kf.split yields positional indices, hence .iloc.
    df_train, df_val = df_full_train.iloc[train_idx], df_full_train.iloc[val_idx]
    y_train, y_val = df_train.churn.values, df_val.churn.values

    # A fresh vectorizer and model are fitted on each fold's training part.
    dv, model = train(df_train, y_train, C=C)
    y_pred = predict(df_val, dv, model)

    auc = roc_auc_score(y_val, y_pred)
    scores.append(auc)

print(f"mean: {np.mean(scores):.3f} +-std: {np.std(scores):.3f}")

mean: 0.842 +-std: 0.007


In [21]:
# Final model: refit on all of the full-train data with the chosen C and
# score once on the held-out test set.
y_full_train = df_full_train["churn"].values
y_test = df_test["churn"].values

dv, model = train(df_full_train, y_full_train, C=C)
y_pred = predict(df_test, dv, model)

auc = roc_auc_score(y_test, y_pred)
auc

0.8584952928813633

### Saving the model

In [23]:
import pickle

In [24]:
# The vectorizer and the model are pickled together as one (dv, model) tuple,
# so a loader gets both the feature encoding and the classifier from one file.
output_dir = "legacy_project"
output_file = f"model_C={C}.bin"

# open() does not create missing directories; without this the cell raises
# FileNotFoundError on a checkout where the folder does not exist yet.
os.makedirs(output_dir, exist_ok=True)

with open(f"{output_dir}/{output_file}", "wb") as f_out:
    pickle.dump((dv, model), f_out)

### Loading the model

In [27]:
# Name of the file written by the saving step when C = 1.0.
input_file = "model_C=1.0.bin"
model_path = f"legacy_project/{input_file}"

# NOTE: pickle.load can execute arbitrary code from the file; only load
# artifacts produced by this notebook or another trusted source.
with open(model_path, "rb") as f_in:
    # The file holds a (dv, model) tuple, in that order.
    dv, model = pickle.load(f_in)

In [31]:
# Example customer used to try out the loaded model. String values use the
# same lowercase/underscore form the training data was normalised to.
customer = {
    # customer profile
    "gender": "female",
    "seniorcitizen": 0,
    "partner": "yes",
    "dependents": "no",
    # subscribed services
    "phoneservice": "no",
    "multiplelines": "no_phone_service",
    "internetservice": "dsl",
    "onlinesecurity": "no",
    "onlinebackup": "yes",
    "deviceprotection": "no",
    "techsupport": "no",
    "streamingtv": "no",
    "streamingmovies": "no",
    # contract and billing
    "contract": "two_year",
    "paperlessbilling": "yes",
    "paymentmethod": "electronic_check",
    # numeric features
    "tenure": 1,
    "monthlycharges": 29.85,
    "totalcharges": 29.85,
}

In [36]:
# dv.transform expects a list of records, so the single customer is wrapped
# in a one-element list; row 0, column 1 is then picked from predict_proba.
X = dv.transform([customer])
y_pred = model.predict_proba(X)[0][1]

print("input: ", customer)
print()
print("output: ", y_pred)

input:  {'gender': 'female', 'seniorcitizen': 0, 'partner': 'yes', 'dependents': 'no', 'phoneservice': 'no', 'multiplelines': 'no_phone_service', 'internetservice': 'dsl', 'onlinesecurity': 'no', 'onlinebackup': 'yes', 'deviceprotection': 'no', 'techsupport': 'no', 'streamingtv': 'no', 'streamingmovies': 'no', 'contract': 'two_year', 'paperlessbilling': 'yes', 'paymentmethod': 'electronic_check', 'tenure': 1, 'monthlycharges': 29.85, 'totalcharges': 29.85}

output:  0.2952791913642216


### Making requests to the web service

In [8]:
import requests

# Endpoint the request below is posted to.
url = "http://localhost:9696/predict"

# Example customer sent to the service as the JSON request body.
customer = {
    # customer profile
    "gender": "female",
    "seniorcitizen": 0,
    "partner": "yes",
    "dependents": "no",
    # subscribed services
    "phoneservice": "no",
    "multiplelines": "no_phone_service",
    "internetservice": "dsl",
    "onlinesecurity": "no",
    "onlinebackup": "yes",
    "deviceprotection": "no",
    "techsupport": "no",
    "streamingtv": "no",
    "streamingmovies": "no",
    # contract and billing
    "contract": "two_year",
    "paperlessbilling": "yes",
    "paymentmethod": "electronic_check",
    # numeric features
    "tenure": 1,
    "monthlycharges": 29.85,
    "totalcharges": 29.85,
}

# Without a timeout requests waits indefinitely, so a stalled service would
# hang the cell. raise_for_status() turns a 4xx/5xx reply into an exception
# instead of letting an error payload be read as a prediction.
http_response = requests.post(url, json=customer, timeout=10)
http_response.raise_for_status()

# `response` is the parsed JSON body.
response = http_response.json()
response

{'churn': False, 'churn_proba': 0.2952791913642216}

In [9]:
# Act on the `churn` entry of the service reply: promote only when it is truthy.
print("send promotional email" if response["churn"] else "dont send promotional email")

dont send promotional email
