In [2]:
import json
import os
import time
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import yaml
from box import ConfigBox
from sklearn.metrics import f1_score
from tqdm import tqdm

In [3]:
# local testing
API_URL = "http://0.0.0.0:8080" 
# testing deployed service
# API_URL = "https://<YOUR_APP_NAME>.fly.dev" 

API_URL

'http://0.0.0.0:8080'

In [4]:
def load_params(params_path):
    with open(params_path, "r") as f:
        params = yaml.safe_load(f)
        params = ConfigBox(params)
    return params


proj_path = Path(os.getcwd()).parent.absolute()
params = load_params(proj_path/'params.yaml')
feat_cols = params.base.feat_cols
targ_col = params.base.targ_col
feat_cols, targ_col

(BoxList(['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary']),
 'Exited')

In [5]:
df = pd.concat([pd.read_csv(proj_path/'data'/'raw'/f'Churn_Modelling_{country}.csv') for country in ['France', 'Spain']])
df.head()

Unnamed: 0,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
2,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
3,15592531,Bartlett,822,France,Male,50,7,0.0,2,1,1,10062.8,0
4,15792365,He,501,France,Male,44,4,142051.07,2,0,1,74940.5,0


In [6]:
my_obj = [{
    'CreditScore': 755,
    'Age': 40,
    'Tenure': 2,
    'Balance': 137430.82,
    'NumOfProducts': 2,
    'HasCrCard': 0,
    'IsActiveMember': 0,
    'EstimatedSalary': 176768.59,
    'Exited': 0
  }]

x = requests.post(API_URL + '/predict', json = my_obj)
if x.ok:
    probs = x.json()
else:
    x.raise_for_status()

probs

[0.905]

In [7]:
def get_prob(obj):
    x = requests.post(API_URL + '/predict', json = obj)
    if x.ok:
        probs = x.json()
    else:
        x.raise_for_status()
    return probs

get_prob(my_obj)

[0.905]

In [8]:
import time


def send_sample_requests(df):
    f1_score_list = []
    for _ in tqdm(range(50)):
        df_sample = df.sample(n=60)
        y_true = df_sample[targ_col].values
        obj = df_sample[feat_cols].to_dict('records')
        probs = get_prob(obj)
        y_pred = np.array([prob < 0.5 for prob in probs]).astype(int)
        f1 = f1_score(y_true, y_pred)
        f1_score_list.append(f1)
        time.sleep(1)
    return f1_score_list
        
f1_score_list = send_sample_requests(df)
f1_score_list

100%|██████████| 50/50 [00:52<00:00,  1.05s/it]


[1.0,
 1.0,
 0.9565217391304348,
 0.8571428571428571,
 0.9090909090909091,
 1.0,
 0.9523809523809523,
 0.9333333333333333,
 0.9333333333333333,
 0.9473684210526316,
 0.9411764705882353,
 0.8333333333333333,
 0.9090909090909091,
 0.923076923076923,
 0.5714285714285715,
 0.9473684210526316,
 0.6666666666666666,
 0.8421052631578948,
 0.923076923076923,
 0.9411764705882353,
 0.9473684210526316,
 0.9411764705882353,
 0.9333333333333333,
 0.9523809523809523,
 0.9523809523809523,
 0.8571428571428571,
 0.75,
 0.888888888888889,
 1.0,
 0.888888888888889,
 1.0,
 0.9473684210526316,
 0.9655172413793104,
 1.0,
 0.8571428571428571,
 0.8421052631578948,
 1.0,
 0.9473684210526316,
 0.8799999999999999,
 1.0,
 0.8421052631578948,
 1.0,
 1.0,
 0.888888888888889,
 0.8571428571428571,
 0.9,
 0.9333333333333333,
 0.9473684210526316,
 0.8,
 1.0]

In [9]:
df_germany =  pd.read_csv(proj_path/'data'/'more_data'/'Churn_Modelling_Germany.csv')
# time.sleep(20) # this is to create a pause between requests. Later it'll be easier to distinguish the two types if requests
f1_score_list = send_sample_requests(df_germany)
f1_score_list

  0%|          | 0/50 [00:00<?, ?it/s]


KeyError: 'Exited'

In [10]:
x = requests.get(API_URL + '/drift_data')
if x.ok:
    data = x.json()
else:
    x.raise_for_status()

df_p_vals = pd.DataFrame(json.loads(data))
df_p_vals

Unnamed: 0,time,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,2023-01-31 21:50:55.110631,0.055723,0.978798,0.832680,0.921336,0.145549,1.000000,1.000000,0.874650
1,2023-01-31 21:50:56.069961,0.206015,0.998430,0.196414,0.485389,1.000000,0.998416,0.608368,0.262404
2,2023-01-31 21:50:57.127098,0.569845,0.903876,0.953986,0.623703,1.000000,1.000000,0.999087,0.890148
3,2023-01-31 21:50:58.182128,0.521296,0.417589,0.584976,0.988387,0.995000,1.000000,1.000000,0.134168
4,2023-01-31 21:50:59.240637,0.733509,0.999365,0.068259,0.308989,0.995000,0.953637,0.712223,0.607063
...,...,...,...,...,...,...,...,...,...
95,2023-01-31 22:22:19.608795,0.197072,0.357684,0.790378,0.739466,0.784800,0.586960,0.990236,0.750290
96,2023-01-31 22:22:20.658040,0.422286,0.612022,0.373441,0.564114,0.945641,0.387848,0.497856,0.611413
97,2023-01-31 22:22:21.710600,0.195626,0.823026,0.730777,0.587478,0.750037,0.953637,0.999087,0.001212
98,2023-01-31 22:22:22.767501,0.093379,0.698197,0.913888,0.239719,1.000000,0.733423,0.999978,0.603501


In [11]:
import plotly_express as px

fig = px.line(df_p_vals, x='time', y=feat_cols)
fig.add_hline(y=0.05, line_color='red')

ModuleNotFoundError: No module named 'plotly_express'