Gebil

Description: This program demonstrates deploying a model with Dash (a framework for building interactive apps).

In [1]:
import pandas as pd
# Raw travel-insurance records, read from a path relative to the notebook.
data = pd.read_csv(filepath_or_buffer='data/travel_insurance.csv')

# Column names, non-null counts and dtypes at a glance.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63326 entries, 0 to 63325
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Agency                63326 non-null  object 
 1   Agency Type           63326 non-null  object 
 2   Distribution Channel  63326 non-null  object 
 3   Product Name          63326 non-null  object 
 4   Claim                 63326 non-null  object 
 5   Duration              63326 non-null  int64  
 6   Destination           63326 non-null  object 
 7   Net Sales             63326 non-null  float64
 8   Commision (in value)  63326 non-null  float64
 9   Gender                18219 non-null  object 
 10  Age                   63326 non-null  int64  
dtypes: float64(2), int64(2), object(7)
memory usage: 5.3+ MB


### Cleaning / Encoding

In [2]:
# Categorical features that get expanded into indicator columns.
categoricals = ['Agency', 'Agency Type', 'Distribution Channel', 'Product Name', 'Destination']

# Work on a copy without Gender (EDA revealed it's mostly missing).
df = data.drop(columns=['Gender']).copy()

# Binary target: Yes -> 1, No -> 0.
df['Claim'] = df['Claim'].map({'Yes': 1, 'No': 0})

# Indicator columns first, followed by every non-categorical column
# (Index.difference returns those names in sorted order).
one_hots = pd.get_dummies(df[categoricals])
df = one_hots.join(df[df.columns.difference(categoricals)])

### Split

In [3]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Splits the provided df three ways (six arrays in total: an input set and a target set per split).
def tri_split(df, target: str, 
              train: float, validation: float, test: float, 
              standard_scale=False, random_state=None, replace=False, 
              sample_frac=1):
    """Shuffle ``df`` and split it into train / validation / test arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the input columns and the ``target`` column.
    target : str
        Name of the column to predict.
    train, validation, test : float
        Fraction of the sampled rows assigned to each split; must sum to one.
    standard_scale : bool, str, list-like or None, default False
        ``True`` standardizes every column except ``target``; a column name
        or a list of names standardizes only those columns; ``False`` or
        ``None`` disables scaling.
    random_state, replace, sample_frac
        Forwarded to ``DataFrame.sample`` (``sample_frac`` as ``frac``).

    Returns
    -------
    dict
        Keys, in this order: ``X_train``, ``y_train``, ``X_vali``, ``y_vali``,
        ``X_test``, ``y_test``, ``scaler``. The ``X_*`` values are 2-D arrays,
        the ``y_*`` values are column vectors of shape ``(n, 1)`` and
        ``scaler`` is the fitted ``StandardScaler`` (``None`` when no scaling
        was requested).

    Raises
    ------
    ValueError
        If the three fractions do not sum to one.
    """
    # Tolerant comparison: e.g. 0.7 + 0.2 + 0.1 is 0.9999999999999999 in floats.
    if abs((train + validation + test) - 1) > 1e-9:
        raise ValueError('Sum of sizes is not equal to one.')

    # Resolve which columns (if any) are standardized.
    if standard_scale is True:
        scale_cols = list(df.columns.difference([target]))
    elif standard_scale is None or standard_scale is False:
        scale_cols = []
    elif isinstance(standard_scale, str):
        scale_cols = [standard_scale]
    else:
        scale_cols = list(standard_scale)

    # Shuffle once, then cut at boundaries computed from the *sampled* length so
    # sample_frac != 1 still honours the requested proportions. The small epsilon
    # keeps int() from truncating values like 8.999999999999998 down to 8.
    shuffled = df.sample(frac=sample_frac, random_state=random_state, replace=replace)
    n_rows = len(shuffled)
    train_end = int(train * n_rows + 1e-9)
    vali_end = int((train + validation) * n_rows + 1e-9)

    # Copies, so the scaling below never writes into a view of the caller's frame.
    train_df = shuffled.iloc[:train_end].copy()
    vali_df = shuffled.iloc[train_end:vali_end].copy()
    test_df = shuffled.iloc[vali_end:].copy()

    # Fit on the training split only; reuse those statistics for the other splits.
    sc = None
    if scale_cols:
        sc = StandardScaler()
        train_df[scale_cols] = sc.fit_transform(train_df[scale_cols])
        for part in (vali_df, test_df):
            if len(part):
                part[scale_cols] = sc.transform(part[scale_cols])

    return dict(
        # Splits input data from target data
        X_train = train_df.drop(columns=[target]).to_numpy(),
        y_train = train_df[target].to_numpy().reshape(-1, 1),
        X_vali  = vali_df.drop(columns=[target]).to_numpy(),
        y_vali  = vali_df[target].to_numpy().reshape(-1, 1),
        X_test  = test_df.drop(columns=[target]).to_numpy(),
        y_test  = test_df[target].to_numpy().reshape(-1, 1),
        scaler  = sc
    )

In [4]:
# A fixed seed makes the shuffle, and every result derived from it, reproducible
# under Restart & Run All.
data_dict = tri_split(df, target='Claim', train=0.75, validation=0.15, test=0.10,
                      standard_scale=True, random_state=42)

# Unpack by key so the assignments do not depend on the dict's insertion order.
X_train, y_train = data_dict['X_train'], data_dict['y_train']
X_vali, y_vali = data_dict['X_vali'], data_dict['y_vali']
X_test, y_test = data_dict['X_test'], data_dict['y_test']
scaler = data_dict['scaler']

### Creating Model

In [5]:
from sklearn.tree import DecisionTreeClassifier

# Fits data to model. random_state is pinned so the fitted tree is the same on
# every run (scikit-learn permutes candidate features at each split).
DT = DecisionTreeClassifier(random_state=42)
DT.fit(X_train, y_train)

# Collects predictions on the validation split
pred = DT.predict(X_vali)

### Offloading Model

In [6]:
import os

import joblib

# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing (otherwise a fresh checkout fails here).
os.makedirs('./Model', exist_ok=True)

# Serializing Scaler
joblib.dump(scaler, './Model/scaler.joblib')
# Serializing trained model
# NOTE(review): this is a joblib pickle despite the .h5 suffix; the name is kept
# so existing consumers of the file keep working.
joblib.dump(DT, './Model/DT-model.h5')

['./Model/DT-model.h5']

### Loading Model

In [7]:
from keras.models import load_model

# Reload the artifacts written by the "Offloading Model" cell so the app below
# runs on what was actually serialized, not on the in-memory objects.
# Both files are joblib pickles (the model file merely carries an .h5 suffix),
# so they are read with joblib.load rather than a Keras loader.
# NOTE: joblib.load unpickles arbitrary objects - only load files you created.
model = joblib.load('./Model/DT-model.h5')
scaler = joblib.load('./Model/scaler.joblib')

### Dash

In [8]:
from jupyter_dash import JupyterDash

import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# Dash expects a *list* of stylesheet URLs; a bare string would be iterated
# character by character.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = JupyterDash(__name__, external_stylesheets=external_stylesheets)
# Underlying Flask server, exposed for WSGI-style deployment.
server = app.server

In [9]:
# Holds built div objects, keyed by feature name
divs = dict()

discreet_cols = ['Agency', 'Agency Type', 'Distribution Channel', 'Product Name', 'Destination']
continuious_cols = ['Duration', 'Net Sales', 'Age', 'Commision (in value)']
className = 'five columns'

# Collects the dropdown options for each discrete feature from the raw data
options_dict = dict()
for label in discreet_cols:
    choices = data[label].unique()
    options_dict.update({
        label:[{'label': x, 'value': x} for x in choices]
    })

# Builds div and adds to dict [for discreet_cols]
for label, options in options_dict.items():
    # Builds HTML object; the component id is the feature name so the callback
    # can address it directly
    input_label = dcc.Dropdown(
        id=label,
        options = options)
    div_label = html.Div(
        children=[html.H3(f'{label}:'), input_label],
        className=className)
    # Stores in dict
    divs.update({label:div_label})

# Builds div and adds to dict [for continuious_cols]
for label in continuious_cols:
    # Builds HTML object. 'number' is the valid dcc.Input type for numeric
    # entry ('numeric' is not one of the accepted values).
    input_label = dcc.Input(
        id=label,
        type='number')
    div_label = html.Div(
        children=[html.H3(f'{label}:'), input_label],
        className=className)
    # Stores in dict
    divs.update({label:div_label})

In [10]:
# Row holding one input block per continuous feature
div_numerical = html.Div(
    className="row",
    children=list(map(divs.get, continuious_cols)),
)

# Row holding one dropdown block per categorical feature
div_categorical = html.Div(
    className="row",
    children=list(map(divs.get, discreet_cols)),
)

In [11]:
def get_prediction(**kwargs):
    """Predict ``Claim`` for a single record described by keyword arguments.

    Parameters
    ----------
    **kwargs
        One value per feature, keyed by the raw column name: the five
        categorical features (``Agency``, ``Agency Type``,
        ``Distribution Channel``, ``Product Name``, ``Destination``) and the
        four numeric ones (``Duration``, ``Net Sales``, ``Age``,
        ``Commision (in value)``).

    Returns
    -------
    numpy.ndarray or None
        The result of ``model.predict`` for the one-row input, or ``None``
        when any feature is missing (``None`` or an empty string).

    Raises
    ------
    ValueError
        If a categorical value has no matching one-hot column, or a numeric
        value cannot be converted to ``float``.
    """
    target = 'Claim'
    discreet_cols = ['Agency', 'Agency Type', 'Distribution Channel', 'Product Name', 'Destination']
    continuious_cols = ['Duration', 'Net Sales', 'Age', 'Commision (in value)']
    input_dict = dict(**kwargs)

    # An incomplete form cannot be scored; let the caller decide what to show.
    for label in (*discreet_cols, *continuious_cols):
        if input_dict.get(label) in (None, ''):
            return None

    # One all-zero row with the model's input columns: df without the target,
    # in df's own column order (the order X_train was built in).
    feature_cols = df.columns.drop(target)
    X = pd.DataFrame(np.zeros((1, len(feature_cols))), columns=feature_cols)

    # Updates values
    for label in discreet_cols:
        one_hot = '_'.join([label, str(input_dict[label])])
        # Assigning to an unknown column would silently widen the frame.
        if one_hot not in X.columns:
            raise ValueError(f'Unknown value {input_dict[label]!r} for {label!r}.')
        X.loc[0, one_hot] = 1
    for label in continuious_cols:
        X.loc[0, label] = float(input_dict[label])

    # The scaler was fitted on df.columns.difference([target]), i.e. the feature
    # columns in sorted order, so they are handed over in that order and the
    # scaled values written back under the same names.
    scaled_cols = list(df.columns.difference([target]))
    X[scaled_cols] = scaler.transform(X[scaled_cols])

    # The model was fitted on plain arrays, so predict on one as well.
    pred = model.predict(X.to_numpy())

    return pred

In [12]:
## App layout
app.layout = html.Div([
    html.H1('Predict Claim'),
    html.H2('Enter the following data:'),

    # Input form: numeric row first, categorical row second
    html.Div(
        children=[div_numerical, div_categorical]
    ),

    # Target of the prediction callback. Dash style dictionaries use camelCased
    # CSS property names (marginTop, textAlign) rather than hyphenated ones.
    html.H1(id='output',
            style={'marginTop': '50px', 'textAlign': 'center'})
])

In [13]:
predictors = [*discreet_cols, *continuious_cols]

@app.callback(
    Output('output', 'children'),
    [Input(x, 'value') for x in predictors])
def show_prediction(*values, **kwargs):
    """Render the prediction text for the current form values.

    Dash passes one positional argument per ``Input``, in the order of
    ``predictors``; they are paired with the feature names here. Keyword
    arguments are still accepted and take precedence over positional values.

    Returns
    -------
    str
        ``'Prediction: ...'`` with the model output, or a prompt while the
        form is incomplete (Dash fires the callback on page load with every
        value set to ``None``).
    """
    inputs = {**dict(zip(predictors, values)), **kwargs}

    incomplete = 'Fill in every field to get a prediction.'
    if any(inputs.get(name) in (None, '') for name in predictors):
        return incomplete

    pred = get_prediction(**inputs)
    if pred is None:
        return incomplete
    return str(f'Prediction: {pred}')

In [14]:
# Starts the Dash app from the notebook kernel.
# NOTE(review): the saved output below is a ConnectionError raised while polling
# 127.0.0.1:8050 - confirm the server actually starts on a fresh kernel.
app.run_server()

Exception in thread Thread-5:
Traceback (most recent call last):
  File "C:\Users\mrcha\AppData\Local\Programs\Python\Python39\lib\threading.py", line 973, in _bootstrap_inner
    self.run()
  File "C:\Users\mrcha\AppData\Local\Programs\Python\Python39\lib\threading.py", line 910, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\mrcha\AppData\Local\Programs\Python\Python39\lib\site-packages\retrying.py", line 49, in wrapped_f
    return Retrying(*dargs, **dkw).call(f, *args, **kw)
  File "C:\Users\mrcha\AppData\Local\Programs\Python\Python39\lib\site-packages\retrying.py", line 212, in call
    raise attempt.get()
  File "C:\Users\mrcha\AppData\Local\Programs\Python\Python39\lib\site-packages\retrying.py", line 247, in get
    six.reraise(self.value[0], self.value[1], self.value[2])
  File "C:\Users\mrcha\AppData\Local\Programs\Python\Python39\lib\site-packages\six.py", line 719, in reraise
    raise value
  File "C:\Users\mrcha\AppData\Local\Programs\Python\Python

ConnectionError: HTTPConnectionPool(host='127.0.0.1', port=8050): Max retries exceeded with url: /_alive_7a07201a-8032-488a-a145-ddcdc041dc0a (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001AE29D1B3A0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))