In [4]:
import numpy as np
import pandas as pd
import streamlit as st
import plotly.express as px
from sklearn.compose import ColumnTransformer
import matplotlib.pyplot as pl

import shap
from shap import Explanation

from sklearn.multiclass import OneVsRestClassifier
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import (
    FunctionTransformer,
    LabelBinarizer,
    LabelEncoder,
    OneHotEncoder,
    OrdinalEncoder,
    StandardScaler,
)
from sklearn import preprocessing
from lightgbm import LGBMClassifier
import json
import joblib

In [None]:
# Set Streamlit's 'deprecation.showPyplotGlobalUse' option to False; judging by
# the option name this silences the warning for st.pyplot() calls that rely on
# the global matplotlib figure instead of an explicit one.
# NOTE(review): newer Streamlit releases may no longer recognise this option --
# confirm against the Streamlit version this app is deployed with.
st.set_option('deprecation.showPyplotGlobalUse', False)

In [None]:
def to_object(x):
    """Return ``x`` as a DataFrame whose columns are all cast to ``str``.

    NOTE(review): presumably referenced by name from the pickled preprocessing
    pipeline, in which case it must stay importable under this exact name --
    confirm before renaming or moving it.
    """
    frame = pd.DataFrame(x)
    as_text = frame.astype(str)
    return as_text

def to_number(x):
    """Return ``x`` as a DataFrame whose columns are all cast to ``float``.

    NOTE(review): presumably referenced by name from the pickled preprocessing
    pipeline, in which case it must stay importable under this exact name --
    confirm before renaming or moving it.
    """
    frame = pd.DataFrame(x)
    as_float = frame.astype(float)
    return as_float

In [16]:
# Load the feature specification that drives the sidebar form.
# The code below reads it as a mapping of column name -> [kind, spec], where
# kind is "cat", "ord" or "int"; spec is the list of selectable values for
# "cat"/"ord" and [min, max, default] for "int".
# The encoding is pinned to UTF-8 so the file parses the same way regardless of
# the platform's default locale encoding.
with open('feature_data_012.json', encoding='utf-8') as json_file:
    columns = json.load(json_file)


In [18]:
def _as_float(number):
    """Cast a numeric bound to float, passing ``None`` through unchanged."""
    return None if number is None else float(number)


# Build one sidebar widget per feature and collect the user's choices.
#   row       -- column name -> single-element list holding the chosen value,
#                ready to be turned into a one-row DataFrame.
#   pred_cols -- every column name from the specification, in file order; used
#                later to select/order the model's input columns.
row = {}
pred_cols = []
for col, values in columns.items():
    pred_cols.append(col)
    kind, spec = values[0], values[1]
    ncol = " ".join(col.split("_"))  # human-readable widget label

    if kind in ("cat", "ord"):
        # Only a value that *is* "nan" is shown as "Unknown". A blanket
        # str.replace would also rewrite labels that merely contain "nan"
        # (e.g. "finance"), and those would no longer round-trip through the
        # whole-value 'Unknown' -> 'nan' replacement done before prediction.
        options = ["Unknown" if str(option) == "nan" else str(option) for option in spec]
        row[col] = [st.sidebar.selectbox(ncol, options, key=col)]
    elif kind == "int":
        # step is a float, so min/max/default are cast to float as well:
        # Streamlit rejects number_input calls that mix int and float arguments.
        row[col] = [
            st.sidebar.number_input(
                ncol,
                min_value=_as_float(spec[0]),
                max_value=_as_float(spec[1]),
                value=_as_float(spec[2]),
                step=0.5,
                key=col,
            )
        ]



NameError: name 'pred_cols' is not defined

In [None]:
def transfrom_array_to_df_onehot(pl, nparray, onehot=True):
    """Wrap a transformed feature array in a DataFrame with named columns.

    Column names are assembled in this order: the columns of the
    preprocessor's first transformer, then those of the second, then those of
    the third (NOTE(review): presumably numeric, ordinal and categorical
    columns respectively -- confirm against how the pipeline was built).

    Parameters
    ----------
    pl : mapping-like
        Fitted pipeline; ``pl["preprocessor"]`` must expose ``transformers``
        and ``transformers_``.
    nparray : array-like
        Transformed data whose column count matches the assembled name list.
    onehot : bool, default True
        If True, the third transformer's columns are expanded to one name per
        encoded category, taken from the ``"OneHotEnconding"`` step's
        ``get_feature_names_out()``. Each ``x<idx>_<category>`` name becomes
        ``<original column>_<category>``. If False, the original column names
        are used unchanged.

    Returns
    -------
    pandas.DataFrame
        ``nparray`` with the assembled column names.
    """
    preprocessor = pl["preprocessor"]
    first_cols = preprocessor.transformers_[0][2]
    ordinal_col = preprocessor.transformers[1][2]
    original_col = preprocessor.transformers[2][2]

    # list(...) copies the spec (so the pipeline's own list is never mutated)
    # and also accepts non-list sequences such as a pandas Index.
    col_list = list(first_cols)
    col_list.extend(ordinal_col)

    if onehot:
        encoder = preprocessor.transformers_[2][1].named_steps["OneHotEnconding"]
        encoded_col = encoder.get_feature_names_out()
        for idx, col in enumerate(original_col):
            prefix = "x" + str(idx) + "_"
            # Match on the prefix only and keep everything after it, so that
            # category values containing "_" (or text that looks like another
            # "x<n>_" prefix) are neither truncated nor mis-assigned.
            col_list.extend(
                col + "_" + name[len(prefix):]
                for name in encoded_col
                if name.startswith(prefix)
            )
    else:
        col_list.extend(original_col)

    return pd.DataFrame(nparray, columns=col_list)

In [None]:
# Usage instructions rendered as markdown in the main page area (the input
# widgets and the action buttons themselves live in the sidebar).
st.markdown("""Please select the options on the sidebar for the model to predict the delivery type. Click the button in the end of the sidebar to start prediction""")

In [None]:
# Load the persisted artefacts the app needs, one joblib file each. `filename`
# is reused as a scratch variable and ends up holding the last path.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only load files from a trusted source.
# NOTE(review): if the pickled pipeline references to_object / to_number, those
# functions must already be defined when this cell runs -- confirm.

# Classifier used for predict / predict_proba.
filename = 'finalized_model_lgbm_.sav'
loaded_model = joblib.load(filename)
# Preprocessing pipeline; indexed as pipeline["preprocessor"] elsewhere.
filename = 'pipeline_012.sav'
pipeline = joblib.load(filename)
# Label encoder used to map predicted class indices back to class labels.
filename = 'label_encoder_012.sav'
label_encoder = joblib.load(filename)
# SHAP explainer used to compute shap_values / expected_value.
filename = 'explainer_012.sav'
explainer = joblib.load(filename)

In [None]:
def create_outcome(le, arr):
    """Format the first row of ``arr`` as a one-row table of percentages.

    Each entry of ``le.classes_`` becomes a column whose single cell is the
    value at the same position in ``arr[0]``, multiplied by 100, rounded to
    two decimals and suffixed with " %".
    """
    percentages = {
        label: [str(round(arr[0][position] * 100, 2)) + " %"]
        for position, label in enumerate(le.classes_)
    }
    return pd.DataFrame(percentages)

In [None]:
def adddummy_variable(X, pl):
    """Add every column the preprocessor expects but ``X`` lacks.

    Missing columns are filled with a placeholder so that the pipeline can be
    applied to a partial input row:

    * columns of the first transformer  -> ``0``
    * columns of the third transformer  -> the first entry of the matching
      ``categories_`` list of that transformer's step at index 2
      (NOTE(review): presumably the one-hot encoder -- confirm)
    * columns of the second transformer -> the string ``"0"``

    Parameters
    ----------
    X : pandas.DataFrame
        Input rows. Modified in place; callers that need the original should
        pass a copy.
    pl : mapping-like
        Fitted pipeline; ``pl["preprocessor"]`` must expose ``transformers``
        and ``transformers_``.

    Returns
    -------
    pandas.DataFrame
        The same ``X`` object, with the missing columns appended.
    """
    # Everything is read from the `pl` argument. The previous version reached
    # for a module-level `pipeline` here, which silently ignored the argument.
    preprocessor = pl["preprocessor"]
    col_list_int = preprocessor.transformers_[0][2]
    ordinal_col = preprocessor.transformers[1][2]
    original_col = preprocessor.transformers[2][2]

    for c in col_list_int:
        if c not in X.columns:
            X[c] = 0

    for idx, c in enumerate(original_col):
        if c not in X.columns:
            # Looked up lazily so a complete input never touches the encoder.
            X[c] = preprocessor.transformers_[2][1][2].categories_[idx][0]

    for c in ordinal_col:
        if c not in X.columns:
            X[c] = "0"
    return X

In [None]:
# Sidebar action buttons. The flags are checked further down: `make_prediction`
# runs the prediction only, `explaining` runs the prediction and then renders
# the SHAP plots.
make_prediction=st.sidebar.button('Make Prediction')
explaining=st.sidebar.button('Make Prediction with Shap Values')


In [None]:
def streamlit_predict(row):
    """Predict for the sidebar selection and render the result in the page.

    ``row`` (column name -> single-element list) is turned into a one-row
    DataFrame and echoed to the page. A copy is completed with placeholder
    columns, has 'Unknown' mapped back to 'nan', and is run through the
    module-level ``pipeline``. The transformed array is relabelled and
    restricted to ``pred_cols`` before being scored by ``loaded_model``.

    Returns
    -------
    tuple
        ``(df, X_new, pred, pred_proba)``: the raw input frame, the model
        input frame, the predicted class indices and the class probabilities.
    """
    input_df = pd.DataFrame.from_dict(row)
    st.write('Predicting for')
    st.dataframe(input_df)

    # Work on a copy so the frame shown to the user stays untouched.
    completed = adddummy_variable(input_df.copy(), pipeline)
    completed = completed.replace({'Unknown': 'nan'})

    transformed = pipeline.transform(completed)
    labelled = transfrom_array_to_df_onehot(pipeline, transformed, onehot=False)
    model_input = labelled.loc[:, pred_cols]
    feature_matrix = model_input.values

    predicted = loaded_model.predict(feature_matrix)
    probabilities = loaded_model.predict_proba(feature_matrix)

    st.markdown("### The prediction is:  ")
    predicted_label = label_encoder.inverse_transform(predicted)[0]
    st.write(predicted_label)
    st.dataframe(create_outcome(label_encoder, probabilities))
    return input_df, model_input, predicted, probabilities

In [None]:
# React to whichever sidebar button triggered this script run.
if make_prediction:
    streamlit_predict(row)

if explaining:
    df, X_new, pred, pred_proba = streamlit_predict(row)
    st.write('Explaining using SHAP values...')
    shap_values = explainer.shap_values(X_new.values, check_additivity=False)

    # Each SHAP plot is drawn onto the current matplotlib figure, which is then
    # handed to Streamlit explicitly (rather than relying on st.pyplot()'s
    # deprecated global-figure fallback) and cleared before the next plot.
    st.subheader("Summary Plot")
    shap.summary_plot(
        shap_values,
        X_new.values,
        plot_type="bar",
        class_names=label_encoder.classes_,
        feature_names=X_new.columns,
        show=False,  # keep the figure open so Streamlit can render it
    )
    st.pyplot(pl.gcf(), bbox_inches='tight', dpi=300, pad_inches=0)
    pl.clf()

    st.subheader("Force Plot")
    # Explains the predicted class only; `df` (the raw sidebar values) is
    # passed as the feature display values.
    # NOTE(review): this assumes df's columns line up with X_new's -- confirm.
    shap.force_plot(
        explainer.expected_value[pred[0]],
        shap_values[pred[0]],
        df,
        matplotlib=True,
        show=False,
        figsize=(40, 10),
    )
    st.pyplot(pl.gcf(), bbox_inches='tight', dpi=300, pad_inches=0)
    pl.clf()



In [None]:
#https://github.com/sgoede/streamlit-boston-app/blob/master/boston_xgb_app.py