In [1]:
# Import HTML from the public IPython.display API rather than the internal
# IPython.core.display module.
from IPython.display import HTML

# Widen the notebook's `.container` element to 90% of the browser window.
# The returned HTML object is the cell's last expression, so it is rendered.
HTML('<style> .container{ width:90%; } </style>')

# Load Trained Model From Pickle

In [2]:
import numpy as np
import pandas as pd

In [3]:
import warnings
from sklearn.exceptions import DataConversionWarning

# Install an "ignore" filter so scikit-learn's DataConversionWarning
# messages are not shown in the cells that follow.
warnings.filterwarnings(
    'ignore',
    category=DataConversionWarning,
)

In [4]:
import joblib
# NOTE(review): `create_model` is not referenced anywhere else in the visible
# notebook; presumably it is a placeholder so that pickles referring to a
# `create_model` symbol can be loaded -- confirm before removing.
create_model=None
# joblib.load unpickles the file, which can execute arbitrary code:
# only load model artifacts that come from a trusted source.
trained_model = joblib.load('models/LightGBM.pkl')
# Column list stored next to the model; used later to align new samples
# with the column layout expected by `trained_model`.
columns = joblib.load('models/columns.pkl')

# Predict Category For Sample Data

In [5]:
# Raw attribute values for the single individual we want a prediction for.
sample_record = {
    'age': 43,
    'workclass': 'Private',
    'fnlwgt': 100000,
    'education': 'Bachelors',
    'education-num': 13,
    'marital-status': 'Married-civ-spouse',
    'occupation': 'Sales',
    'relationship': 'Husband',
    'race': 'White',
    'sex': 'Male',
    'capital-gain': 0,
    'capital-loss': 0,
    'hours-per-week': 40,
    'native-country': 'Spain',
}

# One-row frame (index 0) with every string column one-hot encoded.
data = pd.DataFrame(sample_record, index=[0]).pipe(pd.get_dummies)

In [6]:
# Align the encoded sample with the column list loaded from models/columns.pkl:
# listed columns absent from the sample are added with value 0, and sample
# columns that are not listed are dropped.
# NOTE(review): presumably `columns` is the training-time column layout -- confirm.
data = data.reindex(columns=columns, fill_value=0)

In [7]:
# Predict the label for the aligned one-row sample; the result is the
# cell's last expression, so it is displayed as the cell output.
trained_model.predict(data)

array(['>50K'], dtype=object)

# Explain Weights and Prediction Using ELI5

Split the pipeline into its estimator (the last step) and a transformer (all steps except the last). Then compute the transformed data and the matching feature names.

In [8]:
from sklearn.pipeline import Pipeline

# Defaults for the case where the loaded model is a bare estimator:
# explain the model itself on the untransformed sample.
estimator = trained_model
transformed_data = data
feature_names = columns

# isinstance (instead of an exact type comparison) so that Pipeline
# subclasses are unwrapped as well.
if isinstance(trained_model, Pipeline):
    # The final step is the estimator whether the pipeline has one step or many.
    estimator = trained_model.steps[-1][1]
    if len(trained_model.steps) > 1:
        # Everything before the final step forms the preprocessing transformer.
        transformer = Pipeline(trained_model.steps[:-1])
        transformed_data = transformer.transform(data)
        if 'select' in transformer.named_steps:
            # Keep only the names of the columns retained by the 'select' step.
            # NOTE(review): assumes 'select' is the only step that changes the
            # set or order of columns -- confirm against the training pipeline.
            feature_names = data.columns[transformer.named_steps.select.get_support()].tolist()

In [9]:
import eli5

# Render the estimator's feature weights: no limit on the number of
# features (top=None) and every available report field shown.
eli5.show_weights(
    estimator,
    feature_names=feature_names,
    top=None,
    show=eli5.formatters.fields.ALL,
)

Weight,Feature
0.3781,marital-status_Married-civ-spouse
0.1982,capital-gain
0.1661,education-num
0.0779,age
0.0686,capital-loss
0.0399,hours-per-week
0.0113,occupation_Exec-managerial
0.0077,occupation_Other-service
0.0061,occupation_Prof-specialty
0.006,sex_Male


In [10]:
# Pick the first (and only) sample row to explain.
# When the loaded model is not a multi-step Pipeline, `transformed_data` is
# still the pandas DataFrame `data`; `[0]` on a DataFrame looks up a column
# labelled 0 and raises KeyError, so select the row by position instead and
# hand eli5 a plain 1-D numeric array.
if isinstance(transformed_data, pd.DataFrame):
    sample_row = transformed_data.iloc[0].to_numpy(dtype=float)
else:
    sample_row = transformed_data[0]

eli5.show_prediction(estimator, sample_row, feature_names=feature_names, 
                     top=None, show=eli5.formatters.fields.ALL)

Contribution?,Feature
1.442,marital-status_Married-civ-spouse
1.074,education-num
0.402,age
0.126,sex_Male
0.094,relationship_Own-child
0.078,occupation_Other-service
0.045,occupation_Handlers-cleaners
0.042,occupation_Machine-op-inspct
0.031,occupation_Farming-fishing
0.026,relationship_Unmarried
