In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numba import jit
import pickle
import shap

"""
We'll load in the model we want to plot here.
"""
def load_model(model):
    """Return the object unpickled from ``Model Results/RF/<model>.pkl``.

    ``model`` is the file name without its ``.pkl`` extension; the
    extension is appended here.
    """
    # NOTE: unpickling can run arbitrary code -- only load files you trust.
    model_path = 'Model Results/RF/%s.pkl' % model
    with open(model_path, 'rb') as handle:
        return pickle.load(handle)

"""
We'll load in the data we want to compare the plot to here. 
This data should have been saved while training and generating 
a random forest model.
"""
def load_data(data_file, test_size=None, random_state=None):
    """Load the pickled dataset stored next to a random forest model.

    The pickle is read from ``Model Results/RF/<data_file>.pkl``; the
    ``.pkl`` extension is appended here, so pass the bare file name.

    Two pickle layouts are accepted:

    * ``(X_train, X_test, Y_train, Y_test)`` -- returned as stored.
    * ``(X, Y)`` -- split into train/test parts with ``train_test_split``.

    Parameters
    ----------
    data_file : str
        File name (without extension) inside ``Model Results/RF``.
    test_size, random_state : optional
        Forwarded to ``train_test_split`` when the pickle holds ``(X, Y)``.
        The defaults (``None``) keep ``train_test_split``'s own defaults.

    Returns
    -------
    tuple
        ``(X_train, X_test, Y_train, Y_test)``.

    Raises
    ------
    ValueError
        If the pickled object has neither two nor four elements.
    """
    # NOTE: unpickling can run arbitrary code -- only load files you trust.
    with open('Model Results/RF/%s.pkl' % data_file, 'rb') as pkl_file:
        data = pickle.load(pkl_file)

    n_parts = len(data)
    if n_parts == 4:
        # Already split when it was saved: hand the parts back untouched.
        X_train, X_test, Y_train, Y_test = data
    elif n_parts == 2:
        # The previous code tried to unpack (X, Y) straight into four names,
        # which always raised ValueError; split the pair instead.
        # NOTE(review): without a fixed random_state this split is random and
        # presumably will not match the partition used during training --
        # confirm how the training script split the data.
        X, Y = data
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=test_size, random_state=random_state
        )
    else:
        raise ValueError(
            'expected (X, Y) or (X_train, X_test, Y_train, Y_test) in '
            '%r, got %d elements' % (data_file, n_parts)
        )
    return X_train, X_test, Y_train, Y_test

"""
This will plot a force plot. A force plot is an interactive two-dimensional plot that lets 
us see why each prediction was made the way it was.

This plot can just as easily be drawn for a single prediction instead of for the entire dataset.
You can simply pass shap_values[n,:] and X[n,:] for the nth prediction you want to see.
The larger the value the larger the influence it has on the prediction. Red values increase the prediction 
value while blue values decrease the prediction value.
"""
def get_force_plot(model, X):
    """Show a SHAP force plot of ``model``'s predictions on ``X``.

    A ``shap.TreeExplainer`` is built for ``model``, SHAP values are
    computed for ``X``, and the resulting force plot is handed to
    ``display``. Nothing is returned.

    ``display`` is not imported in this file; it is presumably the one the
    notebook (IPython) environment injects.
    """
    # Load the JavaScript the interactive plot needs in the notebook.
    shap.initjs()

    tree_explainer = shap.TreeExplainer(model)
    contributions = tree_explainer.shap_values(X)

    force_plot = shap.force_plot(
        tree_explainer.expected_value,
        contributions,
        X,
    )
    display(force_plot)
    

# Load the pickled model stored as 'Model Results/RF/rf.pkl'.
base = load_model('rf')
# Load the dataset saved alongside the model.
# NOTE(review): load_data appends '.pkl' itself, so this opens
# 'Model Results/RF/data.pkl.pkl' -- confirm the file really has that name,
# otherwise the argument should presumably be 'data'.
X_train,X_test,Y_train,Y_test = load_data('data.pkl')
# Draw the force plot for the held-out test features.
get_force_plot(base,X_test)

