## Import Libraries

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer

import shapash
from shapash.explainer.smart_explainer import SmartExplainer
from shapash.utils.load_smartpredictor import load_smartpredictor

## Read the data

In [2]:
df = pd.read_csv('Concrete_Data.csv')

## Imputing Missing Data

In [3]:
num= df.select_dtypes(include=['int64','float64']).keys()

impute=SimpleImputer(strategy='mean')
impute_fit= impute.fit(df[num])
df[num]= impute_fit.transform(df[num])

## Divide independent and dependent variables

In [4]:
x = df.drop(['csMPa'],axis=1)
y = df['csMPa']

In [5]:
num

Index(['Cement (component 1)(kg in a m^3 mixture)',
       'Blast Furnace Slag (component 2)(kg in a m^3 mixture)',
       'Fly Ash (component 3)(kg in a m^3 mixture)',
       'Water  (component 4)(kg in a m^3 mixture)',
       'Superplasticizer (component 5)(kg in a m^3 mixture)',
       'Coarse Aggregate  (component 6)(kg in a m^3 mixture)',
       'Fine Aggregate (component 7)(kg in a m^3 mixture)', 'Age (day)',
       'csMPa'],
      dtype='object')

In [6]:
df[num]

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),csMPa
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28.0,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28.0,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270.0,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365.0,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360.0,44.30
...,...,...,...,...,...,...,...,...,...
1025,276.4,116.0,90.3,179.6,8.9,870.1,768.3,28.0,44.28
1026,322.2,0.0,115.6,196.0,10.4,817.9,813.4,28.0,31.18
1027,148.5,139.4,108.6,192.7,6.1,892.4,780.0,28.0,23.70
1028,159.1,186.7,0.0,175.6,11.3,989.6,788.9,28.0,32.77


## Splitting data into train and test

In [7]:
xtrain,xtest,ytrain,ytest= train_test_split(x,y,test_size=0.3,random_state=42)

In [8]:
xtest

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day)
31,266.0,114.0,0.0,228.0,0.0,932.0,670.0,365.0
109,362.6,189.0,0.0,164.9,11.6,944.7,755.8,7.0
136,389.9,189.0,0.0,145.9,22.0,944.7,755.8,28.0
88,362.6,189.0,0.0,164.9,11.6,944.7,755.8,3.0
918,145.0,0.0,179.0,202.0,8.0,824.0,869.0,28.0
...,...,...,...,...,...,...,...,...
250,250.0,0.0,95.7,187.4,5.5,956.9,861.2,14.0
377,376.0,0.0,0.0,214.6,0.0,1003.5,762.4,56.0
615,277.0,0.0,0.0,191.0,0.0,968.0,856.0,180.0
9,475.0,0.0,0.0,228.0,0.0,932.0,594.0,28.0


## Feature scaling

In [9]:
stand= StandardScaler()

fit= stand.fit(xtrain)

xtrain_scl=fit.transform(xtrain)
xtest_scl=fit.transform(xtest)

## Apply Random Forest Regression Model

In [10]:
regressor= RandomForestRegressor(ccp_alpha=0.0)

fit_regressor= regressor.fit(xtrain_scl,ytrain)

## Use Shapash Library

## Step 1: Declare SmartExplainer Object

In [11]:
SE = SmartExplainer()

## Step 2: : Compile Model, Dataset, Encoders

In [12]:
SE.compile(
x=xtest,
model=regressor,
)

Backend: Shap TreeExplainer


## Step 3: Display output

In [13]:
app = SE.run_app(title_story='Concrete_Data')





Dash is running on http://0.0.0.0:8050/



INFO:root:Your Shapash application run on http://DESKTOP-461QHE8:8050/
INFO:root:Use the method .kill() to down your app.
INFO:shapash.webapp.smart_app:Dash is running on http://0.0.0.0:8050/



 * Serving Flask app 'shapash.webapp.smart_app' (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


## Step 4: Generate the Shapash Report

In [None]:

SE.generate_report(
    output_file='C://Blog Python Libraries//doumentation//shapash//report//report.html',
    project_info_file='C://Blog Python Libraries//doumentation//shapash//report//project_info.yml',
    x_train=xtrain,
    y_train=ytrain,
    y_test=ytest,
    title_story="Concrete_Data report",
    title_description="""This document is a data science report of the Concrete_Data project.
        It was generated using the Shapash library.""",
    metrics=[{'name': 'MSE', 'path': 'sklearn.metrics.mean_squared_error'}]
)


## Step 5: From training to deployment : Prediction

In [14]:
predictor = SE.to_smartpredictor()

## convert prediction into a pickle file

In [15]:
predictor.save('./predictor.pkl')

INFO:werkzeug: * Running on http://192.168.1.3:8050/ (Press CTRL+C to quit)


## load this pick file

In [16]:
predictor_load = load_smartpredictor('./predictor.pkl')

In [17]:
predictor_load.add_input(x=x, ypred=y)
detailed_contributions = predictor_load.detail_contributions()
detailed_contributions.head()

Unnamed: 0,csMPa,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day)
0,79.99,13.054279,-0.148482,-0.571653,-4.754666,2.637141,-1.487657,-2.309115,8.427009
1,61.89,13.054279,-0.148482,-0.571653,-4.754666,2.637141,-1.487657,-2.309115,8.427009
2,40.27,15.557115,5.425399,-0.316798,-4.933093,-0.909403,-1.027041,-2.462606,8.042683
3,41.05,15.557115,5.425399,-0.316798,-4.933093,-0.909403,-1.027041,-2.462606,8.042683
4,44.3,15.557115,5.425399,-0.316798,-4.933093,-0.909403,-1.027041,-2.462606,8.042683
