# ・分類モデル

In [None]:
import pandas as pd
from shapash.data.data_loader import data_loading
from category_encoders import OrdinalEncoder

In [None]:
# Load the Titanic sample data via Shapash's data_loading helper; it returns
# the data frame plus a second object that is later passed to SmartExplainer
# as features_dict.
titanic_df, titanic_dict = data_loading('titanic')

In [None]:
# Target: the 'Survived' column, kept as a single-column DataFrame.
y_df = titanic_df[['Survived']]

# Features: every column except 'Survived' (Index.difference yields the
# remaining labels, so the target is excluded from the selection).
X_df = titanic_df.loc[:, titanic_df.columns.difference(['Survived'])]

In [None]:
# Collect the names of all feature columns whose dtype is 'object';
# these are the columns handed to the encoder.
categorical_features = [
    name for name, dtype in X_df.dtypes.items() if dtype == 'object'
]

カテゴリカル特徴量のエンコード 

In [None]:
from category_encoders import OrdinalEncoder

# Build the ordinal encoder for the object-typed columns, fit it on the
# feature frame, and keep the fitted instance: it is passed to
# xpl.compile(preprocessing=...) later on.
encoder = OrdinalEncoder(
    cols=categorical_features,
    handle_unknown='ignore',
    return_df=True,
)
encoder.fit(X_df)

# Replace the categorical columns with their numeric codes.
X_df = encoder.transform(X_df)


is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead



トレーニング/テスト分割 

In [None]:
from sklearn.model_selection import train_test_split

# Hold-out split: 75% of the rows go to training and the rest to testing.
# random_state is fixed so the same partition is produced on every run.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X_df,
    y_df,
    train_size=0.75,
    random_state=7,
)

モデルフィッティング 

In [None]:
# Import from the public package: `sklearn.ensemble.forest` was a private
# module path that was deprecated and then removed in later scikit-learn
# releases, so the old import fails there.
from sklearn.ensemble import RandomForestClassifier

# Classification model (random forest).
# - The target is flattened to shape (n_samples,) because fit() expects a 1-D
#   y and emits a DataConversionWarning for a single-column DataFrame.
# - random_state is fixed so the fitted forest (and every explanation derived
#   from it) is reproducible on a fresh kernel.
clf = RandomForestClassifier(n_estimators=200, random_state=7).fit(
    Xtrain, ytrain.values.ravel()
)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



In [None]:
# Predicted class for each test row, stored as an integer column named 'pred'
# that shares the index of Xtest.
y_pred = (
    pd.Series(clf.predict(Xtest), index=Xtest.index, name='pred')
    .astype(int)
    .to_frame()
)

SmartExplainerを宣言してコンパイルする 

In [None]:
from shapash.explainer.smart_explainer import SmartExplainer #

In [None]:
# Human-readable names for the two target classes (passed to SmartExplainer
# as label_dict).
response_dict = {
    0: 'Death',
    1: 'Survival',
}

In [None]:
# Declare the explainer:
#   features_dict - descriptions of the features
#   label_dict    - readable names for the predicted classes
xpl = SmartExplainer(features_dict=titanic_dict, label_dict=response_dict)

説明変数、分類モデル、前処理に利用したエンコーダ、予測結果を指定してコンパイルします。

In [None]:
# Compile the explainer from the test features, the fitted classifier, the
# predictions, and the encoder that was used for preprocessing.
xpl.compile(
    x=Xtest,
    model=clf,
    y_pred=y_pred,
    preprocessing=encoder,
)

Backend: Shap TreeExplainer


WebAppを起動します 

In [None]:
# Launch the Shapash web app and keep the returned handle so the app can be
# stopped afterwards with app.kill().
app = xpl.run_app()



INFO:root:Your Shapash application run on http://DESKTOP-FQP7LG7:8050/


Dash is running on http://0.0.0.0:8050/

Dash is running on http://0.0.0.0:8050/



INFO:root:Use the method .kill() to down your app.


Dash is running on http://0.0.0.0:8050/



INFO:shapash.webapp.smart_app:Dash is running on http://0.0.0.0:8050/



 * Serving Flask app 'shapash.webapp.smart_app' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


INFO:werkzeug: * Running on http://192.168.2.105:8050/ (Press CTRL+C to quit)


リンクを読み込む事でShapash Monitorを参照する事が可能です。

使用後にWebAppを停止します

In [None]:
# Stop the web app that was started by xpl.run_app().
app.kill()

SmartExplainerをpickleファイルに保存 

In [None]:
# Save the compiled SmartExplainer to ./xpl.pkl in the working directory
# (presumably a pickle file, judging by the extension; confirm before loading
# it from an untrusted source).
xpl.save('./xpl.pkl')

![title](img/picture.png)

feature_plot：
性別(sex)、チケットクラス(ticket_class)など、生存率に対する寄与度が高い順に特徴量が並べられています。

sex_plot：
女性の方が生存率が高いとみてとれます。

Local_plot：
id=254(30歳、男性)を対象としています。
AgeとPassenger fareが生存率を上げる要因、sexが生存率を下げる要因になっていると考えられます。

# ・回帰モデル

In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

住宅価格データを読み込む 

In [None]:
from shapash.data.data_loader import data_loading

# Load the house-prices sample data together with the object that is later
# passed to SmartExplainer as features_dict.
house_df, house_dict = data_loading('house_prices')

# Target: 'SalePrice' kept as a single-column DataFrame.
y_df = house_df[['SalePrice']]

# Features: every column except 'SalePrice'.
X_df = house_df.loc[:, house_df.columns.difference(['SalePrice'])]

In [None]:
# Preview the first rows of the feature frame.
X_df.head()

Unnamed: 0_level_0,1stFlrSF,2ndFlrSF,3SsnPorch,BedroomAbvGr,BldgType,BsmtCond,BsmtExposure,BsmtFinSF1,BsmtFinSF2,BsmtFinType1,...,SaleType,ScreenPorch,Street,TotRmsAbvGrd,TotalBsmtSF,Utilities,WoodDeckSF,YearBuilt,YearRemodAdd,YrSold
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,856,854,0,3,Single-family Detached,Typical - slight dampness allowed,No Exposure/No Basement,706,0,Good Living Quarters,...,Warranty Deed - Conventional,0,Paved,8,856,"All public Utilities (E,G,W,& S)",0,2003,2003,2008
2,1262,0,0,3,Single-family Detached,Typical - slight dampness allowed,Good Exposure,978,0,Average Living Quarters,...,Warranty Deed - Conventional,0,Paved,6,1262,"All public Utilities (E,G,W,& S)",298,1976,1976,2007
3,920,866,0,3,Single-family Detached,Typical - slight dampness allowed,Mimimum Exposure,486,0,Good Living Quarters,...,Warranty Deed - Conventional,0,Paved,6,920,"All public Utilities (E,G,W,& S)",0,2001,2002,2008
4,961,756,0,3,Single-family Detached,Good,No Exposure/No Basement,216,0,Average Living Quarters,...,Warranty Deed - Conventional,0,Paved,7,756,"All public Utilities (E,G,W,& S)",0,1915,1970,2006
5,1145,1053,0,4,Single-family Detached,Typical - slight dampness allowed,Average Exposure,655,0,Good Living Quarters,...,Warranty Deed - Conventional,0,Paved,9,1145,"All public Utilities (E,G,W,& S)",192,2000,2000,2008


In [None]:
# Print each column's non-null count and dtype to spot the object-typed
# columns that need encoding.
X_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1460 entries, 1 to 1460
Data columns (total 72 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   1stFlrSF       1460 non-null   int64  
 1   2ndFlrSF       1460 non-null   int64  
 2   3SsnPorch      1460 non-null   int64  
 3   BedroomAbvGr   1460 non-null   int64  
 4   BldgType       1460 non-null   object 
 5   BsmtCond       1460 non-null   object 
 6   BsmtExposure   1460 non-null   object 
 7   BsmtFinSF1     1460 non-null   int64  
 8   BsmtFinSF2     1460 non-null   int64  
 9   BsmtFinType1   1460 non-null   object 
 10  BsmtFinType2   1460 non-null   object 
 11  BsmtFullBath   1460 non-null   int64  
 12  BsmtHalfBath   1460 non-null   int64  
 13  BsmtQual       1460 non-null   object 
 14  BsmtUnfSF      1460 non-null   int64  
 15  CentralAir     1460 non-null   object 
 16  Condition1     1460 non-null   object 
 17  Condition2     1460 non-null   object 
 18  Electric

In [None]:
from category_encoders import OrdinalEncoder

# Names of the feature columns whose dtype is 'object'.
categorical_features = [
    name for name, dtype in X_df.dtypes.items() if dtype == 'object'
]

# Fit an ordinal encoder on those columns; the fitted instance is kept because
# it is passed to xpl.compile(preprocessing=...) later on.
encoder = OrdinalEncoder(
    cols=categorical_features,
    handle_unknown='ignore',
    return_df=True,
)
encoder.fit(X_df)

# Replace the categorical columns with their numeric codes.
X_df = encoder.transform(X_df)


is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead



トレーニング/テスト分割 

In [None]:
# Hold-out split: 75% of the rows for training, the rest for testing, with a
# fixed seed so the partition is repeatable.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X_df,
    y_df,
    train_size=0.75,
    random_state=1,
)

モデルフィッティング 

In [None]:
# Fit an ordinary linear regression model on the training split.
regressor = LinearRegression().fit(Xtrain,ytrain)

In [None]:
# Predictions for the test rows, stored in a 'pred' column that shares the
# index of Xtest.
y_pred = pd.DataFrame(
    regressor.predict(Xtest),
    index=Xtest.index,
    columns=['pred'],
)

In [None]:
from shapash.explainer.smart_explainer import SmartExplainer

In [None]:
# Declare the explainer for the regression case; house_dict supplies the
# feature descriptions. No label_dict is passed here.
xpl = SmartExplainer(features_dict=house_dict)

In [None]:
# Compile the explainer from the test features and the fitted regressor.
# The last two arguments are optional: the predictions, and the encoder used
# for preprocessing (the compile step can use its inverse_transform method).
xpl.compile(
    x=Xtest,
    model=regressor,
    y_pred=y_pred,
    preprocessing=encoder,
)

Backend: Shap LinearExplainer


WebAppを起動します

In [None]:
# Launch the Shapash web app for the regression explainer and keep the
# returned handle so the app can be stopped afterwards with app.kill().
app = xpl.run_app()





Dash is running on http://0.0.0.0:8050/



INFO:root:Your Shapash application run on http://DESKTOP-FQP7LG7:8050/
INFO:root:Use the method .kill() to down your app.


Dash is running on http://0.0.0.0:8050/

Dash is running on http://0.0.0.0:8050/

Dash is running on http://0.0.0.0:8050/

Dash is running on http://0.0.0.0:8050/

Dash is running on http://0.0.0.0:8050/



INFO:shapash.webapp.smart_app:Dash is running on http://0.0.0.0:8050/



 * Serving Flask app 'shapash.webapp.smart_app' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


INFO:werkzeug: * Running on http://192.168.2.105:8050/ (Press CTRL+C to quit)


使用後にWebAppを停止します

In [None]:
# Stop the web app that was started by xpl.run_app().
app.kill()