# RashomonSetAnalyser class ver. 1.0

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import dalex as dx
import copy

In [2]:
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
X, y = datasets.load_breast_cancer(return_X_y=True, as_frame=True)

params = {'n_estimators': list(np.arange(10, 50, 10, dtype = 'int'))}
rf = RandomForestClassifier()

In [3]:
from RashomonSetAnalyser import RashomonSetAnalyser

### How this class works?

In [4]:
rashomon = RashomonSetAnalyser()

In [5]:
rashomon.base_model is None

True

In [6]:
rashomon.generate_rashomon_set(X, y, rf, searcher_type = 'grid', rashomon_ratio = 1, param_grid = params)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.034286,0.000377,0.004175,0.000181,20,{'n_estimators': 20},0.938596,0.964912,0.991228,0.973684,0.973451,0.968374,0.017175,1
1,0.018947,0.000756,0.003354,0.000258,10,{'n_estimators': 10},0.929825,0.964912,0.982456,0.973684,0.973451,0.964866,0.018378,2
2,0.065511,0.001687,0.006037,0.0003,40,{'n_estimators': 40},0.929825,0.947368,0.991228,0.982456,0.955752,0.961326,0.022622,3
3,0.049235,0.001204,0.005032,6.3e-05,30,{'n_estimators': 30},0.912281,0.938596,0.982456,0.95614,0.973451,0.952585,0.025141,4


In [7]:
rashomon.base_model

['Base model', RandomForestClassifier(n_estimators=20)]

In [8]:
rashomon.models

[['Model 1', RandomForestClassifier(n_estimators=10)],
 ['Model 2', RandomForestClassifier(n_estimators=40)],
 ['Model 3', RandomForestClassifier(n_estimators=30)]]

In [9]:
rashomon.change_rashomon_ratio(0.6)

In [10]:
rashomon.models

[['Model 1', RandomForestClassifier(n_estimators=10)],
 ['Model 2', RandomForestClassifier(n_estimators=40)]]

### Fit models

In [11]:
rashomon.fit(X, y)

### Compare PDP

In [12]:
rashomon.pdp_comparator(X, y)

Unnamed: 0,colname,Model 1,Model 2
0,mean radius,5.2195,3.393417
1,mean texture,1.203833,0.645417
2,mean perimeter,1.709833,1.009667
3,mean area,3.2065,1.554667
4,mean smoothness,2.090333,0.69575
5,mean compactness,0.274667,1.802
6,mean concavity,1.541833,1.974417
7,mean concave points,4.127833,0.8285
8,mean symmetry,0.206833,0.1585
9,mean fractal dimension,1.543667,0.07475


In [13]:
rashomon.pdp_comparator(X, y, metric = 'integrate')

Unnamed: 0,colname,Model 1,Model 2
0,mean radius,-97.0571,-70.492257
1,mean texture,29.969772,-11.772549
2,mean perimeter,-210.114552,7.123237
3,mean area,-4686.481596,1991.587044
4,mean smoothness,-0.032994,0.007596
5,mean compactness,0.040858,0.441499
6,mean concavity,-0.409636,0.414223
7,mean concave points,-0.401068,0.082
8,mean symmetry,0.033852,0.002858
9,mean fractal dimension,-0.111425,0.001364


### Comapre PDP with parameter *save_model_profiles=True* and change metrics

In [14]:
rashomon.pdp_comparator(X, y, save_model_profiles = True)

Unnamed: 0,colname,Model 1,Model 2
0,mean radius,5.2195,3.393417
1,mean texture,1.203833,0.645417
2,mean perimeter,1.709833,1.009667
3,mean area,3.2065,1.554667
4,mean smoothness,2.090333,0.69575
5,mean compactness,0.274667,1.802
6,mean concavity,1.541833,1.974417
7,mean concave points,4.127833,0.8285
8,mean symmetry,0.206833,0.1585
9,mean fractal dimension,1.543667,0.07475


In [15]:
rashomon.pdp_comparator_change_metric(metric = 'integrate')

Unnamed: 0,colname,Model 1,Model 2
0,mean radius,-97.0571,-70.492257
1,mean texture,29.969772,-11.772549
2,mean perimeter,-210.114552,7.123237
3,mean area,-4686.481596,1991.587044
4,mean smoothness,-0.032994,0.007596
5,mean compactness,0.040858,0.441499
6,mean concavity,-0.409636,0.414223
7,mean concave points,-0.401068,0.082
8,mean symmetry,0.033852,0.002858
9,mean fractal dimension,-0.111425,0.001364


In [16]:
rashomon.pdp_comparator_change_metric(metric = 'sum')

Unnamed: 0,colname,Model 1,Model 2
0,mean radius,-5.2195,-3.32275
1,mean texture,0.9665,-0.302417
2,mean perimeter,-1.5705,0.368833
3,mean area,-3.056833,1.554667
4,mean smoothness,-0.988333,-0.17225
5,mean compactness,0.162667,1.741833
6,mean concavity,-1.441833,1.183917
7,mean concave points,-4.127833,0.6995
8,mean symmetry,0.1995,0.055667
9,mean fractal dimension,-1.249,0.023583


### Choose a subset of variables

In [17]:
rashomon.pdp_comparator(X, y, save_model_profiles = True, variables = ['mean area', 'worst area'])

Unnamed: 0,colname,Model 1,Model 2
0,mean area,3.2065,1.554667
1,worst area,3.1535,2.237417


In [18]:
rashomon.pdp_comparator_change_metric(metric = 'integrate')

Unnamed: 0,colname,Model 1,Model 2
0,mean area,-4686.481596,1991.587044
1,worst area,-7120.251331,-5317.726308


In [19]:
rashomon.pdp_comparator_change_metric(metric = 'sum')

Unnamed: 0,colname,Model 1,Model 2
0,mean area,-3.056833,1.554667
1,worst area,-2.448167,-2.185083
