In [1]:
# Build a sample model to explain in the cells below.

from Models.fifa_model_generation import get_fifa_model

# get_fifa_model() returns a trained FIFA model plus two datasets, unpacked
# here as x_dataset and y_dataset (presumably the features and the target
# used for training -- confirm in Models.fifa_model_generation).
model_obj, x_dataset, y_dataset  = get_fifa_model()
# Last expression of the cell: displays the model object's repr.
model_obj



GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
                          learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=100,
                          n_iter_no_change=None, presort='auto',
                          random_state=None, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [6]:
"""Interpretation Class"""

import pandas as pd

class DevExplainer:
    """
    Validate a trained model and its training data, then build skater
    partial-dependence results for every feature.

    The skater ``Interpretation`` and ``InMemoryModel`` objects are created
    lazily on first use and cached on the instance, so repeated calls reuse
    them.

    Parameters
    ----------
    model_obj : object
        Trained model object. Strings are rejected.
    x_train : pandas.DataFrame or pandas.Series
        Training features.
    y_train : pandas.DataFrame or pandas.Series
        Training target.
    model_type : str
        ``"classification"`` or ``"regression"`` (case-insensitive).
    log_level : int
        Log level forwarded to skater's ``Interpretation``.
    features : sequence of str, optional
        Feature names. Detected from ``x_train`` when omitted.
    target : str or sequence, optional
        Target / class names. Detected from ``y_train`` when omitted.
    prediction_fn : callable, optional
        Prediction function. When omitted, ``model_obj.predict_proba`` is
        used if it exists, otherwise ``model_obj.predict``.

    Examples
    --------
        >>> explainer = DevExplainer(model, X, y, model_type="regression")
        >>> pdp_frames = explainer.get_partial_dependence_plot()
    """

    def __init__(self, model_obj, x_train, y_train, model_type="classification", log_level=10, features=None, target=None, prediction_fn=None):
        # Order matters: feature/target detection reads x_train, y_train and
        # model_type, so those must be validated and stored first.
        self.model = self.verify_model(model_obj)
        self.x_train = self.verify_data(x_train)
        self.y_train = self.verify_data(y_train)
        self.model_type = self.verify_model_type(model_type)
        self.feature_names = self.verify_features(features)
        self.target_names = self.verify_target(target)
        self.prediction_fn = prediction_fn
        self.log_level = log_level
        # Lazily created skater objects (see create_* methods).
        self.interpreter = None
        self.inmemory_model = None

    def verify_model(self, model):
        """Return ``model`` unchanged; raise ``TypeError`` if it is a string."""
        print("\nModel Object Type : ", type(model))
        # A type object never equals the literal "str", so the previous
        # `type(model) != "str"` check could never fail. isinstance performs
        # the intended rejection of string inputs.
        if isinstance(model, str):
            raise TypeError('Please provide Proper Model Object')
        print("Model Object Validated")
        return model

    def verify_data(self, dataset):
        """Return ``dataset`` if it is a pandas DataFrame/Series, else raise ``TypeError``."""
        print("\nValidating dataset")
        print("Dataset Type : ", type(dataset))
        if not isinstance(dataset, (pd.DataFrame, pd.Series)):
            raise TypeError('Please provide dataset of type: \nPandas Dataframe\nPandas series')
        return dataset

    def verify_model_type(self, model_type):
        """
        Return the lower-cased model type.

        Raises ``TypeError`` when ``model_type`` is not a string or is not one
        of ``"classification"`` / ``"regression"`` (case-insensitive).
        """
        print("\nValidating model type")
        # Guard before calling .lower() so non-string input produces the same
        # TypeError as an unknown value instead of an AttributeError.
        if not isinstance(model_type, str):
            raise TypeError('Please provide model_type: \nClassification \nRegression')
        normalized = model_type.lower()
        print("Type entered :", normalized)
        if normalized not in ("classification", "regression"):
            raise TypeError('Please provide model_type: \nClassification \nRegression')
        print("Model Type Validated")
        return normalized

    def verify_features(self, features):
        """
        Return the feature names as a list.

        When ``features`` is ``None`` the names are taken from iterating
        ``self.x_train`` (column labels for a DataFrame). Otherwise the
        caller-supplied names are used.
        """
        if features is None:
            print("\nValidating feature names")
            detected = list(self.x_train)
            print("List of features detected :", detected)
            return detected
        # Previously explicit names were dropped (the method returned None).
        if isinstance(features, str):
            return [features]
        return list(features)

    def verify_target(self, target):
        """
        Return the target names as a list.

        When ``target`` is ``None``: for classification the unique values of
        ``self.y_train`` are used, for regression the name of ``self.y_train``.
        Otherwise the caller-supplied names are used.
        """
        if target is None:
            print("\nValidating target names")
            # Read the instance's own training target rather than a notebook
            # global, so the class works with whatever y_train it was given.
            if self.model_type == "classification":
                classes = list(self.y_train.unique())
                print("List of Target Values :", classes)
                return classes
            elif self.model_type == "regression":
                print("Target Column :", self.y_train.name)
                return [self.y_train.name]
            return None
        # Previously explicit names were dropped (the method returned None).
        if isinstance(target, str):
            return [target]
        return list(target)

    def create_interpretation(self):
        """Create (once) and return the skater ``Interpretation`` for the training data."""
        # Imported lazily so the class can be defined without skater installed.
        from skater.core.explanations import Interpretation
        if self.interpreter is None:
            print("Creating Interpretation")
            self.interpreter = Interpretation(
                training_data=self.x_train,
                training_labels=self.y_train,
                feature_names=self.feature_names,
                class_names=self.target_names,
                index=None,
                log_level=self.log_level
            )
        return self.interpreter

    def create_inmemory_model(self):
        """Create (once) and return the skater ``InMemoryModel`` wrapping the prediction function."""
        # Imported lazily so the class can be defined without skater installed.
        from skater.model import InMemoryModel
        if self.inmemory_model is None:
            print("Creating In Memory Model")
            self.inmemory_model = InMemoryModel(
                prediction_fn=self.model_prediction_fn(),
                input_formatter=None,
                output_formatter=None,
                target_names=self.target_names,
                feature_names=self.feature_names,
                unique_values=self.find_unique(),
                # First ten rows, selected positionally regardless of index labels.
                examples=self.x_train.iloc[:10],
                model_type="classifier" if self.model_type == "classification" else "regressor",
                probability=None,
                log_level=30
            )
        return self.inmemory_model

    def get_partial_dependence_plot(self):
        """
        Compute the partial dependence of every feature, one feature at a time.

        Returns
        -------
        list
            One skater partial-dependence result per entry of
            ``self.feature_names``, in the same order.
        """
        interpreter = self.create_interpretation()
        inmemory_model = self.create_inmemory_model()
        pdp_list = []
        for feature in self.feature_names:
            pdp = interpreter.partial_dependence.partial_dependence(
                [feature], inmemory_model, filter_classes=self.target_names, n_jobs=-1
            )
            pdp_list.append(pdp)
        return pdp_list

    def model_prediction_fn(self):
        """
        Return the prediction callable to hand to skater.

        An explicitly supplied ``prediction_fn`` wins; otherwise the model's
        ``predict_proba`` is preferred over ``predict``.

        Raises
        ------
        TypeError
            If no ``prediction_fn`` was given and the model has neither a
            callable ``predict_proba`` nor a callable ``predict``.
        """
        if self.prediction_fn is not None:
            return self.prediction_fn

        predict_proba = getattr(self.model, "predict_proba", None)
        if callable(predict_proba):
            print("Checking if predict proba exists :", True)
            return predict_proba

        predict = getattr(self.model, "predict", None)
        if callable(predict):
            print("Checking if predict exists :", True)
            return predict

        raise TypeError("Please pass appropriate prediction function")

    def find_unique(self):
        """Return the unique target values for classification, ``None`` otherwise."""
        if self.model_type == "classification":
            return list(self.y_train.unique())
        return None

    def wait(self, wait_time):
        """Block the current thread for ``wait_time`` seconds."""
        import time
        time.sleep(wait_time)

In [7]:
# Wrap the trained model and its datasets in a DevExplainer.
# model_type is lower-cased during validation, so "Regression" is accepted.
# log_level=10 corresponds to logging.DEBUG and is forwarded to skater's
# Interpretation, which is why the output below is so verbose.
explainer_obj = DevExplainer(
    model_obj = model_obj,
    x_train = x_dataset,
    y_train = y_dataset,
    model_type = "Regression",
    log_level = 10,
)
# Returns a list with one partial-dependence result per feature; as the last
# expression of the cell, the whole list is displayed.
explainer_obj.get_partial_dependence_plot()

2021-07-20 07:45:13,840 - skater.core.explanations - INFO - Loading Data
2021-07-20 07:45:13,841 - skater.data.datamanager - DEBUG - __init__ data.shape: (3750, 10)
2021-07-20 07:45:13,842 - skater.data.datamanager - DEBUG - after transform X.shape: (3750, 10)
2021-07-20 07:45:13,842 - skater.core.explanations - INFO - Data loaded
2021-07-20 07:45:13,844 - skater.core.explanations - INFO - Data shape: (3750, 10)
2021-07-20 07:45:13,845 - skater.core.explanations - INFO - Dataset Feature_ids: ['age', 'height_cm', 'weight_kg', 'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy', 'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve']
2021-07-20 07:45:13,909 - skater.data.datamanager - INFO - Generated grid of shape [(1, 25)]
2021-07-20 07:45:13,910 - skater.core.explanations - DEBUG - Grid shape used for pdp: (1, 25)
2021-07-20 07:45:13,910 - skater.core.explanations - DEBUG - Grid resolution for pdp: 30
2021-07-20 07:45:13,911 - skater.dat


Model Object Type :  <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Model Object Validated

Validating dataset
Dataset Type :  <class 'pandas.core.frame.DataFrame'>

Validating dataset
Dataset Type :  <class 'pandas.core.series.Series'>

Validating model type
Type entered : regression
Model Type Validated

Validating feature names
List of features detected : ['age', 'height_cm', 'weight_kg', 'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy', 'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve']

Validating target names
Target Column : value_eur
Creating Interpretation
Creating In Memory Model
Checking if predict exists : True


2021-07-20 07:45:14,220 - skater.core.explanations - DEBUG - Shape of sampled data: (1000, 10)
2021-07-20 07:45:14,221 - skater.core.explanations - DEBUG - Feature Ids: ['age']
2021-07-20 07:45:14,221 - skater.core.explanations - DEBUG - PD metadata: {'sd_column': 'sd_estimate', 'target_names': ['value_eur'], 'filtered_target_names': ['value_eur'], 'feature_columns_for_pd': ['age'], 'feature_ids_for_pd': ['age'], 'all_feature_ids': ['age', 'height_cm', 'weight_kg', 'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy', 'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve']}
faster runs, do progressbar=False


[25/25] grid cells ████████████████████ Time elapsed: 0 seconds

2021-07-20 07:45:15,442 - skater.data.datamanager - INFO - Generated grid of shape [(1, 22)]
2021-07-20 07:45:15,443 - skater.core.explanations - DEBUG - Grid shape used for pdp: (1, 22)
2021-07-20 07:45:15,443 - skater.core.explanations - DEBUG - Grid resolution for pdp: 30
2021-07-20 07:45:15,444 - skater.data.datamanager - DEBUG - Generating sample with args:
 {'sample': True, 'strategy': 'random-choice', 'n_samples': 1000, 'replace': True, 'samples_per_bin': array([20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]), 'bin_count': 50}
2021-07-20 07:45:15,745 - skater.core.explanations - DEBUG - Shape of sampled data: (1000, 10)
2021-07-20 07:45:15,746 - skater.core.explanations - DEBUG - Feature Ids: ['height_cm']
2021-07-20 07:45:15,746 - skater.core.explanations - DEBUG - PD metadata: {'sd_column': 'sd_estimate', 'target

[22/22] grid cells ████████████████████ Time elapsed: 0 seconds

2021-07-20 07:45:16,746 - skater.data.datamanager - INFO - Generated grid of shape [(1, 22)]
2021-07-20 07:45:16,747 - skater.core.explanations - DEBUG - Grid shape used for pdp: (1, 22)
2021-07-20 07:45:16,748 - skater.core.explanations - DEBUG - Grid resolution for pdp: 30
2021-07-20 07:45:16,748 - skater.data.datamanager - DEBUG - Generating sample with args:
 {'sample': True, 'strategy': 'random-choice', 'n_samples': 1000, 'replace': True, 'samples_per_bin': array([20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]), 'bin_count': 50}
2021-07-20 07:45:17,051 - skater.core.explanations - DEBUG - Shape of sampled data: (1000, 10)
2021-07-20 07:45:17,052 - skater.core.explanations - DEBUG - Feature Ids: ['weight_kg']
2021-07-20 07:45:17,052 - skater.core.explanations - DEBUG - PD metadata: {'sd_column': 'sd_estimate', 'target

[22/22] grid cells ████████████████████ Time elapsed: 0 seconds

2021-07-20 07:45:18,162 - skater.data.datamanager - INFO - Generated grid of shape [(1, 29)]
2021-07-20 07:45:18,163 - skater.core.explanations - DEBUG - Grid shape used for pdp: (1, 29)
2021-07-20 07:45:18,163 - skater.core.explanations - DEBUG - Grid resolution for pdp: 30
2021-07-20 07:45:18,164 - skater.data.datamanager - DEBUG - Generating sample with args:
 {'sample': True, 'strategy': 'random-choice', 'n_samples': 1000, 'replace': True, 'samples_per_bin': array([20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]), 'bin_count': 50}
2021-07-20 07:45:18,537 - skater.core.explanations - DEBUG - Shape of sampled data: (1000, 10)
2021-07-20 07:45:18,538 - skater.core.explanations - DEBUG - Feature Ids: ['attacking_crossing']
2021-07-20 07:45:18,538 - skater.core.explanations - DEBUG - PD metadata: {'sd_column': 'sd_estimate'

[29/29] grid cells ████████████████████ Time elapsed: 1 seconds

2021-07-20 07:45:19,843 - skater.data.datamanager - INFO - Generated grid of shape [(1, 30)]
2021-07-20 07:45:19,843 - skater.core.explanations - DEBUG - Grid shape used for pdp: (1, 30)
2021-07-20 07:45:19,844 - skater.core.explanations - DEBUG - Grid resolution for pdp: 30
2021-07-20 07:45:19,845 - skater.data.datamanager - DEBUG - Generating sample with args:
 {'sample': True, 'strategy': 'random-choice', 'n_samples': 1000, 'replace': True, 'samples_per_bin': array([20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]), 'bin_count': 50}
2021-07-20 07:45:20,131 - skater.core.explanations - DEBUG - Shape of sampled data: (1000, 10)
2021-07-20 07:45:20,132 - skater.core.explanations - DEBUG - Feature Ids: ['attacking_finishing']
2021-07-20 07:45:20,133 - skater.core.explanations - DEBUG - PD metadata: {'sd_column': 'sd_estimate

[30/30] grid cells ████████████████████ Time elapsed: 1 seconds

2021-07-20 07:45:21,443 - skater.data.datamanager - INFO - Generated grid of shape [(1, 29)]
2021-07-20 07:45:21,444 - skater.core.explanations - DEBUG - Grid shape used for pdp: (1, 29)
2021-07-20 07:45:21,444 - skater.core.explanations - DEBUG - Grid resolution for pdp: 30
2021-07-20 07:45:21,445 - skater.data.datamanager - DEBUG - Generating sample with args:
 {'sample': True, 'strategy': 'random-choice', 'n_samples': 1000, 'replace': True, 'samples_per_bin': array([20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]), 'bin_count': 50}
2021-07-20 07:45:21,723 - skater.core.explanations - DEBUG - Shape of sampled data: (1000, 10)
2021-07-20 07:45:21,724 - skater.core.explanations - DEBUG - Feature Ids: ['attacking_heading_accuracy']
2021-07-20 07:45:21,724 - skater.core.explanations - DEBUG - PD metadata: {'sd_column': 'sd_e

[29/29] grid cells ████████████████████ Time elapsed: 1 seconds

2021-07-20 07:45:23,042 - skater.data.datamanager - INFO - Generated grid of shape [(1, 22)]
2021-07-20 07:45:23,043 - skater.core.explanations - DEBUG - Grid shape used for pdp: (1, 22)
2021-07-20 07:45:23,044 - skater.core.explanations - DEBUG - Grid resolution for pdp: 30
2021-07-20 07:45:23,045 - skater.data.datamanager - DEBUG - Generating sample with args:
 {'sample': True, 'strategy': 'random-choice', 'n_samples': 1000, 'replace': True, 'samples_per_bin': array([20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]), 'bin_count': 50}
2021-07-20 07:45:23,328 - skater.core.explanations - DEBUG - Shape of sampled data: (1000, 10)
2021-07-20 07:45:23,329 - skater.core.explanations - DEBUG - Feature Ids: ['attacking_short_passing']
2021-07-20 07:45:23,329 - skater.core.explanations - DEBUG - PD metadata: {'sd_column': 'sd_esti

[22/22] grid cells ████████████████████ Time elapsed: 0 seconds

2021-07-20 07:45:24,342 - skater.data.datamanager - INFO - Generated grid of shape [(1, 30)]
2021-07-20 07:45:24,343 - skater.core.explanations - DEBUG - Grid shape used for pdp: (1, 30)
2021-07-20 07:45:24,344 - skater.core.explanations - DEBUG - Grid resolution for pdp: 30
2021-07-20 07:45:24,345 - skater.data.datamanager - DEBUG - Generating sample with args:
 {'sample': True, 'strategy': 'random-choice', 'n_samples': 1000, 'replace': True, 'samples_per_bin': array([20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]), 'bin_count': 50}
2021-07-20 07:45:24,627 - skater.core.explanations - DEBUG - Shape of sampled data: (1000, 10)
2021-07-20 07:45:24,628 - skater.core.explanations - DEBUG - Feature Ids: ['attacking_volleys']
2021-07-20 07:45:24,629 - skater.core.explanations - DEBUG - PD metadata: {'sd_column': 'sd_estimate',

[30/30] grid cells ████████████████████ Time elapsed: 1 seconds

2021-07-20 07:45:25,866 - skater.data.datamanager - INFO - Generated grid of shape [(1, 27)]
2021-07-20 07:45:25,867 - skater.core.explanations - DEBUG - Grid shape used for pdp: (1, 27)
2021-07-20 07:45:25,868 - skater.core.explanations - DEBUG - Grid resolution for pdp: 30
2021-07-20 07:45:25,868 - skater.data.datamanager - DEBUG - Generating sample with args:
 {'sample': True, 'strategy': 'random-choice', 'n_samples': 1000, 'replace': True, 'samples_per_bin': array([20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]), 'bin_count': 50}
2021-07-20 07:45:26,208 - skater.core.explanations - DEBUG - Shape of sampled data: (1000, 10)
2021-07-20 07:45:26,209 - skater.core.explanations - DEBUG - Feature Ids: ['skill_dribbling']
2021-07-20 07:45:26,209 - skater.core.explanations - DEBUG - PD metadata: {'sd_column': 'sd_estimate', '

[27/27] grid cells ████████████████████ Time elapsed: 1 seconds

2021-07-20 07:45:27,445 - skater.data.datamanager - INFO - Generated grid of shape [(1, 30)]
2021-07-20 07:45:27,446 - skater.core.explanations - DEBUG - Grid shape used for pdp: (1, 30)
2021-07-20 07:45:27,447 - skater.core.explanations - DEBUG - Grid resolution for pdp: 30
2021-07-20 07:45:27,447 - skater.data.datamanager - DEBUG - Generating sample with args:
 {'sample': True, 'strategy': 'random-choice', 'n_samples': 1000, 'replace': True, 'samples_per_bin': array([20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]), 'bin_count': 50}
2021-07-20 07:45:27,725 - skater.core.explanations - DEBUG - Shape of sampled data: (1000, 10)
2021-07-20 07:45:27,726 - skater.core.explanations - DEBUG - Feature Ids: ['skill_curve']
2021-07-20 07:45:27,727 - skater.core.explanations - DEBUG - PD metadata: {'sd_column': 'sd_estimate', 'targ

[30/30] grid cells ████████████████████ Time elapsed: 1 seconds

[    age     value_eur  sd_prediction    sd_estimate
 0    16  1.005903e+07   9.239088e+06  292165.602753
 1    18  1.005903e+07   9.239088e+06  292165.602753
 2    19  1.005903e+07   9.239088e+06  292165.602753
 3    20  1.005903e+07   9.239088e+06  292165.602753
 4    21  9.664409e+06   9.192643e+06  290696.892250
 5    22  9.432619e+06   9.186374e+06  290498.668330
 6    23  9.287217e+06   9.039529e+06  285855.003576
 7    24  9.192423e+06   8.967918e+06  283590.466360
 8    25  9.176101e+06   8.911109e+06  281794.014368
 9    26  8.962486e+06   8.866687e+06  280389.260469
 10   27  8.524142e+06   8.202239e+06  259377.572685
 11   28  8.052141e+06   7.659391e+06  242211.207540
 12   29  7.583782e+06   7.381666e+06  233428.790389
 13   30  7.074231e+06   7.212012e+06  228063.851054
 14   31  6.376370e+06   6.753165e+06  213553.812435
 15   32  6.251034e+06   6.573827e+06  207882.658085
 16   33  4.427949e+06   5.826020e+06  184234.923059
 17   34  4.081864e+06   5.726471e+06  181086.