In [1]:
# First, create a sample model to explain.

from Models.fifa_model_generation import get_fifa_model

# get_fifa_model() returns a trained FIFA model plus two datasets; the
# datasets are passed to the explainer below as its training features and
# training target.
(model_obj, x_dataset, y_dataset) = get_fifa_model()
model_obj



GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
                          learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=100,
                          n_iter_no_change=None, presort='auto',
                          random_state=None, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

array([16000000,  6500000,  7000000,  3500000,  2200000,  8000000,
       14000000,  9000000,   875000,  3900000,  4600000,  5000000,
        1900000,  1800000,  6000000,  3600000,  3800000, 10000000,
       32000000,  4000000,  3200000, 41000000,  7500000,  3300000,
       13000000,  4100000,  8500000,  2600000, 16500000,  2700000,
         975000, 12500000,  1500000, 19500000,  1400000,  4900000,
        3700000,  3400000,  5500000,  4300000, 11500000,  3000000,
       14500000, 12000000,  2900000, 40500000,  3100000,  9500000,
        2300000, 21000000,  1000000, 10500000,  1100000, 27000000,
        4700000,  2800000,  4500000,  2500000,  2100000,  1300000,
        1200000, 11000000, 13500000,  2400000,   625000,   675000,
        4400000,   525000,  4800000, 32500000, 56000000,   240000,
       46500000,  4200000, 57000000, 26000000,   725000, 15000000,
       58500000, 21500000, 34500000, 23500000,  2000000,   825000,
       18000000, 17000000, 24000000, 20000000, 29000000, 28500

In [72]:
# Next, define DevExplainer: it validates the model and its data, then computes partial dependence using skater.
import pandas as pd

class DevExplainer:
    """Validate a trained model and its training data, then explain it with skater.

    The constructor runs every ``verify_*`` check and prints what it finds.
    The skater ``Interpretation`` and ``InMemoryModel`` objects are created
    lazily on first use and cached on the instance.

    Parameters
    ----------
    model_obj : object
        Trained model. Anything except a ``str`` is accepted.
    x_train : pandas.DataFrame or pandas.Series
        Training features.
    y_train : pandas.DataFrame or pandas.Series
        Training target.
    model_type : str, default "classification"
        ``"classification"`` or ``"regression"`` (case-insensitive).
    features : str or iterable of str, optional
        Feature names. Defaults to ``list(x_train)``.
    target : str or iterable, optional
        Target names. Defaults to the unique values of ``y_train`` for
        classification, or ``[y_train.name]`` for regression.
    prediction_fn : callable, optional
        Prediction function to use instead of the model's own
        ``predict_proba`` / ``predict``.

    Raises
    ------
    TypeError
        If any of the validations fails.
    """

    def __init__(self, model_obj, x_train, y_train, model_type="classification", features=None, target=None, prediction_fn=None):
        # Order matters: verify_features reads self.x_train, and verify_target
        # reads self.model_type and self.y_train.
        self.model = self.verify_model(model_obj)
        self.x_train = self.verify_data(x_train)
        self.y_train = self.verify_data(y_train)
        self.model_type = self.verify_model_type(model_type)
        self.feature_names = self.verify_features(features)
        self.target_names = self.verify_target(target)
        self.prediction_fn = prediction_fn
        self.interpreter = None
        self.inmemory_model = None

    def verify_model(self, model):
        """Return ``model`` unchanged; raise ``TypeError`` if it is a string."""
        print("\nModel Object Type : ", type(model))
        # The previous check compared type(model) with the literal "str",
        # which is never equal, so strings were silently accepted.
        if isinstance(model, str):
            raise TypeError('Please provide Proper Model Object')
        print("Model Object Validated")
        return model

    def verify_data(self, dataset):
        """Return ``dataset`` if it is a pandas DataFrame or Series, else raise ``TypeError``."""
        print("\nValidating dataset")
        print("Dataset Type : ", type(dataset))
        if isinstance(dataset, (pd.DataFrame, pd.Series)):
            return dataset
        raise TypeError('Please provide dataset of type: \nPandas Dataframe\nPandas series')

    def verify_model_type(self, model_type):
        """Return the lower-cased model type; raise ``TypeError`` if it is not supported."""
        print("\nValidating model type")
        # Non-string input used to fail with AttributeError on .lower();
        # report it through the same TypeError as any other invalid value.
        if not isinstance(model_type, str):
            raise TypeError('Please provide model_type: \nClassification \nRegression')
        normalized = model_type.lower()
        print("Type entered :", normalized)
        if normalized in ("classification", "regression"):
            print("Model Type Validated")
            return normalized
        raise TypeError('Please provide model_type: \nClassification \nRegression')

    def verify_features(self, features):
        """Return the feature names as a list.

        With ``features=None`` the names are taken from ``list(self.x_train)``.
        Otherwise the supplied value is used; a single string is wrapped in a
        list. Previously, supplied names were dropped and ``None`` was returned.
        """
        print("\nValidating feature names")
        if features is None:
            names = list(self.x_train)
            print("List of features detected :", names)
        else:
            names = [features] if isinstance(features, str) else list(features)
            print("List of features provided :", names)
        return names

    def verify_target(self, target):
        """Return the target names as a list.

        With ``target=None`` the names come from ``self.y_train``: its unique
        values for classification, or ``[self.y_train.name]`` for regression.
        Otherwise the supplied value is used; a single string is wrapped in a
        list. Previously this read the notebook global ``y_dataset`` and
        dropped supplied names.
        """
        print("\nValidating target names")
        if target is not None:
            names = [target] if isinstance(target, str) else list(target)
            print("List of targets provided :", names)
            return names
        if self.model_type == "classification":
            names = list(self.y_train.unique())
            print("List of Target Values :", names)
            return names
        if self.model_type == "regression":
            print("Target Column :", self.y_train.name)
            return [self.y_train.name]
        return None

    def get_partial_dependence_plot(self):
        """Compute partial dependence for every feature and return the results as a list.

        One call to skater's ``partial_dependence`` is made per name in
        ``self.feature_names``; results are collected in feature order.
        """
        self.interpreter = self.create_interpretation()
        self.inmemory_model = self.create_inmemory_model()
        pdp_list = []
        for feature in self.feature_names:
            pdp = self.interpreter.partial_dependence.partial_dependence(
                [feature], self.inmemory_model, filter_classes=self.target_names, n_jobs=-1
            )
            pdp_list.append(pdp)
        return pdp_list

    def model_prediction_fn(self):
        """Return the callable used for predictions.

        Preference order: the user-supplied ``prediction_fn``, then the
        model's ``predict_proba``, then its ``predict``.

        Raises
        ------
        TypeError
            If no ``prediction_fn`` was given and the model has neither method.
        """
        if self.prediction_fn is not None:
            return self.prediction_fn
        has_predict_proba = callable(getattr(self.model, "predict_proba", None))
        if has_predict_proba:
            print("Checking if predict proba exists :", has_predict_proba)
            return self.model.predict_proba
        has_predict = callable(getattr(self.model, "predict", None))
        if has_predict:
            print("Checking if predict exists :", has_predict)
            return self.model.predict
        raise TypeError("Please pass appropriate prediction function")

    def find_unique(self):
        """Return the unique training-target values for classification, else ``None``."""
        if self.model_type == "classification":
            return list(self.y_train.unique())
        return None

    def create_inmemory_model(self):
        """Return the cached skater ``InMemoryModel``, creating it on first call."""
        # Imported lazily so the class can be defined and validated without skater.
        from skater.model import InMemoryModel
        if self.inmemory_model is None:
            print("Creating In Memory Model")
            self.inmemory_model = InMemoryModel(
                self.model_prediction_fn(),
                # The first ten training rows are passed as example inputs.
                examples=self.x_train[0:10],
                target_names=self.target_names,
                unique_values=self.find_unique(),
                feature_names=self.feature_names,
            )
        return self.inmemory_model

    def create_interpretation(self):
        """Return the cached skater ``Interpretation``, creating it on first call."""
        # Imported lazily so the class can be defined and validated without skater.
        from skater.core.explanations import Interpretation
        if self.interpreter is None:
            print("Creating Interpretation")
            self.interpreter = Interpretation(
                training_data=self.x_train, training_labels=self.y_train, feature_names=self.feature_names
            )
        return self.interpreter

    def wait(self, wait_time):
        """Block the calling thread for ``wait_time`` seconds."""
        import time
        time.sleep(wait_time)

In [73]:
# Wrap the trained model and its datasets in the explainer; the constructor
# prints its validation log. The final expression is the cell output: a list
# with one partial-dependence result per feature.
explainer_obj = DevExplainer(model_obj=model_obj, x_train=x_dataset, y_train=y_dataset, model_type="Regression")
explainer_obj.get_partial_dependence_plot()


Model Object Type :  <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Model Object Validated

Validating dataset
Dataset Type :  <class 'pandas.core.frame.DataFrame'>

Validating dataset
Dataset Type :  <class 'pandas.core.series.Series'>

Validating model type
Type entered : regression
Model Type Validated

Validating feature names
List of features detected : ['age', 'height_cm', 'weight_kg', 'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy', 'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve']

Validating target names
Target Column : value_eur
Creating Interpretation
Creating In Memory Model
Checking if predict exists : True


faster runs, do progressbar=False


[25/25] grid cells ████████████████████ Time elapsed: 0 seconds

faster runs, do progressbar=False


[22/22] grid cells ████████████████████ Time elapsed: 0 seconds

faster runs, do progressbar=False


[22/22] grid cells ████████████████████ Time elapsed: 0 seconds

faster runs, do progressbar=False


[29/29] grid cells ████████████████████ Time elapsed: 1 seconds

faster runs, do progressbar=False


[30/30] grid cells ████████████████████ Time elapsed: 1 seconds

faster runs, do progressbar=False


[29/29] grid cells ████████████████████ Time elapsed: 1 seconds

faster runs, do progressbar=False


[22/22] grid cells ████████████████████ Time elapsed: 0 seconds

faster runs, do progressbar=False


[30/30] grid cells ████████████████████ Time elapsed: 1 seconds

faster runs, do progressbar=False


[27/27] grid cells ████████████████████ Time elapsed: 0 seconds

faster runs, do progressbar=False


[30/30] grid cells ████████████████████ Time elapsed: 1 seconds

[    age     value_eur  sd_prediction    sd_estimate
 0    16  9.751466e+06   9.396570e+06  297145.634900
 1    18  9.751466e+06   9.396570e+06  297145.634900
 2    19  9.751466e+06   9.396570e+06  297145.634900
 3    20  9.751466e+06   9.396570e+06  297145.634900
 4    21  9.335479e+06   9.356949e+06  295892.708899
 5    22  9.086123e+06   9.356228e+06  295869.915418
 6    23  8.942822e+06   9.228361e+06  291826.400820
 7    24  8.846643e+06   9.160839e+06  289691.160625
 8    25  8.836717e+06   9.102405e+06  287843.309612
 9    26  8.616281e+06   9.037589e+06  285793.673296
 10   27  8.244109e+06   8.552783e+06  270462.731408
 11   28  7.793055e+06   7.935758e+06  250950.702727
 12   29  7.346028e+06   7.728520e+06  244397.252197
 13   30  6.845412e+06   7.566180e+06  239263.633000
 14   31  6.193929e+06   7.278120e+06  230154.358990
 15   32  6.074633e+06   7.131843e+06  225528.663725
 16   33  4.274136e+06   6.414746e+06  202852.077809
 17   34  3.936496e+06   6.336699e+06  200384.