## Load Packages

In [1]:
import warnings
warnings.filterwarnings('ignore')
##
import pandas as pd
import numpy as np
import dalex as dx
import math
import matplotlib.pyplot as plt
from pprint import pprint
##
from sklearn import preprocessing
from sklearn.decomposition import PCA
##
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV 
##
from sklearn.svm import NuSVC
from sklearn.svm import SVC
##
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import PassiveAggressiveClassifier

## Function

In [2]:
def get_important_features(transformed_features, components_, columns):
    """
    This function will return the most "important" 
    features so we can determine which have the most
    effect on multi-dimensional scaling
    """
    num_columns = len(columns)

    # Scale the principal components by the max value in
    # the transformed set belonging to that component
    xvector = components_[0] * max(transformed_features[:,0])
    yvector = components_[1] * max(transformed_features[:,1])

    # Sort each column by it's length. These are your *original*
    # columns, not the principal components.
    important_features = { columns[i] : math.sqrt(xvector[i]**2 + yvector[i]**2) for i in range(num_columns) }
    important_features = sorted(zip(important_features.values(), important_features.keys()), reverse=True)
    return important_features

## Load Data 

In [3]:
databank = pd.read_csv('data.csv',low_memory=False, index_col=0)
databank.columns = databank.columns.str.lower()
databank.columns = databank.columns.str.rsplit('(', n=1).str.get(0)
databank.columns = databank.columns.str.replace(" ", "_")
databank.columns = databank.columns.str.replace("\\.", "")
databank.columns = databank.columns.str.replace("-", "_")
databank.columns = databank.columns.str.rstrip('_')
##
databank = databank.drop(['phase'], axis=1)
databank['viscosity'] = pd.to_numeric(databank['viscosity'],errors = 'coerce')
databank['therm_cond'] = pd.to_numeric(databank['therm_cond'],errors = 'coerce')

In [4]:
databank.isnull().sum().sum()

45432

In [5]:
databank = databank.dropna()

In [None]:
databank.info()

In [6]:
databank.head()

Unnamed: 0,temperature,pressure,density,internal_energy,enthalpy,entropy,cv,cp,sound_spd,joule_thomson,viscosity,therm_cond,fluid
0,273.16,1.0,999.84,3.3e-05,0.001835,0.000121,75.97,76.014,1402.4,-0.024141,0.001791,0.56109,Water
1,273.33,1.0,999.85,0.012701,0.014503,0.046483,75.963,76.004,1403.3,-0.024125,0.001781,0.56141,Water
2,273.49,1.0,999.86,0.025368,0.02717,0.092811,75.957,75.993,1404.1,-0.024109,0.00177,0.56173,Water
3,273.66,1.0,999.87,0.038033,0.039834,0.1391,75.95,75.983,1404.9,-0.024092,0.00176,0.56204,Water
4,273.83,1.0,999.88,0.050696,0.052497,0.18536,75.943,75.973,1405.8,-0.024076,0.00175,0.56236,Water


In [7]:
X = databank.drop(labels='fluid', axis=1)

## Correlation

In [8]:
corr = X.corr()
corr

Unnamed: 0,temperature,pressure,density,internal_energy,enthalpy,entropy,cv,cp,sound_spd,joule_thomson,viscosity,therm_cond
temperature,1.0,-7e-05,-0.284134,0.40142,0.458121,0.276476,-0.194763,-0.177681,-0.135023,0.032295,-0.29766,-0.228899
pressure,-7e-05,1.0,0.072677,-0.054971,-0.059051,-0.136202,0.048334,0.074366,0.052243,-0.097654,0.017558,0.063586
density,-0.284134,0.072677,1.0,-0.208803,-0.285342,-0.699045,0.913052,0.896221,0.967046,-0.223034,0.773671,0.988687
internal_energy,0.40142,-0.054971,-0.208803,1.0,0.995653,-0.032798,0.112838,0.162262,-0.13977,0.827372,-0.232866,-0.199912
enthalpy,0.458121,-0.059051,-0.285342,0.995653,1.0,0.031985,0.034962,0.083138,-0.20878,0.811404,-0.286944,-0.274136
entropy,0.276476,-0.136202,-0.699045,-0.032798,0.031985,1.0,-0.656624,-0.654669,-0.707087,-0.042692,-0.562709,-0.69214
cv,-0.194763,0.048334,0.913052,0.112838,0.034962,-0.656624,1.0,0.99004,0.866492,0.056538,0.72682,0.895988
cp,-0.177681,0.074366,0.896221,0.162262,0.083138,-0.654669,0.99004,1.0,0.846673,0.102654,0.656474,0.880594
sound_spd,-0.135023,0.052243,0.967046,-0.13977,-0.20878,-0.707087,0.866492,0.846673,1.0,-0.223943,0.733957,0.980927
joule_thomson,0.032295,-0.097654,-0.223034,0.827372,0.811404,-0.042692,0.056538,0.102654,-0.223943,1.0,-0.199242,-0.244067


In [9]:
features_corr = ~(corr.mask(np.eye(len(corr), dtype=bool)).abs() > 0.95).any() # 0.95 / 0.99
features_corr

temperature         True
pressure            True
density            False
internal_energy    False
enthalpy           False
entropy             True
cv                 False
cp                 False
sound_spd          False
joule_thomson       True
viscosity           True
therm_cond         False
dtype: bool

In [10]:
X_good = corr.loc[features_corr, features_corr]
lst_variable_corr = X_good.columns.values.tolist()
X_corr = X[np.intersect1d(X.columns, lst_variable_corr)]

In [11]:
df_corr = X[X_corr.columns]
df_corr

Unnamed: 0,entropy,joule_thomson,pressure,temperature,viscosity
0,0.000121,-0.024141,1.0,273.16,0.001791
1,0.046483,-0.024125,1.0,273.33,0.001781
2,0.092811,-0.024109,1.0,273.49,0.001770
3,0.139100,-0.024092,1.0,273.66,0.001760
4,0.185360,-0.024076,1.0,273.83,0.001750
...,...,...,...,...,...
157649,100.010000,0.410190,15.0,572.33,0.000024
157650,100.020000,0.409930,15.0,572.50,0.000024
157651,100.030000,0.409660,15.0,572.67,0.000024
157652,100.040000,0.409390,15.0,572.83,0.000024


## PCA
 - https://benalexkeen.com/principle-component-analysis-in-python/

In [None]:
pca = PCA(n_components=12, svd_solver='full')
pca.fit(X)

In [None]:
T = pca.transform(X)

In [None]:
T.shape

In [None]:
pca.explained_variance_ratio_

In [None]:
components = pd.DataFrame(pca.components_, columns = X.columns, index=[1,2,3,4,5,6,7,8,9,10,11,12])
components

In [None]:
pca_result = get_important_features(T, pca.components_, X.columns.values)
pca_result = pd.DataFrame(pca_result,columns=['PCA_Value','Variable'])
threshold = 3
pca_result = pca_result[pca_result["PCA_Value"] >= 3]
pca_result

In [None]:
X_pca = pca_result['Variable']
df_pca = X[X_pca]

In [None]:
df_pca

## Modelowanie

### Split dataset 

In [12]:
lb_enc = preprocessing.LabelEncoder()
databank["fluid"] = lb_enc.fit_transform(databank["fluid"])

In [13]:
y = databank.fluid
X = df_corr # df_pca

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, stratify = databank['fluid'])

### Multiclass as One-Vs-One

#### Non-linear SVM

In [17]:
clf_nusvc = NuSVC()
clf_nusvc.fit(X_train, y_train)
print('Parameters currently in use:\n')
pprint(clf_nusvc.get_params())

Parameters currently in use:

{'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'nu': 0.5,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}


In [20]:
nusvc_params = {
    'break_ties': [True,False],
    'cache_size': [200,500,1000],
    'class_weight': [None],
    'coef0': [0],
    'decision_function_shape': ['ovr'],
    'degree': [3,5,7,10],
    'gamma': ['scale','auto'],
    'max_iter': [-1],
    'nu': [0.5],
    'probability': [True,False],
    'random_state': [None],
    'shrinking': [True,False],
    'tol': [0.001],
    'verbose': [True,False]
}
pprint(nusvc_params)

{'break_ties': [True, False],
 'cache_size': [200, 500, 1000],
 'class_weight': [None],
 'coef0': [0],
 'decision_function_shape': ['ovr'],
 'degree': [3, 5, 7, 10],
 'gamma': ['scale', 'auto'],
 'max_iter': [-1],
 'nu': [0.5],
 'probability': [True, False],
 'random_state': [None],
 'shrinking': [True, False],
 'tol': [0.001],
 'verbose': [True, False]}


In [None]:
random_nusvc = RandomizedSearchCV(estimator = clf_nusvc, 
                                  param_distributions = nusvc_params, 
                                  n_iter = 100, 
                                  cv = 5, 
                                  verbose=2, 
                                  random_state=42, 
                                  n_jobs = -1)
random_nusvc.fit(X_train,y_train)
random_nusvc.best_params

Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


In [None]:
exp_nusvc_clf = dx.Explainer(clf_nusvc, X_train, y_train)
exp_nusvc_clf.model_performance().result

In [None]:
exp_nusvc_random = dx.Explainer(random_nusvc, X_train, y_train)
exp_nusvc_random.model_performance().result

In [None]:
md_nusvc = exp_nusvc_clf.model_diagnostics()
md_nusvc.plot(md_nusvc, variable='entropy', yvariable='residuals', marker_size=5)

In [None]:
lime_nusvc = exp_nusvc_clf.predict_surrogate(X.iloc[[1]])
lime_nusvc.plot()

#### SVC

In [15]:
clf_svc = SVC()
clf_svc.fit(X_train, y_train)
print('Parameters currently in use:\n')
pprint(clf_svc.get_params())

Parameters currently in use:

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}


In [18]:
svc_params = {'C': [1.0],
    'break_ties': [True,False],
    'cache_size': [200],
    'class_weight': [None],
    'coef0': [0.0],
    'decision_function_shape': ['ovr'],
    'degree': [3],
    'gamma': ['scale','auto'],
    'kernel': ['linear','poly','rbf','sigmoid','precomputed'],
    'max_iter': [-1],
    'probability': [True,False],
    'random_state': [None],
    'shrinking': [True,False],
    'tol': [0.001],
    'verbose': [True,False]
}
pprint(svc_params)

{'C': [1.0],
 'break_ties': [True, False],
 'cache_size': [200],
 'class_weight': [None],
 'coef0': [0.0],
 'decision_function_shape': ['ovr'],
 'degree': [3],
 'gamma': ['scale', 'auto'],
 'kernel': ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
 'max_iter': [-1],
 'probability': [True, False],
 'random_state': [None],
 'shrinking': [True, False],
 'tol': [0.001],
 'verbose': [True, False]}


In [None]:
random_svc = RandomizedSearchCV(estimator = clf_svc, 
                                  param_distributions = svc_params, 
                                  n_iter = 100, 
                                  cv = 5, 
                                  verbose=2, 
                                  random_state=42, 
                                  n_jobs = -1)
random_svc.fit(X_train,y_train)
random_svc.best_params

Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


In [None]:
exp_svc_clf = dx.Explainer(clf_nusvc, X_train, y_train)
exp_svc_clf.model_performance().result

In [None]:
exp_svc_random = dx.Explainer(random_svc, X_train, y_train)
exp_svc_random.model_performance().result

In [None]:
md_svc = exp_svc_clf.model_diagnostics()
md_svc.plot(md_nusvc, variable='entropy', yvariable='residuals', marker_size=5)

In [None]:
lime_svc = exp_svc_clf.predict_surrogate(X.iloc[[1]])
lime_svc.plot()

### Multiclass as One-Vs-The-Rest

#### Gradient Boosting Classifier

In [None]:
clf_gbm = GradientBoostingClassifier()
clf_gbm.fit(X_train, y_train)
print('Parameters currently in use:\n')
pprint(clf_gbm.get_params())

In [None]:
gbm_params = {
    "learning_rate": [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2],
    "min_samples_split": np.linspace(0.1, 0.5, 12),
    "min_samples_leaf": np.linspace(0.1, 0.5, 12),
    "max_depth":[3,5,8],
    "max_features":["log2","sqrt"],
    "criterion": ["friedman_mse",  "mae"],
    "subsample":[0.5, 0.618, 0.8, 0.85, 0.9, 0.95, 1.0],
    "n_estimators":[10]
}
pprint(gbm_params)

In [None]:
random_gbm = RandomizedSearchCV(estimator = clf_gbm, 
                                param_distributions = gbm_params, 
                                n_iter = 100, 
                                cv = 3, 
                                verbose=2, 
                                random_state=42, 
                                n_jobs = -1)
random_gbm.fit(X_train,y_train)
random_gbm.best_params

In [None]:
exp_gbm_clf = dx.Explainer(clf_gbm, X_train, y_train)
exp_gbm_clf.model_performance().result

In [None]:
exp_gbm_clf = dx.Explainer(random_gbm, X_train, y_train)
exp_gbm_clf.model_performance().result

In [None]:
md_gbm = exp_gbm_clf.model_diagnostics()
md_gbm.plot(md_gbm, variable='entropy', yvariable='residuals', marker_size=5)

In [None]:
lime_gbm = exp_gbm_clf.predict_surrogate(X.iloc[[1]])
lime_gbm.plot()

#### Linear SVC

In [None]:
clf_linear = LinearSVC(multi_class="ovr")
clf_linear.fit(X_train, y_train)
print('Parameters currently in use:\n')
pprint(clf_linear.get_params())

In [None]:
linear_params = {
    'C': [1.0],
    'class_weight': ['dict','balanced'],
    'dual': [True,False],
    'fit_intercept': [True,False],
    'intercept_scaling': [1],
    'loss': ['hinge','squared_hinge'],
    'max_iter': [500,1000,2500,5000,10000],
    'multi_class': ['ovr'],
    'penalty': ['l1','l2'],
    'random_state': [0],
    'tol': [0.0001],
    'verbose': [0]
}
pprint(linear_params)

In [None]:
random_linear = RandomizedSearchCV(estimator = clf_linear, 
                                  param_distributions = linear_params, 
                                  n_iter = 100, 
                                  cv = 5, 
                                  verbose=2, 
                                  random_state=42, 
                                  n_jobs = -1)
random_linear.fit(X_train,y_train)
random_linear.best_params_nie                                  `````````

In [None]:
exp_linear_clf = dx.Explainer(clf_linear, X_train, y_train)
exp_linear_clf.model_performance().result

In [None]:
exp_linear_random = dx.Explainer(random_linear, X_train, y_train)
exp_linear_random.model_performance().result

In [None]:
md_linear = exp_logit_clf.model_diagnostics()
md_linear.plot(md_logit, variable='entropy', yvariable='residuals', marker_size=5)

In [None]:
lime_linear = exp_linear_clf.predict_surrogate(X.iloc[[1]])
lime_linear.plot()

#### Logistic Regression

In [None]:
clf_logit = LogisticRegression(random_state=0, multi_class="ovr")
clf_logit.fit(X_train, y_train)
print('Parameters currently in use:\n')
pprint(clf_logit.get_params())

In [None]:
logit_params = {
     'C': [1.0],
     'class_weight': [None],
     'dual': [False], 
     'fit_intercept': [True,False],
     'intercept_scaling': [1],
     'l1_ratio': [0.1,0.5,0.7],
     'max_iter': [50,100,150,200,250], 
     'penalty': ['l2','elasticnet'],
     'random_state': [0],
     'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'], 
     'tol': [0.0001],
     'verbose': [0],
     'warm_start': [True,False],  
     'n_jobs': [-1]
}
pprint(logit_params)

In [None]:
random_logit = RandomizedSearchCV(estimator = clf_logit, 
                                  param_distributions = logit_params, 
                                  n_iter = 100, 
                                  cv = 5, 
                                  verbose=2, 
                                  random_state=42, 
                                  n_jobs = -1)
random_logit.fit(X_train,y_train)
random_logit.best_params_

In [None]:
exp_logit_clf = dx.Explainer(clf_logit, X_train, y_train)
exp_logit_clf.model_performance().result

In [None]:
exp_logit_random = dx.Explainer(random_logit, X_train, y_train)
exp_logit_random.model_performance().result

In [None]:
md_logit = exp_logit_clf.model_diagnostics()
md_logit.plot(md_logit, variable='entropy', yvariable='residuals', marker_size=5)

In [None]:
lime_logit = exp_logit_clf.predict_surrogate(X.iloc[[1]])
lime_logit.plot()

#### LogisticRegressionCV

In [None]:
clf_logit_cv = LogisticRegressionCV(multi_class="ovr")
clf_logit_cv.fit(X_train, y_train)
print('Parameters currently in use:\n')
pprint(clf_logit_cv.get_params())

In [None]:
logit_cv_params = {'Cs': [5,10,15,20],
    'class_weight': [None],
    'cv': [3,5,10],
    'dual': [True,False],
    'fit_intercept': [True,False],
    'intercept_scaling': [1.0],
    'l1_ratios': ['l1','l2','elasticnet'],
    'max_iter': [50,100,150,250],
    'n_jobs': [-1],
    'penalty': ['l1','l2','elasticnet'],
    'random_state': [0],
    'refit': [True,False],
    'scoring': [None],
    'solver': ['lbfgs'],
    'tol': [0.0001],
    'verbose': [0]}
pprint(logit_cv_params)

In [None]:
random_logit_cv = RandomizedSearchCV(estimator = clf_logit_cv, 
                                param_distributions = logit_cv_params, 
                                n_iter = 100, 
                                cv = 5, 
                                verbose=2, 
                                random_state=42, 
                                n_jobs = -1)
random_logit_cv.fit(X_train,y_train)
random_logit_cv.best_params_

In [None]:
exp_logit_cv_clf = dx.Explainer(clf_logit_cv, X_train, y_train)
exp_logit_cv_clf.model_performance().result

In [None]:
exp_logit_cv_random = dx.Explainer(random_logit_cv, X_train, y_train)
exp_logit_cv_random.model_performance().result

In [None]:
md_logit_cv_clf = exp_logit_cv_clf.model_diagnostics()
md_logit_cv_clf.plot(md_logit_cv_clf, variable='entropy', yvariable='residuals', marker_size=5)

In [None]:
lime_logit_cv_clf = exp_logit_cv_clf.predict_surrogate(X.iloc[[1]])
lime_logit_cv_clf.plot()

#### SGD Classifier

In [None]:
clf_sgd = SGDClassifier()
clf_sgd.fit(X_train, y_train)
print('Parameters currently in use:\n')
pprint(clf_sgd.get_params())

In [None]:
sgd_params = {
    'alpha': [0.0001],
    'average': [True,False],
    'class_weight': [None],
    'early_stopping': [True,False],
    'epsilon': [0.1],
    'eta0': [0.1],
    'fit_intercept': [True,False],
    'l1_ratio': [0.1,0.2,0.3],
    'learning_rate': ['constant','optimal','invscaling'],
    'loss': ['hinge','log','modified_huber','squared_hinge','perceptron'],
    'max_iter': [100,500,1000,10000],
    'n_iter_no_change': [5],
    'n_jobs': [-1],
    'penalty': ['l2','l1','elasticnet'],
    'power_t': [0.5,0.7,0.9],
    'random_state': [None],
    'shuffle': [True,False],
    'tol': [0.001],
    'validation_fraction': [0.1],
    'verbose': [0],
    'warm_start': [True,False]
}
pprint(sgd_params)

In [None]:
random_sgd = RandomizedSearchCV(estimator = clf_sgd, 
                                param_distributions = sgd_params, 
                                n_iter = 100, 
                                cv = 5, 
                                verbose=2, 
                                random_state=42, 
                                n_jobs = -1)
random_sgd.fit(X_train,y_train)
random_sgd.best_params_

In [None]:
exp_sgd_clf = dx.Explainer(clf_sgd, X_train, y_train)
exp_sgd_clf.model_performance().result

In [None]:
exp_sgd_random = dx.Explainer(random_sgd, X_train, y_train)
exp_sgd_random.model_performance().result

In [None]:
md_sgd = exp_sgd_random.model_diagnostics()
md_sgd.plot(md_sgd, variable='entropy', yvariable='residuals', marker_size=5)

In [None]:
lime_sgd = exp_sgd_random.predict_surrogate(X.iloc[[1]])
lime_sgd.plot()

#### Perceptron

In [None]:
clf_perceptron = Perceptron()
clf_perceptron.fit(X_train, y_train)
print('Parameters currently in use:\n')
pprint(clf_perceptron.get_params())

In [None]:
perceptron_params = {
    'alpha': [0.0001],
    'class_weight': ['balanced','weight','dict'],
    'early_stopping': [True,False],
    'eta0': [0.5,0.7,1],
    'fit_intercept': [True,False],
    'max_iter': [100,500,1000,10000],
    'n_iter_no_change': [2,5,10],
    'n_jobs': [-1],
    'penalty': ['l2','l1','elasticnet'],
    'random_state': [0],
    'shuffle': [True,False],
    'tol': [0.001],
    'validation_fraction':[0.1],
    'verbose': [0],
    'warm_start': [True,False]
}
pprint(perceptron_params)

In [None]:
random_perceptron = RandomizedSearchCV(estimator = clf_perceptron, 
                                       param_distributions = perceptron_params, 
                                       n_iter = 100, 
                                       cv = 5, 
                                       verbose=2, 
                                       random_state=42, 
                                       n_jobs = -1)
random_perceptron.fit(X_train,y_train)
random_perceptron.best_params_

In [None]:
exp_perceptron_clf = dx.Explainer(clf_perceptron, X_train, y_train)
exp_perceptron_clf.model_performance().result

In [None]:
exp_perceptron_random = dx.Explainer(random_perceptron, X_train, y_train)
exp_perceptron_random.model_performance().result

In [None]:
md_perceptron = exp_perceptron_clf.model_diagnostics()
md_perceptron.plot(md_perceptron, variable='entropy', yvariable='residuals', marker_size=5)

In [None]:
lime_perceptron = exp_perceptron_clf.predict_surrogate(X.iloc[[1]])
lime_perceptron.plot()

#### Passive Aggressive Classifier

In [None]:
clf_passive_aggressive = PassiveAggressiveClassifier()
clf_passive_aggressive.fit(X_train, y_train)
print('Parameters currently in use:\n')
pprint(clf_passive_aggressive.get_params())

In [None]:
passive_aggressive_params = {
    'C': [0.5,0.7,1.0,2.0],
    'average': [True,False],
    'class_weight': ['dict','balanced',None],
    'early_stopping': [True,False],
    'fit_intercept': [True,False],
    'loss': ['hinge','squared_hinge'],
    'max_iter': [100,250,500,1000,10000],
    'n_iter_no_change': [5,10,15,20],
    'n_jobs': [-1],
    'random_state': [42],
    'shuffle': [True,False],
    'tol': [0.001],
    'validation_fraction': [0.1,0.5,0.7],
    'verbose': [0],
    'warm_start': [True,False]
}
pprint(passive_aggressive_params)

In [None]:
random_passive_aggressive = RandomizedSearchCV(estimator = clf_passive_aggressive, 
                                               param_distributions = passive_aggressive_params, 
                                               n_iter = 100, 
                                               cv = 5, 
                                               verbose=2, 
                                               random_state=42, 
                                               n_jobs = -1)
random_passive_aggressive.fit(X_train,y_train)
random_passive_aggressive.best_params_

In [None]:
exp_passive_aggressive_clf = dx.Explainer(clf_passive_aggressive, X_train, y_train)
exp_passive_aggressive_clf.model_performance().result

In [None]:
exp_passive_aggressive_random = dx.Explainer(random_passive_aggressive, X_train, y_train)
exp_passive_aggressive_random.model_performance().result

In [None]:
md_passive_aggressive = exp_passive_aggressive_clf.model_diagnostics()
md_passive_aggressive.plot(md_passive_aggressive, variable='entropy', yvariable='residuals', marker_size=5)

In [None]:
lime_passive_aggressive = exp_passive_aggressive_clf.predict_surrogate(X.iloc[[1]])
lime_passive_aggressive.plot()