### Partial dependence plots

In [None]:
#libraries
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble.partial_dependence import plot_partial_dependence
from sklearn.ensemble.partial_dependence import partial_dependence
import pandas as pd

#custom
import utils
import plots
import transformers as tran

# Name of the target column; it is dropped from the feature frames and used
# as y when the model is trained and evaluated further down.
LABEL = "1Y_default"

# Render matplotlib figures inside the notebook output.
%matplotlib inline  
# Load IPython's autoreload extension and switch it to mode 2, so that edits
# to imported modules (e.g. the local utils / plots / transformers helpers)
# are picked up before each cell runs, without restarting the kernel.
# The extension must be loaded before %autoreload can be configured.
%load_ext autoreload
%autoreload 2

In [None]:
# Load the train and test splits from CSV (train first, then test).
# na_filter=False turns off pandas' missing-value detection, so NA-like
# strings and empty fields are kept as-is instead of being parsed to NaN.
train_df, test_df = (
    pd.read_csv(csv_path, na_filter=False)
    for csv_path in ("train_full.csv", "test_full.csv")
)

In [None]:
#sanity checks
# Print the (rows, columns) shape of each split.
print("Train shape: {}".format(train_df.shape))
print("Test shape: {}".format(test_df.shape))
# Run the project's missing-value check on each split under its own heading.
# NOTE(review): utils.check_missing is defined outside this notebook;
# presumably it reports missing values per column -- confirm. Because the CSVs
# were read with na_filter=False, NA-like strings were not converted to NaN.
print("Train:")
utils.check_missing(train_df)
print("Test:")
# Kept as the last expression of the cell: if the helper returns a value,
# the notebook renders it as this cell's output.
utils.check_missing(test_df)

In [None]:
# Subset of feature columns the model is trained on and whose partial
# dependence is plotted below.
cols_sel = [
    'Interest',
    'Age',
    'IncomeTotal',
    'AppliedAmount',
]

In [None]:
#model
# Fix the seed: gradient boosting uses randomness (per-tree seeds and the
# feature permutation tried at each split), so without random_state the
# metrics and partial dependence plots can differ between runs.
gb_sel = GradientBoostingClassifier(random_state=42)
print(gb_sel)

#train
# Fit the project's transformer on the training features only, then restrict
# the transformed frame to the selected columns.
# NOTE(review): tran.OHE_transformer is defined outside this notebook;
# presumably a one-hot encoder that returns a DataFrame -- confirm.
ohe_rf = tran.OHE_transformer()
X_train = ohe_rf.fit_transform(train_df.drop([LABEL], axis=1))
y_train = train_df[LABEL]
X_train_sel = X_train[cols_sel]
# Selecting columns does not change the rows, so the labels are shared.
y_train_sel = y_train

#test
# Reuse the transformer fitted on train (transform only, no refit).
X_test = ohe_rf.transform(test_df.drop([LABEL], axis=1))
y_test = test_df[LABEL]
X_test_sel = X_test[cols_sel]
y_test_sel = y_test

#fit
# The model is trained on plain numpy arrays (no column names attached).
gb_sel.fit(X_train_sel.values, y_train_sel.values)

#predict
# Column 1 of predict_proba is the probability of the second entry in
# gb_sel.classes_ (the positive class for a binary 0/1 label).
train_preds_proba = gb_sel.predict_proba(X_train_sel.values)[:,1]
test_preds_proba = gb_sel.predict_proba(X_test_sel.values)[:,1]

#results
# Project helpers: metric summary, ROC curve on the test split, and feature
# importances labelled with the selected column names.
utils.print_results(train_preds_proba, y_train_sel, test_preds_proba, y_test_sel)
plots.plot_ROC_curve(gb_sel, X_test_sel, y_test_sel)
plots.FeaturesImportanceTree(gb_sel, X_train_sel.columns, figsize=(5,5))

In [None]:
# One partial dependence panel per listed feature. Features are given by
# name, so feature_names must list the columns in the order the model saw them.
features = ["Interest", "AppliedAmount"]
names = X_train_sel.columns.tolist()
fig, axs = plot_partial_dependence(
    gb_sel,
    X_train_sel,
    features,
    feature_names=names,
    grid_resolution=50,
    n_jobs=3,
)

In [None]:
# A single tuple of two feature names requests one two-way (interaction)
# partial dependence plot for that pair.
features = [("Interest", "AppliedAmount")]
names = X_train_sel.columns.tolist()
fig, axs = plot_partial_dependence(
    gb_sel,
    X_train_sel,
    features,
    feature_names=names,
    grid_resolution=50,
    n_jobs=3,
)