In [None]:
# data handling
import pickle
import pandas as pd
import numpy as np
import time
from scipy import stats
# visualization
import seaborn as sns
import matplotlib.pyplot as plt
import shap
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score
# interpretation
from sklearn.inspection import PartialDependenceDisplay
import cudf
import cuml

In [None]:
# Load the previously fitted regressor from disk.
# SECURITY NOTE: unpickling executes arbitrary code embedded in the file, so
# only load "stacking_regressor.pkl" if it comes from a trusted source.
# The context manager guarantees the file handle is closed even if
# unpickling raises.
with open("stacking_regressor.pkl", "rb") as model_file:
    regressor = pickle.load(model_file)

In [None]:
# Both splits are stored as semicolon-separated CSV files; share the parser
# options so the two reads cannot drift apart.
csv_options = dict(sep=';')
trainDF = pd.read_csv('../app/data/02_train.csv', **csv_options)
testDF = pd.read_csv('../app/data/02_test.csv', **csv_options)

In [None]:
# Feature columns passed to the regressor; the label column is 'totalPrice'.
trainColumns = ['Lng', 'Lat', 'square', 'tradeYear', 'tradeMonth']

# Split each frame into its feature matrix and label vector.
X_train, y_train = trainDF[trainColumns], trainDF['totalPrice']
X_test, y_test = testDF[trainColumns], testDF['totalPrice']

In [None]:
# Convert the test features to a NumPy array and predict on it.
test_features = X_test.to_numpy()
y_pred = regressor.predict(test_features)

In [None]:
# DF with predictions and deviation
# Build a frame holding the test features, the true label, the prediction and
# the absolute deviation between the two.
# Work on an explicit copy: assigning `predDF = X_test` would only alias the
# feature frame, so adding columns below would also add them to X_test
# (breaking a re-run of the prediction cell) and can raise
# SettingWithCopyWarning because X_test is itself a selection of testDF.
predDF = X_test.copy()
predDF["totalPrice"] = y_test
predDF["prediction"] = y_pred
predDF["deviation"] = (predDF["totalPrice"] - predDF["prediction"]).abs()
predDF.head()

In [None]:
# Scatter plot: absolute deviation (x) against the true totalPrice (y).
deviation_values = predDF["deviation"].to_numpy()
actual_prices = predDF["totalPrice"].to_numpy()

plt.figure(figsize=(8, 8))
plt.scatter(deviation_values, actual_prices)
plt.xlabel("deviation")
plt.ylabel("label: totalPrice")
plt.title("real totalPrice and deviation of prediction")
plt.show()

In [None]:
# Scatter plot: prediction (x) against the true totalPrice (y), with each
# point coloured by its absolute deviation.
predicted_prices = predDF["prediction"].to_numpy()
actual_prices = predDF["totalPrice"].to_numpy()
deviation_values = predDF["deviation"].to_numpy()

# Axis bounds. NOTE(review): the upper x bound is derived from totalPrice, not
# from prediction -- presumably so both axes end at the same value; confirm
# that no prediction exceeds it, otherwise those points fall outside the view.
upper_bound = predDF["totalPrice"].max() + 100
x_lower_bound = predDF["prediction"].min() - 50

plt.figure(figsize=(8, 8))
plt.scatter(predicted_prices, actual_prices, c=deviation_values)
# Red vertical line at prediction == 0.
plt.axvline(x=0, color="red")
plt.title("real totalPrice and predicted value")
plt.xlabel("prediction")
plt.ylabel("label: totalPrice")
plt.xlim((x_lower_bound, upper_bound))
plt.ylim((0, upper_bound))
plt.colorbar(label='color represents deviation')
plt.grid()
plt.show()

In [None]:
from interpret.ext.blackbox import TabularExplainer

In [None]:
# Wrap the regressor in a TabularExplainer, handing it the training features
# and their column names. `use_gpu=True` asks for GPU execution.
explainer_options = dict(features=trainColumns, use_gpu=True)
explainer = TabularExplainer(regressor, X_train, **explainer_options)

In [None]:
# Compute the global explanation on the first 10 training rows only.
explanation_rows = X_train[:10]
global_explanation = explainer.explain_global(explanation_rows)

In [None]:
from raiwidgets import ExplanationDashboard
# Show the explanation dashboard for the same 10 training rows that were
# passed to explain_global. Kept as the final expression of the cell.
ExplanationDashboard(
    global_explanation,
    regressor,
    dataset=X_train[:10],
)