In [None]:
# Kedro IPython line magic. Presumably it (re)loads the Kedro project and
# injects the `catalog` object used by the cells below — TODO confirm.
%reload_kedro

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from numpy import array #useful to parse values
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
# Column of the master tables that is promoted to the DataFrame index.
INDEX_COL = "window_nbr"
# Column of the master tables that holds the target label.
LABEL_COL = "label"

In [None]:
# Master tables for both splits, loaded train first then test and
# re-indexed on INDEX_COL.
mt_train, mt_test = (
    catalog.load(dataset_name).set_index(INDEX_COL)
    for dataset_name in ("master_table_train_multic", "master_table_test_multic")
)

# Labels are kept as single-column DataFrames (double brackets), not Series.
y_train, y_test = mt_train[[LABEL_COL]], mt_test[[LABEL_COL]]

# Model outputs: predictions and the reporting table parsed further below.
y_pred = catalog.load("xgboost_model_predict")
df_model_rpt = catalog.load("xgboost_model_reporting")

### Base notes

In [None]:
# Display the model reporting table; its "test_probas", "fte_importance" and
# "label_class_balance" columns are parsed in the sections that follow.
df_model_rpt

### Evaluate model's split between train and test

In [None]:
# Give each split's label column its own name so both show up as separate
# series in the plot. LABEL_COL is used (rather than a hard-coded "label") so
# the rename stays in sync with how y_train / y_test were selected; a mismatch
# would make rename() a silent no-op.
y_train_split = y_train.rename(columns={LABEL_COL: "train_data"})
y_test_split = y_test.rename(columns={LABEL_COL: "test_data"})

# Outer join on the index: every index value from either split is kept, and a
# value present in only one split gets NaN in the other split's column.
y_all = y_train_split.merge(y_test_split, left_index=True, right_index=True, how="outer")

# Visual check for leakage between the train and test sets: an index value that
# belongs to both splits shows a point from each series at the same x position.
y_all.plot(figsize=(15,5), title="Timeseries sensical eval", style=".", colormap="plasma")

### Evaluate model's probability
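- for correct predictions, the further the histogram is shifted to the right the better (the model predicts the right label with high probability)
- for incorrect predictions, the opposite holds: the further to the left the better (the model was at least not confident when it was wrong)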

In [None]:
# NOTE(security): eval() executes arbitrary code. It is kept because the stored
# string presumably contains numpy `array(...)` reprs (hence the `array` import
# at the top of the notebook), which ast.literal_eval cannot parse — TODO
# confirm. Only run this on reports produced by a trusted pipeline.
probas = eval(df_model_rpt["test_probas"].values[0])

# One row per key of the parsed dict, ordered by index.
probas_df = pd.DataFrame.from_dict(data=probas, orient="index")
probas_df = probas_df.sort_index()

# Attach the true label, then the prediction. The asserts guard against rows
# being lost (or duplicated) by the inner joins.
df = probas_df.merge(y_test, left_index=True, right_index=True, how="inner")
assert df.shape[0] == probas_df.shape[0] == y_test.shape[0]
df2 = df.merge(y_pred, left_index=True, right_index=True, how="inner")
assert df.shape[0] == df2.shape[0]

# Split with a boolean mask rather than drop(df_right.index): dropping by index
# label would also remove wrongly predicted rows that happen to share an index
# value with a correctly predicted one.
is_right = df2[LABEL_COL] == df2["y_pred"]
df_right = df2[is_right]
df_wrong = df2[~is_right]

# Use every per-class probability column, not only classes 0 and 1, so the
# row-wise max is the predicted-class probability for any number of classes.
proba_cols = [col for col in probas_df.columns if str(col).startswith("proba_label_")]
assert proba_cols, "no 'proba_label_*' columns found in test_probas"

df_right = df_right.drop(columns=[LABEL_COL])
right_probas = df_right[proba_cols].max(axis=1)

df_wrong = df_wrong.drop(columns=[LABEL_COL])
wrong_probas = df_wrong[proba_cols].max(axis=1)

In [None]:
# Distribution of the highest class probability for correctly predicted rows.
# Title and axis labels are set so the figure can be read on its own.
ax = right_probas.hist()
ax.set(
    title="Max class probability - correct predictions",
    xlabel="max class probability",
    ylabel="number of rows",
);

In [None]:
# Distribution of the highest class probability for wrongly predicted rows.
# Title and axis labels are set so the figure can be read on its own.
ax = wrong_probas.hist()
ax.set(
    title="Max class probability - wrong predictions",
    xlabel="max class probability",
    ylabel="number of rows",
);

### Evaluate feature importance

In [None]:
# NOTE(security): eval() executes arbitrary code; only run this on reports
# produced by a trusted pipeline.
# .iloc[0] takes the first row by position. A plain [0] is a label lookup that
# fails when the report index does not contain the label 0.
fte_imp = eval(df_model_rpt["fte_importance"].iloc[0])

# Materialise the dict views as lists so the DataFrame constructor receives
# plain, equally ordered sequences.
data = {"features": list(fte_imp.keys()),
        "importance": list(fte_imp.values())}

df_fte_imp = pd.DataFrame.from_dict(data=data)
# Ascending sort: with a horizontal bar chart the largest value is drawn last,
# i.e. at the top of the figure.
df_fte_imp = df_fte_imp.sort_values(by="importance", ascending=True)

df_fte_imp.set_index("features").plot(kind="barh", figsize=(15, 8))

### Evaluate: target class balancing

In [None]:
# NOTE(security): eval() executes arbitrary code; only run this on reports
# produced by a trusted pipeline.
# .iloc[0] takes the first row by position. A plain [0] is a label lookup that
# fails when the report index does not contain the label 0.
class_balance = eval(df_model_rpt["label_class_balance"].iloc[0])

# Materialise the dict views as lists so the DataFrame constructor receives
# plain, equally ordered sequences.
data = {"class": list(class_balance.keys()),
        "percentage": list(class_balance.values())}

df_cls_blc = pd.DataFrame.from_dict(data=data)

# One bar per class; the legend is redundant with the y-axis label.
df_cls_blc.set_index("class").plot(kind="bar", figsize=(5, 3), ylabel="percentage", legend=None)

### Evaluation: plot predicted and actual labels
- if the model's accuracy were 100%, this plot would show only one color (the two lines would overlap completely)
- wherever a second color is visible, the model made a wrong prediction
- this chart helps to understand whether the wrong predictions are concentrated in a particular time frame

In [None]:
# Find the points in time where the model's predictions are wrong:
# the bigger the overlap between the two lines, the better.
# LABEL_COL is used (rather than a hard-coded "label") so the rename stays in
# sync with how y_test was selected.
y_test_pred = (
    y_test.rename(columns={LABEL_COL: "y_true"})
    .merge(y_pred, left_index=True, right_index=True, how="inner")
)
# The inner join must neither lose nor duplicate rows on either side.
assert y_test_pred.shape[0] == y_test.shape[0] == y_pred.shape[0]

y_test_pred.plot(figsize=(15, 5), colormap="plasma")