# Feedforward Baseline Generation

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc, RocCurveDisplay, classification_report
from sklearn.preprocessing import LabelBinarizer
from scipy.stats import mode
import matplotlib.pyplot as plt
from itertools import cycle
import joblib

In [None]:
# Read the synthetic dataset, using the first CSV column as the row index,
# then echo the frame as the cell output.
synthetic_data = pd.read_csv(
    'syn_data_small.csv',
    index_col=0,
)
synthetic_data

In [None]:
# Separate the 'output' column (target) from every other column (features),
# then hold out 20% of the rows with a fixed seed.
features = synthetic_data.drop(columns=['output'])
target = synthetic_data['output']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Show how many training rows carry each distinct target value.
y_train.value_counts()

In [None]:
# Train `n_runs` independently initialised MLPs on the training split and
# collect their test-set predictions for the later vote / probability average.
n_runs = 10

# Per-run macro and weighted F1 scores on the test split.
f1s = []
f1_ws = []

# predict_proba returns one column per class seen during fit, so size the
# probability buffer from the training labels instead of hard-coding it.
n_model_classes = np.unique(y_train).size

# Row i holds run i's hard predictions / class probabilities for X_test.
y_preds = np.zeros((n_runs, len(y_test)))
y_probs = np.zeros((n_runs, len(y_test), n_model_classes))

for i in range(n_runs):
    model = MLPClassifier(activation='tanh', hidden_layer_sizes=(100,50,10), max_iter=1000)
    # Fit on the TRAINING split only; fitting on the test split would leak
    # the evaluation data into the model and inflate every metric below.
    model.fit(X_train, y_train)
    y_partial_pred = model.predict(X_test)

    f1 = round(f1_score(y_test, y_partial_pred, average="macro"), 2)
    f1s.append(f1)
    f1_w = round(f1_score(y_test, y_partial_pred, average="weighted"), 2)
    f1_ws.append(f1_w)
    print("Model", i, f1, "/", f1_w)

    y_preds[i] = y_partial_pred
    y_probs[i] = model.predict_proba(X_test)

# Mean of the per-run (already rounded) macro / weighted F1 scores.
print("AVG", sum(f1s)/len(f1s), "/", sum(f1_ws)/len(f1_ws))

In [None]:
# Majority vote across runs: for each test sample take the most frequent
# prediction over the rows of `y_preds` (one row per trained model).
# np.ravel keeps the result 1-D on SciPy versions whose mode() retains the
# reduced axis.
y_pred = np.ravel(mode(y_preds, axis=0).mode)

In [None]:
# Average the per-run class probabilities over the run axis (axis 0).
y_score = y_probs.mean(axis=0)

In [None]:
# Per-class precision / recall / F1 table for the voted predictions.
report = classification_report(
    y_test,
    y_pred,
    target_names=["Back2Home", "Reabilitation", "Death"],
)
print(report)

In [None]:
# Summary metrics for the voted predictions (`y_pred`) and the averaged
# probabilities (`y_score`), each rounded to two decimals.
accuracy = round(accuracy_score(y_test, y_pred), 2)
print("Accuracy:", accuracy)

# Printed as "macro / weighted".
precision = round(precision_score(y_test, y_pred, average="macro"), 2)
precision_w = round(precision_score(y_test, y_pred, average="weighted"), 2)
print("Precision:", precision, "/", precision_w)

recall = round(recall_score(y_test, y_pred, average="macro"), 2)
recall_w = round(recall_score(y_test, y_pred, average="weighted"), 2)
print("Recall:", recall, "/", recall_w)

# Printed as "macro / weighted / micro".
f1_micro = round(f1_score(y_test, y_pred, average="micro"), 2)
f1 = round(f1_score(y_test, y_pred, average="macro"), 2)
f1_w = round(f1_score(y_test, y_pred, average="weighted"), 2)
print("F1 Score:", f1, "/", f1_w, "/", f1_micro)
print("F1 per class:", [round(i, 2) for i in f1_score(y_test, y_pred, average=None)])

# Do NOT name this `auc`: that would rebind the `auc` function imported from
# sklearn.metrics, which the ROC-curve cells further down still call.
auc_roc = round(roc_auc_score(y_test, y_score, multi_class='ovr'), 2)
print("AUC ROC:", auc_roc)

In [None]:
# Confusion matrix of true vs. voted labels, drawn with named classes.
class_labels = ["Back2Home", "Reabilitation", "Death"]
matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
disp = ConfusionMatrixDisplay(matrix, display_labels=class_labels)
disp.plot()
plt.show()

In [None]:
# Learn the label set from the training targets, then one-hot encode the
# test targets with it.
label_binarizer = LabelBinarizer()
label_binarizer.fit(y_train)
y_onehot_test = label_binarizer.transform(y_test)
y_onehot_test.shape  # (n_samples, n_classes)

In [None]:
# Display how the fitted binarizer encodes the single label 0.
label_binarizer.transform([0])

In [None]:
# Find the position of label 0 inside the binarizer's class list.
class_of_interest = 0
is_target = label_binarizer.classes_ == class_of_interest
class_id = np.flatnonzero(is_target)[0]
class_id

In [None]:
# Preview the first two rows of the averaged scores, as a 2-D slice and
# flattened.
score_head = y_score[0:2, :]
print(f"y_score:\n{score_head}")
print()
print(f"y_score.ravel():\n{score_head.ravel()}")

In [None]:
# Micro-averaged one-vs-rest ROC AUC, computed directly by roc_auc_score.
micro_roc_auc_ovr = roc_auc_score(y_test, y_score, average="micro", multi_class="ovr")

print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")

In [None]:
# Containers for ROC data, keyed by class index or averaging-strategy name.
fpr, tpr, roc_auc = {}, {}, {}

# Micro-average: flatten the one-hot labels and the scores so that all
# (sample, class) entries feed a single binary ROC curve.
micro_fpr, micro_tpr, _ = roc_curve(y_onehot_test.ravel(), y_score.ravel())
fpr["micro"] = micro_fpr
tpr["micro"] = micro_tpr
roc_auc["micro"] = auc(micro_fpr, micro_tpr)

print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{roc_auc['micro']:.2f}")

In [None]:
n_classes = 3

# Common FPR grid on which the per-class curves are interpolated and summed.
fpr_grid = np.linspace(0.0, 1.0, 1000)
mean_tpr = np.zeros_like(fpr_grid)

for i in range(n_classes):
    # One-vs-rest ROC curve and area for class i.
    fpr[i], tpr[i], _ = roc_curve(y_onehot_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    # Linear interpolation of this class's TPR onto the shared grid.
    mean_tpr += np.interp(fpr_grid, fpr[i], tpr[i])

# Unweighted mean over classes gives the macro-average curve.
mean_tpr /= n_classes

fpr["macro"], tpr["macro"] = fpr_grid, mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

print(f"Macro-averaged One-vs-Rest ROC AUC score:\n{roc_auc['macro']:.2f}")

In [None]:
# Macro-averaged one-vs-rest ROC AUC, computed directly by roc_auc_score.
macro_roc_auc_ovr = roc_auc_score(y_test, y_score, average="macro", multi_class="ovr")

print(f"Macro-averaged One-vs-Rest ROC AUC score:\n{macro_roc_auc_ovr:.2f}")

In [None]:
# NOTE(review): these per-class weights are hard-coded; presumably they are
# class proportions from some split of the data - confirm and consider
# deriving them from the labels instead.
class_weights = [0.443396, 0.432075, 0.124529]

# Shared FPR grid and accumulator for the class-weighted average curve.
fpr_grid = np.linspace(0.0, 1.0, 1000)
weighted_mean_tpr = np.zeros_like(fpr_grid)

for i in range(n_classes):
    # Every sample receives the same weight (class i's weight).
    # NOTE(review): a constant per-sample weight likely leaves the curve
    # unchanged - verify whether this argument is doing anything.
    sample_weight = np.full(y_onehot_test.shape[0], class_weights[i])
    fpr[i], tpr[i], _ = roc_curve(y_onehot_test[:, i], y_score[:, i], sample_weight=sample_weight)
    roc_auc[i] = auc(fpr[i], tpr[i])

    # Interpolate onto the grid and add this class's weighted contribution.
    interp_tpr = np.interp(fpr_grid, fpr[i], tpr[i])
    weighted_mean_tpr += class_weights[i] * interp_tpr

# Store the class-weighted average curve and its area.
fpr["weighted_macro"], tpr["weighted_macro"] = fpr_grid, weighted_mean_tpr
roc_auc["weighted_macro"] = auc(fpr["weighted_macro"], tpr["weighted_macro"])

print(f"Weighted Macro-averaged One-vs-Rest ROC AUC score:\n{roc_auc['weighted_macro']:.2f}")


In [None]:
target_names = ["Back2Home", "Reabilitation", "Death"]

fig, ax = plt.subplots(figsize=(6, 6))

# The two averaged curves share everything but key, caption and colour.
for key, caption, tint in (
    ("macro", "macro-average", "navy"),
    ("weighted_macro", "weighted-average", "deeppink"),
):
    ax.plot(
        fpr[key],
        tpr[key],
        label=f"{caption} (AUC = {roc_auc[key]:.2f})",
        color=tint,
        linestyle=":",
        linewidth=4,
    )

# One one-vs-rest curve per class; the chance-level line is requested only
# on the class with index 2.
colors = cycle(["aqua", "darkorange", "cornflowerblue"])
for class_id, color in zip(range(n_classes), colors):
    RocCurveDisplay.from_predictions(
        y_onehot_test[:, class_id],
        y_score[:, class_id],
        name=target_names[class_id],
        color=color,
        ax=ax,
        plot_chance_level=(class_id == 2),
    )

# Assign to `_` so the notebook does not echo the return value of ax.set.
_ = ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="Feedforward Neural Network",
)

In [None]:
# Plot the class-weighted average ROC curve on its own.
weighted_label = f"weighted-average (AUC = {roc_auc['weighted_macro']:.2f})"
plt.plot(fpr["weighted_macro"], tpr["weighted_macro"], label=weighted_label, color="deeppink")