In [None]:
from sklearn.metrics import confusion_matrix
from model_training import print_model_evaluation
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt

In [None]:
# TODO(review): the right-hand side of this assignment is missing, so this cell is a
# syntax error as written. Later cells read `model`, `X_train`, `y_train`, `X_test`
# and `y_test`; the call that produces them must be supplied here.
model, X_train, y_train, X_test, y_test = 

**Confusion matrix**

In [None]:
# Run the project's evaluation helper (imported from model_training) on the test data.
# NOTE(review): presumably prints evaluation metrics for `model` — confirm against the helper.
print_model_evaluation(model, X_test, y_test)

In [None]:
# Confusion matrix of the model's predictions on the test data, drawn as an annotated heatmap.
y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Greens", cbar=False, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Confusion Matrix')
plt.show()

**Feature Importance**

In [None]:
# Print the impurity-based feature importances, ordered alphabetically by feature name.
#
# `model` may be a bare estimator or a Pipeline whose last step is the estimator. A Pipeline
# does not expose `feature_importances_` itself, so resolve the final estimator first.
estimator = model[-1] if hasattr(model, "steps") else model
importances = estimator.feature_importances_

# (column name, positional index) pairs sorted by name; the index points back into `importances`.
columns_enumeration = sorted((column, i) for i, column in enumerate(X_train.columns))

print("important features")
# Join the entries so the line has no dangling ", " and ends with a newline.
print(", ".join(
    f"{column} {round(importances[i], ndigits=3)}" for column, i in columns_enumeration
))

In [None]:
# Bar chart of the feature importances, tallest bar first; each tick shows the feature's
# positional index in X_train.
indices = np.argsort(importances)[::-1]
positions = range(X_train.shape[1])

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(positions, importances[indices], align="center")
ax.set_xticks(positions)
ax.set_xticklabels(indices)
ax.set_title("Feature Importance")
ax.set_xlabel("Feature Index")
ax.set_ylabel("Importance Score")
plt.show()

**Permutation Importance**

Permutation Importance evaluates the drop in model performance when the values of a feature are randomly shuffled. A larger drop indicates higher importance.

In [None]:
# Permutation importance computed on the training data (10 shuffles per feature, fixed seed),
# shown as one horizontal box per feature, ordered by mean importance.
perm_importance_rf = permutation_importance(model, X_train, y_train, n_repeats=10, random_state=42)
sorted_importances_idx = perm_importance_rf.importances_mean.argsort()

# Label the boxes with the columns of the data that was actually permuted (X_train).
# A dedicated name keeps the `importances` array of the impurity-based cells intact, so
# those cells can still be re-run after this one.
perm_importances = pd.DataFrame(
    perm_importance_rf.importances[sorted_importances_idx].T,
    columns=X_train.columns[sorted_importances_idx],
)
ax = perm_importances.plot.box(vert=False, whis=10)
ax.set_title("Permutation Importances Random Forest (train set)")
# Dashed reference line at zero: shuffling such a feature does not change the score.
ax.axvline(x=0, color="k", linestyle="--")
ax.set_xlabel("Decrease in accuracy score")
ax.figure.tight_layout()

Tree-based feature importance from Mean Decrease in Impurity (MDI): the impurity-based importance ranks the numerical features as the most important ones.

In [None]:
# Mean Decrease in Impurity (MDI) importances as a horizontal bar chart, sorted ascending
# so the most important feature is drawn at the top.
feature_names = X_train.columns

# Only index with [-1] when `model` is a Pipeline (it then yields the final estimator).
# Indexing a bare ensemble with [-1] would return its last sub-estimator, i.e. the
# importances of a single tree instead of the whole forest.
final_estimator = model[-1] if hasattr(model, "steps") else model

mdi_importances = pd.Series(
    final_estimator.feature_importances_, index=feature_names
).sort_values(ascending=True)

fig, ax = plt.subplots()
mdi_importances.plot.barh(ax=ax)
ax.set_title("Random Forest Feature Importances (MDI)")
fig.tight_layout()

**Predicted vs True new conversations**

In [None]:
# Load the conversation table and keep only the rows whose id appears in the test data.
df = pd.read_csv('data/new_conversation.csv')
id_test = X_test['id']
in_test_split = df['id'].isin(id_test)
subset_df = df.loc[in_test_split]

Highlight the *true* (black) and *predicted* (blue) conversation changes on the test set in different colors; the predicted changes are those detected by the RandomForest model.

In [None]:
# Per-group timeline of conversation changes on the test data: one subplot per group, with
# true changes drawn as black vertical lines and the model's predicted changes in blue.
# Also collects, per group, how many lines of each kind were drawn.

# Model output for every test row, keyed by id. The "predicted" lines must come from the
# model; `y_test` is the ground truth (it is the reference side of the confusion matrix).
predicted_by_id = dict(zip(X_test['id'], model.predict(X_test)))

grouped = subset_df.groupby('group')
num_plots = len(grouped)
# squeeze=False + ravel() keeps `axes` a 1-D array even with a single group; by default
# plt.subplots returns a bare Axes in that case and `axes[i]` would fail.
fig, axes = plt.subplots(num_plots, 1, figsize=(15, 5 * num_plots), squeeze=False)
axes = axes.ravel()

# Both lists hold (count, group) pairs, one per subplot.
true_drifts = []
predicted_drifts = []

for i, (group, group_data) in enumerate(grouped):
    group_ax = axes[i]
    n_drift_true = 0
    n_drift_predicted = 0
    new_conversation = group_data['new_conversation'] == 'yes'
    group_ax.scatter(group_data['date'], new_conversation, label=group, marker='.', linewidth=2, color='#FF5733')

    prev_status = None
    # Rows are matched to predictions through 'id', the same column used to build subset_df.
    for date, message_id, status in zip(group_data['date'], group_data['id'], new_conversation):
        # A true change is drawn on the first row that follows a row flagged as a new conversation.
        if prev_status is not None and status != prev_status and prev_status == True:
            # Only the first line of each kind gets a legend entry; the rest are hidden from it.
            group_ax.axvline(x=date, color='black', linestyle='-', linewidth=0.3,
                             label='True Change' if n_drift_true == 0 else '_nolegend_')
            n_drift_true += 1

        # NOTE(review): assumes the positive class is encoded as True/1 — confirm against
        # the actual label format returned by model.predict.
        if predicted_by_id.get(message_id) == True:
            group_ax.axvline(x=date, color='blue', linestyle='-', linewidth=0.3,
                             label='Predicted Change' if n_drift_predicted == 0 else '_nolegend_')
            n_drift_predicted += 1

        prev_status = status

    group_ax.set_title(f'Drift-Group: {group}')
    group_ax.set_xlabel('Date')
    group_ax.set_ylabel('New Conversation')
    # One legend per subplot; a single plt.legend() after the loop only reaches the last one.
    group_ax.legend()

    true_drifts.append((n_drift_true, group))
    predicted_drifts.append((n_drift_predicted, group))

fig.tight_layout()
plt.show()

Compare the frequency of the *true* and *predicted* conversation changes detected by RandomForest on the test set.

In [None]:
# Grouped horizontal bar chart comparing, for each group, the number of true and
# predicted changes counted by the timeline cell.
bar_width = 0.35

# true_drifts / predicted_drifts hold (count, group) pairs. barh needs the bare counts,
# and the tick labels need the group names (a GroupBy object is not a list of labels).
true_counts = [count for count, _ in true_drifts]
predicted_counts = [count for count, _ in predicted_drifts]
group_labels = [group for _, group in true_drifts]

index = np.arange(len(group_labels))

fig, ax = plt.subplots()
ax.barh(index, true_counts, bar_width, label='True Drifts', color='lightblue')
# Shift the second series by one bar height so the two bars of a group sit side by side.
ax.barh(index + bar_width, predicted_counts, bar_width, label='Predicted Drifts', color='lightgreen')

ax.set_xlabel('Number of Drifts')
ax.set_ylabel('Group')
ax.set_title('Number of True and Predicted Drifts per Group')
# Centre each tick between the two bars of its group.
ax.set_yticks(index + bar_width / 2)
ax.set_yticklabels(group_labels)

ax.legend()
plt.show()
