# Valutazione dei modelli

In questo laboratorio vedremo diverse tecniche per valutare un modello e come confrontare diversi modelli di ML per scegliere il migliore.

Useremo un dataset sul churn (abbandono dei clienti) recuperato da Kaggle.

In [114]:
import pandas as pd
import numpy as np
import scipy.stats as st

In [None]:
# Location of the churn CSV file
CHURN_CSV_URL = "https://raw.githubusercontent.com/albayraktaroglu/Datasets/master/churn.csv"

# Read the CSV into a DataFrame
df = pd.read_csv(CHURN_CSV_URL)

# Show the first rows of the dataset
df.head()

Eliminiamo le prime quattro colonne

In [116]:
# Remove, in place, the four columns that are not used as features
df.drop(
    columns=["Phone", "State", "Account Length", "Area Code"],
    inplace=True,
)

In [None]:
# Preview the first rows of the DataFrame after the column drop above
df.head()

In [None]:
# Show the dtype of every column (before label encoding)
df.dtypes

Applichiamo il label encoding per convertire le colonne categoriche in numeriche, in quanto è necessario per allenare un modello SVM

In [None]:
from sklearn.preprocessing import LabelEncoder

# Columns whose values must be turned into integer codes
categorical_columns = ("Int'l Plan", "VMail Plan", "Churn?")

for col in categorical_columns:
    # Cast to the pandas category dtype first, then overwrite the column
    # with the integer codes produced by a fresh encoder.
    df[col] = df[col].astype("category")
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

# Print the transformed dataset
print(df.head())

In [None]:
# Show the dtypes again, after the label encoding above
df.dtypes

In [None]:
from sklearn.model_selection import train_test_split

# Separate the predictors from the target column
features = df.drop('Churn?', axis=1)
target = df['Churn?']

# Hold out 20% of the rows for testing; stratify on the target so both
# splits keep the same class proportions, and fix the seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    stratify=target,
    random_state=42,
)

# Print the size of the training and test sets
print('Dimensione dati di addestramento:', X_train.shape)
print('Dimensione dati di test:', X_test.shape)

Alleniamo una SVM e calcoliamo le performance

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

# Fit an SVM with default hyper-parameters (fit() returns the estimator itself)
svm = SVC(random_state=42).fit(X_train, y_train)

# Predict on the held-out test split and score the predictions
y_pred = svm.predict(X_test)
accuracy_train_test, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Print the model's performance, one metric per line
for label, value in (
    ('Accuracy:', accuracy_train_test),
    ('Precision:', precision),
    ('Recall:', recall),
    ('F1-score:', f1),
):
    print(label, value)

In [None]:
from sklearn.metrics import confusion_matrix

# scikit-learn expects (y_true, y_pred): rows are the true classes and columns
# the predicted ones. Passing the arguments the other way round transposes the
# matrix, i.e. swaps false positives with false negatives.
confusion_matrix(y_test, y_pred)

In [None]:
from sklearn.metrics import classification_report

# scikit-learn expects (y_true, y_pred). With the arguments swapped, the
# per-class precision and recall are exchanged and the support column counts
# predictions instead of true labels.
print(classification_report(y_test, y_pred))

Essendo un problema sbilanciato, l'accuratezza è fuorviante. Bisogna analizzare altre metriche, come precision, recall e f1-score.

Vediamo ora un altro metodo per valutare le performance di un classificatore: la curva ROC.

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score
import matplotlib.pyplot as plt

# Refit a linear-kernel SVM with probability=True so predict_proba is available,
# then keep the predicted probability of the positive class (column 1).
svm = SVC(kernel='linear', C=1, random_state=42, probability=True)
svm.fit(X_train, y_train)
y_pred_prob = svm.predict_proba(X_test)[:, 1]

# ROC points and the area under the curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
roc_auc = roc_auc_score(y_test, y_pred_prob)

# Draw the ROC curve against the chance diagonal
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
ax.plot([0, 1], [0, 1], 'k--')
ax.set(
    xlim=[0.0, 1.0],
    ylim=[0.0, 1.05],
    xlabel='False Positive Rate',
    ylabel='True Positive Rate',
    title='Receiver operating characteristic',
)
ax.legend(loc="lower right")
plt.show()

La curva ROC può essere usata per confrontare diversi modelli di ML e capire quale sia il migliore. Addestriamo un albero decisionale e confrontiamolo con la SVM allenata in precedenza.

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Positive-class probabilities of the SVM fitted in the previous cell
y_svm_pred_prob = y_pred_prob

# Train the decision tree and get its positive-class probabilities
dt = DecisionTreeClassifier(max_depth=3, random_state=42)
dt.fit(X_train, y_train)
y_dt_pred_prob = dt.predict_proba(X_test)[:, 1]

# ROC curve and AUC of the SVM
fpr_svm, tpr_svm, thresholds_svm = roc_curve(y_test, y_svm_pred_prob)
auc_svm = roc_auc_score(y_test, y_svm_pred_prob)

# ROC curve and AUC of the decision tree
fpr_dt, tpr_dt, thresholds_dt = roc_curve(y_test, y_dt_pred_prob)
auc_dt = roc_auc_score(y_test, y_dt_pred_prob)

# Plot both ROC curves on the same axes to compare the two models
roc_curves = (
    (fpr_svm, tpr_svm, f'SVM (AUC = {auc_svm:.2f})'),
    (fpr_dt, tpr_dt, f'Decision Tree (AUC = {auc_dt:.2f})'),
)
for curve_fpr, curve_tpr, curve_label in roc_curves:
    plt.plot(curve_fpr, curve_tpr, label=curve_label)
plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()

Due o più modelli possono anche essere comparati in termini di tempo impiegato in fase di training.

In [127]:
# NOTE(review): `time` is no longer used in this cell; it stays imported in
# case another cell relies on it.
from time import perf_counter, time

# Elapsed intervals are measured with perf_counter(), a monotonic
# high-resolution clock. time() follows the system clock, which can be
# adjusted while a fit is running and has coarser resolution on some platforms.

# Train the SVM and measure its training time
start_time = perf_counter()
svm = SVC(kernel='linear', C=1, random_state=42)
svm.fit(X_train, y_train)
end_time = perf_counter()
svm_training_time = end_time - start_time

# Train the decision tree and measure its training time
start_time = perf_counter()
dt = DecisionTreeClassifier(max_depth=3, random_state=42)
dt.fit(X_train, y_train)
end_time = perf_counter()
dt_training_time = end_time - start_time

# Print the training time of the two models
print(f'Tempo di training SVM: {svm_training_time:.2f} secondi')
print(f'Tempo di training Decision Tree: {dt_training_time:.2f} secondi')

Tempo di training SVM: 68.79 secondi
Tempo di training Decision Tree: 0.02 secondi


# 10-fold Cross-Validation

In [128]:
from sklearn.model_selection import KFold,StratifiedKFold

In [129]:
# Number of folds for plain k-fold cross-validation
n_fold = 10
# shuffle=True without a seed yields different folds on every run; fix
# random_state so the accuracies and confidence intervals are reproducible.
folds = KFold(n_splits=n_fold, shuffle=True, random_state=42)

In [130]:
# Separate the target from the predictors: from here on `df` holds only the
# feature columns and `y` holds the churn label.
y = df['Churn?']
df = df.drop(columns='Churn?')

In [131]:
# NOTE(review): this buffer is never written or read in the visible notebook;
# it is kept only in case another cell depends on it.
test_pred_proba = np.zeros((df.shape[0], 2))
accuracy_k_fold = []

# Fit a depth-3 decision tree on every fold and record its validation accuracy.
# The fold index is not needed, so enumerate() is dropped: binding it to
# `n_fold` overwrote the number of folds configured earlier.
for train_idx, valid_idx in folds.split(df, y):
    # split() yields positional indices, so X and y must both be sliced with
    # .iloc; y[train_idx] is label-based and only matches on a default
    # 0..n-1 index.
    X_train, X_valid = df.iloc[train_idx], df.iloc[valid_idx]
    y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

    model = DecisionTreeClassifier(max_depth=3, random_state=42)
    model.fit(X_train, y_train)

    y_pred_valid = model.predict(X_valid)
    accuracy_k_fold.append(accuracy_score(y_valid, y_pred_valid))

In [None]:
# Display the per-fold accuracies collected by the k-fold loop
accuracy_k_fold

In [None]:
# 90% Student's t confidence interval for the mean k-fold accuracy:
# centred on the sample mean, scaled by the standard error, n-1 degrees of freedom.
k_fold_mean = np.mean(accuracy_k_fold)
k_fold_sem = st.sem(accuracy_k_fold)
k_fold_dof = len(accuracy_k_fold) - 1
st.t.interval(confidence=0.90, df=k_fold_dof, loc=k_fold_mean, scale=k_fold_sem)

# Stratified Cross-validation

In [134]:
# Number of folds for stratified k-fold cross-validation
n_fold = 10
# shuffle=True without a seed yields different folds on every run; fix
# random_state so the accuracies and confidence intervals are reproducible.
folds = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=42)

In [135]:
accuracy_stratified = []

# Fit a depth-3 decision tree on every stratified fold and record its
# validation accuracy. The fold index is not needed, so enumerate() is dropped:
# binding it to `n_fold` overwrote the number of folds configured earlier.
for train_idx, valid_idx in folds.split(df, y):
    # split() yields positional indices, so X and y must both be sliced with
    # .iloc; y[train_idx] is label-based and only matches on a default
    # 0..n-1 index.
    X_train, X_valid = df.iloc[train_idx], df.iloc[valid_idx]
    y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

    model = DecisionTreeClassifier(max_depth=3, random_state=42)
    model.fit(X_train, y_train)

    y_pred_valid = model.predict(X_valid)
    accuracy_stratified.append(accuracy_score(y_valid, y_pred_valid))

In [None]:
# Display the per-fold accuracies collected by the stratified k-fold loop
accuracy_stratified

In [None]:
# 90% Student's t confidence interval for the mean stratified k-fold accuracy:
# centred on the sample mean, scaled by the standard error, n-1 degrees of freedom.
stratified_mean = np.mean(accuracy_stratified)
stratified_sem = st.sem(accuracy_stratified)
stratified_dof = len(accuracy_stratified) - 1
st.t.interval(confidence=0.90, df=stratified_dof, loc=stratified_mean, scale=stratified_sem)

# Leave-One-Out

In [138]:
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score

In [139]:
# Leave-one-out cross-validation of a depth-3 decision tree; `results` holds
# one score per split produced by the splitter.
loocv = LeaveOneOut()
model = DecisionTreeClassifier(max_depth=3, random_state=42)
results = cross_val_score(estimator=model, X=df, y=y, cv=loocv)

In [None]:
# Display the array of scores returned by cross_val_score
results

In [None]:
# Average of the leave-one-out scores
results.mean()

# Repeated cv

In [142]:
from sklearn.model_selection import RepeatedKFold

In [None]:
# Repeated k-fold: 10 folds, repeated 5 times, with a fixed seed
rkf = RepeatedKFold(n_splits=10, n_repeats=5, random_state=42)
# Display the total number of train/validation splits the splitter yields
rkf.get_n_splits(df, y)

In [144]:
accuracy_repeated_cv = []

# Fit a depth-3 decision tree on every repeated-CV split and record its
# validation accuracy. The split index is not needed, so enumerate() is
# dropped: binding it to `n_fold` overwrote the fold count set earlier.
for train_idx, valid_idx in rkf.split(df, y):
    # split() yields positional indices, so X and y must both be sliced with
    # .iloc; y[train_idx] is label-based and only matches on a default
    # 0..n-1 index.
    X_train, X_valid = df.iloc[train_idx], df.iloc[valid_idx]
    y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

    model = DecisionTreeClassifier(max_depth=3, random_state=42)
    model.fit(X_train, y_train)

    y_pred_valid = model.predict(X_valid)
    accuracy_repeated_cv.append(accuracy_score(y_valid, y_pred_valid))

In [None]:
# Display the per-split accuracies collected by the repeated k-fold loop
accuracy_repeated_cv

In [146]:
# Create the 90% confidence interval (Student's t) for the mean repeated-CV accuracy
st.t.interval(confidence=0.90, df=len(accuracy_repeated_cv)-1, loc=np.mean(accuracy_repeated_cv), scale=st.sem(accuracy_repeated_cv))

(0.8984173351526518, 0.9061932634699231)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# One error bar per evaluation scheme, placed at x = 0, 1 in this order
score_groups = [
    ('Repeated CV', accuracy_repeated_cv),
    ('K-Fold', accuracy_k_fold),
]

for x_pos, (label, scores) in enumerate(score_groups):
    # Mean accuracy and its 95% Student's t confidence interval
    mean_value = np.mean(scores)
    confidence_interval = stats.t.interval(
        0.95, len(scores) - 1, loc=mean_value, scale=stats.sem(scores)
    )
    lower, upper = confidence_interval

    # The error bar length is the half-width of the interval
    plt.errorbar(x_pos, mean_value, yerr=(upper - lower) / 2, fmt='o', label=label)

# Add labels and title
plt.xlabel('Group')
plt.ylabel('Value')
plt.title('Mean with Confidence Interval')

# Show the plot
plt.legend()
plt.show()


# Assignment
Indurre un modello Naive Bayes e confrontarlo con i modelli Decision Tree e SVM in termini di:
- capacità predittive (accuratezza, f-measure, curva ROC, AUC)
- tempi di training
- intervalli di confidenza su una misura adeguata