In [1]:
def warn(*_args, **_kwargs):
    """Do nothing: accept any positional/keyword arguments and return None."""
    return None
import warnings
# Rebind ``warnings.warn`` to the no-op ``warn`` defined above, so that calls
# made through ``warnings.warn`` after this point do nothing.
# NOTE(review): this hides every such warning (presumably including
# scikit-learn's convergence / undefined-metric warnings) -- confirm that a
# targeted ``warnings.filterwarnings`` rule would not be preferable.
warnings.warn = warn

import numpy as np
import seaborn as sns
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    classification_report,
    mean_absolute_percentage_error,
    mean_squared_error,
    r2_score)

In [2]:
# Load the data: three feature columns, with the day of the week as the label.
tips = sns.load_dataset('tips')
# A list is the documented way to select several columns with .loc.
X = tips.loc[:, ['total_bill', 'tip', 'size']].values
y = tips.loc[:, 'day'].values
# Fix the seed so that the split -- and therefore every metric printed below --
# is identical on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [3]:
# Fit the logistic-regression classifier on the training split, then predict
# the labels of the held-out split.
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [4]:
# Exercise 18.1
# Part 1: metrics
# zero_division=0 explicitly scores a class that has no predicted (or no true)
# samples as 0, instead of relying on the warning that is silenced globally at
# the top of the notebook.
macro_precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
macro_recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
overall_accuracy = accuracy_score(y_test, y_pred)
print('Precisione: {}'.format(round(macro_precision, 2)))
print('Recall: {}'.format(round(macro_recall, 2)))
print('Accuracy: {}'.format(round(overall_accuracy, 2)))

Precisione: 0.25
Recall: 0.24
Accuracy: 0.28


In [5]:
# Part 2: classification report
# zero_division=0 makes the 0.00 entries for classes that are never predicted
# an explicit choice rather than the by-product of a suppressed warning.
print(classification_report(y_test, y_pred, zero_division=0))

              precision    recall  f1-score   support

         Fri       0.00      0.00      0.00         5
         Sat       0.24      0.59      0.34        17
         Sun       0.50      0.15      0.24        26
        Thur       0.27      0.23      0.25        13

    accuracy                           0.28        61
   macro avg       0.25      0.24      0.21        61
weighted avg       0.34      0.28      0.25        61



In [6]:
# Exercise 18.2
# Switch the labels to the meal time and retrain on the same features.
y = tips.loc[:, 'time'].values
# Seeded split: the precision/recall values derived below stay reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

In [7]:
# Per-sample class probabilities for the held-out split.
probs_pred = clf.predict_proba(X_test)
# Note: only the largest probability of each row is kept, i.e. the
# probability the model assigns to the class it ranks highest.
preds = list(probs_pred.max(axis=1))
y_pred = clf.predict(X_test)
# One (predicted label, top probability, true label) triple per test sample.
preds_cls = list(zip(y_pred, preds, y_test))

In [8]:
# Count TP, FP and FN (TN is not needed by either ratio)
def get_precision_recall_from_probs(probs, threshold=0.65):
    """Compute confidence-thresholded precision and recall.

    Each item of ``probs`` is indexed as ``(predicted_label, confidence,
    true_label)``. An item is classified as follows:

    * confidence >  threshold and labels match   -> true positive
    * confidence >  threshold and labels differ  -> false positive
    * confidence <= threshold and labels match   -> true negative (not counted)
    * confidence <= threshold and labels differ  -> false negative

    Args:
        probs: iterable of ``(predicted_label, confidence, true_label)`` items.
        threshold: confidence above which a prediction counts as positive.

    Returns:
        ``(precision, recall)`` where ``precision = tp / (tp + fp)`` and
        ``recall = tp / (tp + fn)``. A ratio whose denominator is zero (for
        example no prediction clears the threshold, or ``probs`` is empty) is
        reported as ``0.0`` instead of raising ``ZeroDivisionError``.

    NOTE(review): these ratios measure agreement of confident predictions, not
    per-class precision/recall as computed by scikit-learn -- confirm this is
    the definition the exercise intends.
    """
    tp = 0
    fp = 0
    fn = 0
    for item in probs:
        predicted, confidence, actual = item[0], item[1], item[2]
        correct = predicted == actual
        if confidence > threshold:
            if correct:
                # Confident and right: true positive
                tp += 1
            else:
                # Confident but wrong: false positive
                fp += 1
        elif confidence <= threshold and not correct:
            # Not confident and wrong: false negative
            fn += 1
        # Not confident but right is a true negative; it enters neither
        # ratio, so it is deliberately not counted.
    # Guard the denominators: with a high threshold there may be no positives.
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    return precision, recall

# Precision/recall at three confidence thresholds: 0.65 (the default), 0.80 and 0.50.
p_65, r_65 = get_precision_recall_from_probs(preds_cls, threshold=0.65)
p_80, r_80 = get_precision_recall_from_probs(preds_cls, threshold=0.80)
p_50, r_50 = get_precision_recall_from_probs(preds_cls, threshold=0.50)
for precision, recall in ((p_65, r_65), (p_80, r_80), (p_50, r_50)):
    print(precision, recall)

0.6111111111111112 0.9166666666666666
0.7333333333333333 0.3548387096774194
0.6065573770491803 1.0


In [10]:
# Exercise 18.3
# Single-feature linear regression: predict the tip from the total bill.
# Both arrays are reshaped into one-column 2-D arrays.
X = tips['total_bill'].to_numpy().reshape(-1, 1)
y = tips['tip'].to_numpy().reshape(-1, 1)
lin_reg = LinearRegression().fit(X, y)
# The model is scored on the same rows it was fitted on, so the figures
# below are in-sample metrics.
y_pred = lin_reg.predict(X)
mape = mean_absolute_percentage_error(y, y_pred)
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)
print('MAPE: {}'.format(round(mape, 2)))
print('MSE: {}'.format(round(mse, 2)))
print('R2: {}'.format(round(r2, 2)))

MAPE: 0.28
MSE: 1.04
R2: 0.46
