# Day 09. Exercise 02
# Metrics

## 0. Imports

In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import label_binarize

## 1. Preprocessing

1. Create the same dataframe as in the previous exercise.
2. Using `train_test_split` with parameters `test_size=0.2`, `random_state=21` get `X_train`, `y_train`, `X_test`, `y_test`. Use the additional parameter `stratify`.

In [2]:
df = pd.read_csv("../data/day-of-week-not-scaled.csv")
dayofweek = pd.read_csv("../data/dayofweek.csv")
df['dayofweek'] = dayofweek['dayofweek'].values
df.head()

Unnamed: 0,numTrials,hour,uid_user_0,uid_user_1,uid_user_10,uid_user_11,uid_user_12,uid_user_13,uid_user_14,uid_user_15,...,labname_lab03,labname_lab03s,labname_lab05s,labname_laba04,labname_laba04s,labname_laba05,labname_laba06,labname_laba06s,labname_project1,dayofweek
0,1,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4
1,2,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4
2,3,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4
3,4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4
4,5,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4


In [3]:
X = df.drop(columns='dayofweek')
y = df['dayofweek']
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=21,
    stratify=y
)

## 2. SVM

1. Use the best parameters from the previous exercise and train the model of SVM.
2. You need to calculate `accuracy`, `precision`, `recall`, `ROC AUC`.

 - `precision` and `recall` should be calculated for each class (use `average='weighted'`)
 - `ROC AUC` should be calculated for each class against any other class (all possible pairwise combinations) and then weighted average should be applied for the final metric
 - the code in the cell should display the result as below:

```
accuracy is 0.88757
precision is 0.89267
recall is 0.88757
roc_auc is 0.97878
```

In [4]:
svm_model = SVC(
    probability=True, 
    random_state=21, 
    C=10, 
    class_weight=None,
    gamma='auto',
    kernel='rbf'
)

svm_model.fit(X_train, y_train)

In [5]:
y_pred = svm_model.predict(X_test)
y_proba = svm_model.predict_proba(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

classes = np.unique(y_test)
y_test_binarized = label_binarize(y_test, classes=classes)
roc_auc = roc_auc_score(y_test_binarized, y_proba, average='weighted', multi_class='ovr')

print(f"accuracy is {accuracy:.5f}")
print(f"precision is {precision:.5f}")
print(f"recall is {recall:.5f}")
print(f"roc_auc is {roc_auc:.5f}")

accuracy is 0.88757
precision is 0.89267
recall is 0.88757
roc_auc is 0.98168


## 3. Decision tree

1. The same task for decision tree

In [6]:
tree_model = DecisionTreeClassifier(
    random_state=21,
    class_weight='balanced',
    criterion='gini',
    max_depth=23
)

tree_model.fit(X_train, y_train)

In [7]:
y_pred = tree_model.predict(X_test)
y_proba = tree_model.predict_proba(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

classes = np.unique(y_test)
y_test_binarized = label_binarize(y_test, classes=classes)
roc_auc = roc_auc_score(y_test_binarized, y_proba, average='weighted', multi_class='ovr')

print(f"accuracy is {accuracy:.5f}")
print(f"precision is {precision:.5f}")
print(f"recall is {recall:.5f}")
print(f"roc_auc is {roc_auc:.5f}")

accuracy is 0.89349
precision is 0.89531
recall is 0.89349
roc_auc is 0.93692


## 4. Random forest

1. The same task for random forest.

In [8]:
forest_model = RandomForestClassifier(
    random_state=21, 
    class_weight=None,
    criterion='gini',
    max_depth=28,
    n_estimators=50
)

forest_model.fit(X_train, y_train)

In [9]:
y_pred = forest_model.predict(X_test)
y_proba = forest_model.predict_proba(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

classes = np.unique(y_test)
y_test_binarized = label_binarize(y_test, classes=classes)
roc_auc = roc_auc_score(y_test_binarized, y_proba, average='weighted', multi_class='ovr')

print(f"accuracy is {accuracy:.5f}")
print(f"precision is {precision:.5f}")
print(f"recall is {recall:.5f}")
print(f"roc_auc is {roc_auc:.5f}")

accuracy is 0.92899
precision is 0.93009
recall is 0.92899
roc_auc is 0.99151


## 5. Predictions

1. Choose the best model.
2. Analyze: for which `weekday` your model makes the most errors (in % of the total number of samples of that class in your full dataset), for which `labname` and for which `users`.
3. Save the model.

In [10]:
best_model = RandomForestClassifier(
    random_state=21, 
    class_weight=None,
    criterion='gini',
    max_depth=28,
    n_estimators=50
)

best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)

df_analyze = X_test.copy()
df_analyze['true'] = y_test
df_analyze['pred'] = y_pred
df_analyze['is_correct'] = df_analyze['true'] == df_analyze['pred']
df_analyze.head()

Unnamed: 0,numTrials,hour,uid_user_0,uid_user_1,uid_user_10,uid_user_11,uid_user_12,uid_user_13,uid_user_14,uid_user_15,...,labname_lab05s,labname_laba04,labname_laba04s,labname_laba05,labname_laba06,labname_laba06s,labname_project1,true,pred,is_correct
1087,67,17,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1,1,True
16,1,13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,5,5,True
563,14,10,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6,6,True
1381,20,15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3,3,True
1199,9,13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2,2,True


In [11]:
# weekday
weekday_error = df_analyze.groupby('true')['is_correct'].mean().sort_values(ascending=False) * 100
weekday_error

true
6    98.591549
3    97.500000
5    94.444444
2    93.333333
1    89.090909
4    85.714286
0    74.074074
Name: is_correct, dtype: float64

ОТВЕТ: больше всего ошибок для 0 дня (понедельник)

In [12]:
# labname
labnames = [col for col in X_test.columns if col.startswith('labname')]

lab_errors = {}
for labname in labnames:
    with_lab = df_analyze[X_test[labname] == 1]
    if len(with_lab) > 0:
        lab_errors[labname] = (with_lab['is_correct'].sum() / len(with_lab)) * 100

lab_errors

{'labname_code_rvw': np.float64(92.3076923076923),
 'labname_lab03': np.float64(0.0),
 'labname_lab03s': np.float64(0.0),
 'labname_lab05s': np.float64(83.33333333333334),
 'labname_laba04': np.float64(82.85714285714286),
 'labname_laba04s': np.float64(100.0),
 'labname_laba05': np.float64(97.87234042553192),
 'labname_laba06': np.float64(77.77777777777779),
 'labname_laba06s': np.float64(86.66666666666667),
 'labname_project1': np.float64(95.16129032258065)}

ОТВЕТ: по лабораторным работам labname_lab03 и labname_lab03s нет ни одного верного предсказания (либо они просто не попали в выборку)

In [13]:
# user
users = [col for col in X_test.columns if col.startswith('uid')]

users_errors = {}
for user in users:
    with_user = df_analyze[df_analyze[user] == 1]
    if len(with_user) > 0:
        users_errors[user] = (with_user['is_correct'].sum() / len(with_user)) * 100

users_errors

{'uid_user_1': np.float64(100.0),
 'uid_user_10': np.float64(91.66666666666666),
 'uid_user_12': np.float64(100.0),
 'uid_user_13': np.float64(100.0),
 'uid_user_14': np.float64(96.7741935483871),
 'uid_user_15': np.float64(100.0),
 'uid_user_16': np.float64(60.0),
 'uid_user_17': np.float64(100.0),
 'uid_user_18': np.float64(83.33333333333334),
 'uid_user_19': np.float64(78.94736842105263),
 'uid_user_2': np.float64(96.42857142857143),
 'uid_user_20': np.float64(100.0),
 'uid_user_21': np.float64(100.0),
 'uid_user_22': np.float64(0.0),
 'uid_user_23': np.float64(100.0),
 'uid_user_24': np.float64(90.9090909090909),
 'uid_user_25': np.float64(90.9090909090909),
 'uid_user_26': np.float64(100.0),
 'uid_user_27': np.float64(83.33333333333334),
 'uid_user_28': np.float64(100.0),
 'uid_user_29': np.float64(90.9090909090909),
 'uid_user_3': np.float64(85.71428571428571),
 'uid_user_30': np.float64(87.5),
 'uid_user_31': np.float64(88.88888888888889),
 'uid_user_4': np.float64(92.5925925925

ОТВЕТ: по юзеру uid_user_22 нет ни одного верного предсказания

In [14]:
joblib.dump(best_model, 'forest_model.pkl')

['forest_model.pkl']

## 6. Function

1. Write a function that takes a list of different models and a corresponding list of parameters (dicts) and returns a dict that contains all the 4 metrics for each model.

In [15]:
def evaluate_models(models, params_list, X_train, y_train, X_test, y_test):
    results = {}

    classes = np.unique(y_test)
    y_test_binarized = label_binarize(y_test, classes=classes)

    for model, params in zip(models, params_list):
        model.set_params(**params)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        y_proba = model.predict_proba(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        roc_auc = roc_auc_score(y_test_binarized, y_proba, average='weighted', multi_class='ovr')

        results[model.__class__.__name__] = {
            'accuracy': round(accuracy, 5),
            'precision': round(precision, 5),
            'recall': round(recall, 5),
            'roc_auc': round(roc_auc, 5)
        }

    return results

In [16]:
models = [SVC(), DecisionTreeClassifier(), RandomForestClassifier()]

params_list = [
    {
        'probability': True,
        'random_state': 21,
        'C': 10,
        'class_weight': None,
        'gamma': 'auto',
        'kernel': 'rbf'
    },
    {
        'random_state': 21,
        'class_weight': 'balanced',
        'criterion': 'gini',
        'max_depth': 23
    },
    {
        'random_state': 21,
        'class_weight': None,
        'criterion': 'gini',
        'max_depth': 28,
        'n_estimators': 50
    }
]

In [17]:
evaluate_models(models, params_list, X_train, y_train, X_test, y_test)

{'SVC': {'accuracy': 0.88757,
  'precision': 0.89267,
  'recall': 0.88757,
  'roc_auc': np.float64(0.98168)},
 'DecisionTreeClassifier': {'accuracy': 0.89349,
  'precision': 0.89531,
  'recall': 0.89349,
  'roc_auc': np.float64(0.93692)},
 'RandomForestClassifier': {'accuracy': 0.92899,
  'precision': 0.93009,
  'recall': 0.92899,
  'roc_auc': np.float64(0.99151)}}

In [18]:
# SVC:
# accuracy is 0.88757
# precision is 0.89267
# recall is 0.88757
# roc_auc is 0.98168

In [19]:
# DecisionTreeClassifier:
# accuracy is 0.89349
# precision is 0.89531
# recall is 0.89349
# roc_auc is 0.93692

In [20]:
# RandomForestClassifier:
# accuracy is 0.92899
# precision is 0.93009
# recall is 0.92899
# roc_auc is 0.99151