In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
seed = 1234

# 1. Classification and hold-out
## 1.1 Load 'abalone' dataset 

The following are the attribute names, attribute types, the measurement units and a brief description.

Your goal is to predict the sex of the abalone, based on the rest of the available information.

```
Name / Data Type / Measurement Unit / Description
-----------------------------
Length / continuous / mm / Longest shell measurement
Diameter / continuous / mm / perpendicular to length
Height / continuous / mm / with meat in shell
Whole weight / continuous / grams / whole abalone
Shucked weight / continuous / grams / weight of meat
Viscera weight / continuous / grams / gut weight (after bleeding)
Shell weight / continuous / grams / after being dried
Rings / integer / -- / +1.5 gives the age in years 
Sex / nominal / -- / 2: M, 0: F, and 1: I (infant)
```

In [None]:
# Load the dataset
# The file is parsed as space-separated with no header row, so the column names are supplied explicitly.
df = pd.read_csv("abalone.csv", sep=" ", header=None, names=["length", "diameter", "height", "w_weight", "s_weight", "v_weight", "sh_weight", "rings", "sex"])
# TODO(exercise): feature matrix -- every column except the "sex" target
X = ??
# TODO(exercise): target vector -- the "sex" column
y_truth = ??

# Count items for each class
??

## 1.2 Create train and test splits
- Use the train_test_split() method

In [None]:
# Separate data into training and test set
# Default test_size = 0.25
# TODO(exercise): call train_test_split() on X and y_truth; passing random_state=seed keeps the split repeatable
X_train, X_test, y_train, y_test = ??


## 1.3 Train classifier and make predictions
- Use Gaussian Naive Bayes classifier
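- `GaussianNB` has no random component: the results are repeatable as long as `random_state` (e.g. `seed`) is fixed in the `train_test_split()` call of section 1.2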

In [None]:
# TODO(exercise): create the Gaussian Naive Bayes classifier and fit it on the training split
clf = ??

# TODO(exercise): predicted labels for X_test
y_test_pred = ??

## 1.4 Evaluate the results
- Evaluation using accuracy score

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

# Compute accuracy
# TODO(exercise): compare the true test labels (y_test) with the predictions (y_test_pred)
acc = ??
print(f"Accuracy = {acc:.2f}")

- **Accuracy** seems good, but if we look at the scores separately for each class...

In [None]:
# Per-class precision, recall, F1 and support on the hold-out test split.
# Each returned array has one entry per class.
p, r, f1, support = precision_recall_fscore_support(y_test, y_test_pred)

# "Class c" is the position of the class in the returned arrays.
for c, (n_items, prec, rec, f_score) in enumerate(zip(support, p, r, f1)):
    print(f"\nClass {c}:")
    print(f"number of items: {n_items}")
    print(f"p = {prec:.2f}")
    print(f"r = {rec:.2f}")
    print(f"f1 = {f_score:.2f}")

# Macro average f1
macro_f1 = ?? 
    
# This score is important when you have class imbalancing
print(f"\nF1, macro-average: {macro_f1:2f}")

- Accuracy was good because of class imbalance
- The **minority class** (c2) has a very low recall
- Indeed, the **macro-averaged** F1 is quite low.

### Let's verify this with a confusion matrix:

In [None]:
# Build the confusion matrix (rows: actual labels, columns: predicted labels)
conf_mat = confusion_matrix(y_true=y_test, y_pred=y_test_pred)

In [None]:
# Plot the result
# The number of classes is read from the matrix itself, so this cell depends only
# on conf_mat and not on variables left over from the per-class metrics cell.
# NOTE: assumes the class labels are the integers 0..n_classes-1, as listed for
# "Sex" in the dataset description -- confirm if the encoding changes.
label_names = np.arange(conf_mat.shape[0])
conf_mat_df = pd.DataFrame(conf_mat, index=label_names, columns=label_names)
# The axis names become the heatmap's y/x axis labels.
conf_mat_df.index.name = 'Actual'
conf_mat_df.columns.name = 'Predicted'
sns.heatmap(conf_mat_df, annot=True, cmap='GnBu',
            annot_kws={"size": 16}, fmt='g', cbar=False)
plt.show()

# 2. Cross-Validation
##  2.1 With kfold.split()

In [None]:
from sklearn.model_selection import KFold
# K-Fold with 5 splits; the shuffle is seeded so the folds (and therefore the
# scores) are the same on every run
kfold = KFold(n_splits=5, shuffle=True, random_state=seed)

print("Scores for each kfold iteration.")
i = 0
for train_indices, test_indices in kfold.split(X, y_truth):
    # Prepare splits
    X_train = ??
    y_train = ??
    X_test = ??
    y_test = ??
    
    # Train and evaluate
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    y_test_pred = clf.predict(X_test)
    
    # Compute macro average f1
    _, _, f1, _ = precision_recall_fscore_support(y_test, y_test_pred)
    macro_f1 = f1.mean()
    
    print(f"Iteration {i}. macro-f1 = {macro_f1}")
    i+=1

## 2.2 With cross_val_score()
- Use scoring = 'f1_macro'

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# Unfitted estimator to hand to cross_val_score()
clf = GaussianNB()
# TODO(exercise): array of per-fold scores from cross_val_score() with scoring='f1_macro'
f1_cv = ??

In [None]:
# Raw per-fold scores first, then their summary statistics.
print(f"Macro-f1 for each iteration: {f1_cv}")

mean_macro_f1 = f1_cv.mean()
# The "+/-" band reported below is two standard deviations, not one.
std_macro_f1 = 2 * f1_cv.std()

print(f"Macro-f1 (statistics): {mean_macro_f1:.2f} (+/- {std_macro_f1:.2f})")

## 2.3 Leave-One-Out and scoring: cross_val_predict()

- The previous approach (average of F1 for each iteration) cannot be used with leave one out. 
    - Iteration 0: y_test = [1] -> F1?
    - Iteration 1: y_test = [0] -> F1?
    - ...
    - Iteration N-1: y_test = [1] -> F1?
- When test set has only 1 sample, F1, precision and recall cannot be properly computed.
- The following solution trains N models with leave-one-out and applies each of them to its single held-out sample to build the vector y_pred (each model contributes exactly one prediction to y_pred). Finally, it computes a single score by comparing y_pred with y_truth

In [None]:
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import LeaveOneOut

# Leave-one-out: cross_val_predict() returns one out-of-sample prediction per
# item of X, each made while that item was held out.
clf = GaussianNB()
y_pred = cross_val_predict(clf, X, y_truth, cv=LeaveOneOut())

# Score once over the pooled predictions; element 2 of the result is the
# per-class F1 array.
f1_loo = precision_recall_fscore_support(y_truth, y_pred)[2]
macro_f1_loo = f1_loo.mean()

print(f"F1, for each class: {f1_loo}")
print(f"Macro-f1 = {macro_f1_loo:.2f}")