# Evaluation

In this notebook I evaluate the classification obtained from the ensemble in notebook 4. 

## Load predictions

Load the labels for the test data and the predictions for the test set computed in the previous notebook:

In [1]:
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder

# Column of the metadata array that holds the class label of each sample.
LABEL_COLUMN = 3

# Load the labels for the test data. Only the 'metadata' member is read:
# an .npz archive loads each array on access, so any other arrays stored in
# the features file are not materialised just to be discarded.
# NOTE: allow_pickle=True unpickles object arrays; only use it on files
# produced by this project, never on untrusted input.
with np.load(os.path.join('production', 'features_point_clouds_test.npz'), allow_pickle=True) as data:
    metadata_test = data['metadata']

# Encode labels as ints (the encoder is kept to map codes back to class names).
# NOTE(review): the encoder is fitted on the test labels only; this assumes the
# resulting codes match the ones used for the stored predictions - TODO confirm
# against the previous notebook.
encoder = LabelEncoder()
labels_test = encoder.fit_transform(metadata_test[:, LABEL_COLUMN])

# Load predictions for the test set computed in the previous notebook.
with np.load(os.path.join('production', 'predictions.npz'), allow_pickle=True) as data:
    # `data_dict` stays bound to the predictions because a later cell reads it.
    data_dict = dict(data.items())
    pred_knn = data_dict['pred_knn']
    pred_logreg = data_dict['pred_logreg']

# Fail early if labels and predictions do not describe the same samples.
assert len(labels_test) == len(pred_knn) == len(pred_logreg), (
    'labels and predictions must have the same number of samples'
)


In [2]:
# Display the k-NN predictions through the variable bound in the loading cell
# instead of reaching back into the temporary `data_dict`.
pred_knn

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 4, 3, 4, 4, 3, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 1, 1, 1, 1,
       1, 1, 1, 2, 1, 5, 5, 2, 5, 2, 2])

In [3]:
# Display the logistic-regression predictions loaded from 'predictions.npz'.
pred_logreg

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 0, 4, 4, 3, 4, 3, 3, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 5, 2, 2, 0, 1, 1, 1, 1,
       1, 1, 1, 2, 1, 5, 5, 2, 5, 2, 2])

## Confusion matrices and classification reports
### k-NN

In [4]:
from sklearn.metrics import confusion_matrix
import pandas as pd

# Compute the confusion matrix for the k-NN predictions
# (rows are true classes, columns are predicted classes).
matrix = confusion_matrix(
    y_true=labels_test,  # array with true labels
    y_pred=pred_knn,  # array with predicted labels
)

# Format as a DataFrame.
# `encoder.classes_` lists the class names in encoded order (entry i is the
# name of code i), so it works for any number of classes instead of assuming
# exactly six.
class_names = encoder.classes_
matrix_df = pd.DataFrame(data=matrix, columns=class_names, index=class_names)
matrix_df.columns.name = 'Predictions'
matrix_df.index.name = 'True class'
matrix_df

Predictions,Bronze Age,Greek,Iron Age,Neolithic Linear Pottery Culture (LBK),Neolithic Stroked Pottery culture (SBK),Roman
True class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bronze Age,33,0,0,1,0,0
Greek,0,7,1,0,0,0
Iron Age,2,0,10,0,0,0
Neolithic Linear Pottery Culture (LBK),0,0,0,30,0,0
Neolithic Stroked Pottery culture (SBK),1,0,0,3,4,0
Roman,0,1,3,0,0,3


In [5]:
from sklearn.metrics import classification_report
# Per-class precision, recall and F1 for the k-NN predictions.
knn_report = classification_report(y_true=labels_test, y_pred=pred_knn)
print(knn_report)

              precision    recall  f1-score   support

           0       0.92      0.97      0.94        34
           1       0.88      0.88      0.88         8
           2       0.71      0.83      0.77        12
           3       0.88      1.00      0.94        30
           4       1.00      0.50      0.67         8
           5       1.00      0.43      0.60         7

   micro avg       0.88      0.88      0.88        99
   macro avg       0.90      0.77      0.80        99
weighted avg       0.89      0.88      0.87        99



### Logistic regression

In [6]:
# Compute the confusion matrix for the logistic-regression predictions
# (rows are true classes, columns are predicted classes).
matrix = confusion_matrix(
    y_true=labels_test,  # array with true labels
    y_pred=pred_logreg,  # array with predicted labels
)

# Format as a DataFrame.
# `encoder.classes_` lists the class names in encoded order (entry i is the
# name of code i), so it works for any number of classes instead of assuming
# exactly six.
class_names = encoder.classes_
matrix_df = pd.DataFrame(data=matrix, columns=class_names, index=class_names)
matrix_df.columns.name = 'Predictions'
matrix_df.index.name = 'True class'
matrix_df

Predictions,Bronze Age,Greek,Iron Age,Neolithic Linear Pottery Culture (LBK),Neolithic Stroked Pottery culture (SBK),Roman
True class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bronze Age,33,0,0,1,0,0
Greek,0,7,1,0,0,0
Iron Age,2,0,9,0,0,1
Neolithic Linear Pottery Culture (LBK),0,0,0,29,1,0
Neolithic Stroked Pottery culture (SBK),1,0,0,4,3,0
Roman,0,1,3,0,0,3


In [7]:
# Per-class precision, recall and F1 for the logistic-regression predictions.
logreg_report = classification_report(y_true=labels_test, y_pred=pred_logreg)
print(logreg_report)

              precision    recall  f1-score   support

           0       0.92      0.97      0.94        34
           1       0.88      0.88      0.88         8
           2       0.69      0.75      0.72        12
           3       0.85      0.97      0.91        30
           4       0.75      0.38      0.50         8
           5       0.75      0.43      0.55         7

   micro avg       0.85      0.85      0.85        99
   macro avg       0.81      0.73      0.75        99
weighted avg       0.84      0.85      0.84        99



In [8]:
# NOTE(review): this repeats the `pred_logreg` display from the "Load predictions"
# section unchanged; candidate for removal.
pred_logreg

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 0, 4, 4, 3, 4, 3, 3, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 5, 2, 2, 0, 1, 1, 1, 1,
       1, 1, 1, 2, 1, 5, 5, 2, 5, 2, 2])

In [9]:
# Spot-check a single element: the first k-NN prediction.
pred_knn[0]

3