# Evaluation

In this notebook I evaluate the classification obtained from the ensemble in notebook 4. 

## Load predictions

Load the labels for the test data and the test-set predictions computed in the previous notebook:

In [22]:
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder

# Read the test-set metadata out of the point-cloud features archive.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only load archives that come from a trusted source.
with np.load(os.path.join('production', 'features_point_clouds_test.npz'), allow_pickle=True) as data:
    data_dict = {name: data[name] for name in data.files}
    metadata_test = data_dict['metadata']

# Column 3 of the metadata carries the labels; encode them as ints.
# The fitted encoder is kept so later cells can map ints back to class names.
encoder = LabelEncoder()
labels_test = encoder.fit_transform(metadata_test[:, 3])

# Read the test-set predictions computed in the previous notebook.
with np.load(os.path.join('production', 'predictions.npz'), allow_pickle=True) as data:
    data_dict = {name: data[name] for name in data.files}
    # Every prediction except pred_logreg is stored as a 1-tuple wrapping the
    # array; later cells rely on this and read e.g. pred_knn[0].
    # pred_logreg is the plain array.
    pred_elevation = (data_dict['pred_elevation'],)
    pred_plan = (data_dict['pred_plan'],)
    pred_section = (data_dict['pred_section'],)
    pred_textures = (data_dict['pred_textures'],)
    pred_knn = (data_dict['pred_knn'],)
    pred_logreg = data_dict['pred_logreg']


## Confusion matrices and classification reports
### k-NN

In [31]:
from sklearn.metrics import confusion_matrix
import pandas as pd

# Class names in encoded order: entry i is the label the encoder maps to int i.
# Read from the fitted encoder instead of a hard-coded [0, ..., 5] so the table
# does not silently depend on there being exactly six classes.
class_names = encoder.classes_

# Compute confusion matrix.
# `labels` pins the rows/columns to every encoded class, so the matrix shape
# always matches `class_names`, even if a class were missing from both the
# true labels and the predictions.
matrix = confusion_matrix(
    y_true=labels_test,  # array with true labels
    y_pred=pred_knn[0],  # array with predicted labels (pred_knn is a 1-tuple)
    labels=list(range(len(class_names))),
)

# Format as a DataFrame: rows are the true classes, columns the predicted ones
matrix_df = pd.DataFrame(data=matrix, columns=class_names, index=class_names)
matrix_df.columns.name = 'Predictions'
matrix_df.index.name = 'True class'
matrix_df

Predictions,Bronze Age,Greek,Iron Age,Neolithic Linear Pottery Culture (LBK),Neolithic Stroked Pottery culture (SBK),Roman
True class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bronze Age,34,0,0,0,0,0
Greek,0,6,1,0,0,1
Iron Age,1,0,8,0,2,1
Neolithic Linear Pottery Culture (LBK),0,0,0,30,0,0
Neolithic Stroked Pottery culture (SBK),1,0,0,4,3,0
Roman,0,1,3,0,0,3


In [43]:
from sklearn.metrics import classification_report
# Per-class precision, recall and F1 for the k-NN predictions.
# Classes are reported by their encoded integer id.
print(
    classification_report(
        y_true=labels_test,
        y_pred=pred_knn[0],  # pred_knn is a 1-tuple; element 0 is the label array
    )
)

              precision    recall  f1-score   support

           0       0.94      1.00      0.97        34
           1       0.86      0.75      0.80         8
           2       0.67      0.67      0.67        12
           3       0.88      1.00      0.94        30
           4       0.60      0.38      0.46         8
           5       0.60      0.43      0.50         7

   micro avg       0.85      0.85      0.85        99
   macro avg       0.76      0.70      0.72        99
weighted avg       0.83      0.85      0.84        99



### Logistic regression

In [38]:
# Class names in encoded order: entry i is the label the encoder maps to int i.
# Read from the fitted encoder instead of a hard-coded [0, ..., 5] so the table
# does not silently depend on there being exactly six classes.
class_names = encoder.classes_

# Compute confusion matrix.
# `labels` pins the rows/columns to every encoded class, so the matrix shape
# always matches `class_names`, even if a class were missing from both the
# true labels and the predictions.
matrix = confusion_matrix(
    y_true=labels_test,  # array with true labels
    y_pred=pred_logreg,  # array with predicted labels
    labels=list(range(len(class_names))),
)

# Format as a DataFrame: rows are the true classes, columns the predicted ones
matrix_df = pd.DataFrame(data=matrix, columns=class_names, index=class_names)
matrix_df.columns.name = 'Predictions'
matrix_df.index.name = 'True class'
matrix_df

Predictions,Bronze Age,Greek,Iron Age,Neolithic Linear Pottery Culture (LBK),Neolithic Stroked Pottery culture (SBK),Roman
True class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bronze Age,33,0,0,1,0,0
Greek,0,7,1,0,0,0
Iron Age,2,0,10,0,0,0
Neolithic Linear Pottery Culture (LBK),0,0,0,30,0,0
Neolithic Stroked Pottery culture (SBK),1,0,0,2,5,0
Roman,0,2,2,0,0,3


In [44]:
# Per-class precision, recall and F1 for the logistic-regression predictions.
# Classes are reported by their encoded integer id.
print(
    classification_report(
        y_true=labels_test,
        y_pred=pred_logreg,
    )
)

              precision    recall  f1-score   support

           0       0.92      0.97      0.94        34
           1       0.78      0.88      0.82         8
           2       0.77      0.83      0.80        12
           3       0.91      1.00      0.95        30
           4       1.00      0.62      0.77         8
           5       1.00      0.43      0.60         7

   micro avg       0.89      0.89      0.89        99
   macro avg       0.90      0.79      0.81        99
weighted avg       0.90      0.89      0.88        99



In [37]:
# Inspect the raw logistic-regression predictions (encoded class ids, one per test sample)
pred_logreg

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 0, 4, 4, 3, 4, 3, 3, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 1, 1, 1, 1,
       1, 1, 1, 2, 1, 5, 5, 1, 5, 2, 2])

In [41]:
# Inspect the raw k-NN predictions; pred_knn is a 1-tuple, so [0] selects the array itself
pred_knn[0]

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 4, 3, 4, 3, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 4, 4, 5, 2, 2, 0, 1, 1, 1, 1,
       1, 1, 1, 5, 2, 5, 5, 2, 5, 2, 2])