# This is Step 4 in the Pipeline - Evaluating the model
With this notebook, we can evaluate and visualize the performance of the model trained in Step 3.

### Imports

In [1]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import os

### Parameters

In [2]:
# Folder of the model to evaluate. The following cells expect it to contain
# a `metrics.csv` file and a `models/` sub-folder.
MODEL_FOLDER = './source/model/tms_maccs/OneVsRestClassifier_DecisionTreeClassifier'

In [3]:
# Fail fast with a readable message when the expected folder layout is
# missing, rather than hitting a less obvious error in a later cell.
assert os.path.isdir(MODEL_FOLDER), 'model folder not found: ' + MODEL_FOLDER
assert os.path.isfile(os.path.join(MODEL_FOLDER, 'metrics.csv')), 'metrics.csv not found'
assert os.path.isdir(os.path.join(MODEL_FOLDER, 'models')), 'models folder not found'

### Load Metrics

In [4]:
# Read the evaluation metrics saved in the model folder, then print a
# summary of the resulting frame (columns, non-null counts, dtypes).
metrics_csv = os.path.join(MODEL_FOLDER, 'metrics.csv')
metrics = pd.read_csv(metrics_csv)
metrics.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 25 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   repeat                     10 non-null     int64  
 1   fold                       10 non-null     int64  
 2   model_training_data_path   10 non-null     object 
 3   accuracy_score             10 non-null     float64
 4   log_loss                   10 non-null     float64
 5   hamming_loss               10 non-null     float64
 6   f1_score__micro            10 non-null     float64
 7   f1_score__macro            10 non-null     float64
 8   f1_score__weighted         10 non-null     float64
 9   f1_score__samples          10 non-null     float64
 10  precision_score__micro     10 non-null     float64
 11  precision_score__macro     10 non-null     float64
 12  precision_score__weighted  10 non-null     float64
 13  precision_score__samples   10 non-null     float64
 1

In [10]:
# Show the metrics table itself; the saved output has one row per
# repeat/fold combination.
metrics

Unnamed: 0,repeat,fold,model_training_data_path,accuracy_score,log_loss,hamming_loss,f1_score__micro,f1_score__macro,f1_score__weighted,f1_score__samples,...,recall_score__macro,recall_score__weighted,recall_score__samples,jaccard_score__micro,jaccard_score__macro,jaccard_score__weighted,jaccard_score__samples,roc_auc_score,label_ranking_loss,coverage_error
0,0,0,./source/model/tms_maccs/OneVsRestClassifier_D...,0.026446,342.919973,0.068754,0.836332,0.450367,0.832305,0.838523,...,0.436237,0.82838,0.845132,0.718703,0.353903,0.75719,0.734506,,0.18789,153.14876
1,0,1,./source/model/tms_maccs/OneVsRestClassifier_D...,0.018182,308.850216,0.064084,0.843177,0.458386,0.843495,0.843734,...,0.45902,0.845031,0.857603,0.728874,0.360007,0.77237,0.741174,,0.175845,152.619835
2,0,2,./source/model/tms_maccs/OneVsRestClassifier_D...,0.019835,341.50476,0.071094,0.829048,0.437841,0.825388,0.832286,...,0.42861,0.825254,0.844928,0.708011,0.341186,0.750195,0.725857,,0.190279,152.991736
3,0,3,./source/model/tms_maccs/OneVsRestClassifier_D...,0.019835,349.291111,0.072369,0.825381,0.443114,0.821534,0.828538,...,0.428794,0.818849,0.838118,0.702679,0.344147,0.746104,0.720734,,0.196705,154.414876
4,0,4,./source/model/tms_maccs/OneVsRestClassifier_D...,0.041322,326.080544,0.066036,0.841824,0.457189,0.840151,0.841928,...,0.452977,0.838544,0.851639,0.726853,0.360661,0.767486,0.739843,,0.181452,148.642975
5,1,0,./source/model/tms_maccs/OneVsRestClassifier_D...,0.036364,342.590445,0.0698,0.831878,0.452339,0.828331,0.835228,...,0.439483,0.824875,0.844099,0.71215,0.354527,0.753882,0.73102,,0.189395,152.761983
6,1,1,./source/model/tms_maccs/OneVsRestClassifier_D...,0.034711,324.166784,0.06753,0.837042,0.461857,0.836086,0.836646,...,0.459083,0.837122,0.849811,0.719752,0.364069,0.761842,0.7316,,0.184607,154.661157
7,1,2,./source/model/tms_maccs/OneVsRestClassifier_D...,0.024793,314.080189,0.065727,0.839793,0.453665,0.839742,0.840785,...,0.457275,0.841775,0.85474,0.723831,0.356636,0.767622,0.737852,,0.179418,150.983471
8,1,3,./source/model/tms_maccs/OneVsRestClassifier_D...,0.024793,348.753478,0.070835,0.830611,0.440463,0.826896,0.833319,...,0.429804,0.822464,0.840483,0.710295,0.342911,0.751965,0.726771,,0.193513,155.87438
9,1,4,./source/model/tms_maccs/OneVsRestClassifier_D...,0.036364,336.693442,0.06752,0.838621,0.457358,0.836344,0.840541,...,0.448281,0.831791,0.847895,0.72209,0.358094,0.762759,0.738624,,0.184659,152.340496


### Load Model

In [5]:
# Collect the pickled model files. `os.listdir` returns entries in arbitrary
# order, so the names are sorted to make `model_files[0]` refer to the same
# file on every run and on every machine.
models_dir = os.path.join(MODEL_FOLDER, 'models')
model_files = sorted(
    name for name in os.listdir(models_dir) if name.endswith('.pkl')
)
# Stop here with a clear message instead of an IndexError further down.
assert model_files, 'no .pkl files found in models folder'

In [6]:
# NOTE: unpickling can execute arbitrary code -- only load model files that
# come from a trusted source.
# The file is opened in a `with` block so the handle is closed as soon as
# loading finishes (the bare `open(...)` call never closed it).
with open(os.path.join(MODEL_FOLDER, 'models', model_files[0]), 'rb') as model_file:
    model_data = pickle.load(model_file)
model_data.keys()

dict_keys(['model', 'X_train', 'y_train', 'X_test', 'y_test'])

In [7]:
# Pull the model and its train/test data out of the loaded dictionary.
model, X_train, y_train, X_test, y_test = (
    model_data[key]
    for key in ('model', 'X_train', 'y_train', 'X_test', 'y_test')
)

In [8]:
# Display the loaded model object as the cell output.
model

### Visualize

... This notebook is being reconstructed for better statistical analysis ...