### Error analysis

Error analysis is particularly useful for finding where the model makes mistakes, which in turn suggests
strategies to improve model performance.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import os
import utils

In [2]:
# Read the whole class file and split it on commas; the pieces are kept verbatim here
# (any leftover separator characters are removed in the next cell).
with open('class_list.txt', mode='r') as fp:
    raw_class_text = fp.read()
classes_list = raw_class_text.split(',')

In [3]:
# Drop the first two characters of every entry except the first one
# (presumably separator residue left by the comma split — check class_list.txt).
# NOTE: this mutates classes_list in place and is not idempotent; re-running the
# cell strips two more characters from each entry.
classes_list[1:] = [name[2:] for name in classes_list[1:]]

In [4]:
# Inspect the cleaned class names (the cell's last expression is rendered as its output).
classes_list

['Bread',
 'Dairy product',
 'Dessert',
 'Egg',
 'Fried food',
 'Meat',
 'Noodles/Pasta',
 'Rice',
 'Seafood',
 'Soup',
 'Vegetable/Fruit. ']

In [5]:
# Per-class sample counts, keyed by class name; both start empty and are
# filled in by the directory scan that follows.
training_samples_class = dict()
validation_samples_class = dict()

In [6]:
# Count the entries in each class folder of the training and validation splits.
# Folders are named by class index ("0", "1", ...); the i-th name in classes_list
# is used as the dictionary key for folder i.
# Iterating over classes_list (rather than a hard-coded 11) keeps the loop in step
# with whatever the class file contains.
for i, class_name in enumerate(classes_list):
    training_samples_class[class_name] = len(os.listdir(os.path.join('./training', str(i))))
    validation_samples_class[class_name] = len(os.listdir(os.path.join('./validation', str(i))))

In [7]:
# Show the number of training entries found for each class.
training_samples_class

{'Bread': 994,
 'Dairy product': 429,
 'Dessert': 1500,
 'Egg': 986,
 'Fried food': 848,
 'Meat': 1325,
 'Noodles/Pasta': 440,
 'Rice': 280,
 'Seafood': 855,
 'Soup': 1500,
 'Vegetable/Fruit. ': 709}

In [8]:
# Show the number of validation entries found for each class.
validation_samples_class

{'Bread': 362,
 'Dairy product': 144,
 'Dessert': 500,
 'Egg': 327,
 'Fried food': 326,
 'Meat': 449,
 'Noodles/Pasta': 147,
 'Rice': 96,
 'Seafood': 347,
 'Soup': 500,
 'Vegetable/Fruit. ': 232}

### Load Xception model

The Xception model was shown to perform best on this dataset compared with the ResNet50 and MobileNet
architectures. The model achieved the following results:<br>

Training accuracy:- 0.8435<br>
Validation accuracy:- 0.80053<br>
Test accuracy:- 0.8173<br>

Error in training data:- 0.1565 => 15.65%<br>
Error in validation data:- 0.19947 => 19.947%<br>
Error in test data:- 0.1827 => 18.27%<br>

### Let's create a dataframe

Create a dataframe which stores the results in the following format:<br>

Rows => No of classes<br>
Columns => [Total training samples, Proportion of training  data, Correctly classified, Wrongly classified, Training accuracy, % Error, % contrib to total error, Total validation samples, Proportion of validation data, Correctly classified, Wrongly classified, Validation accuracy, % Error , % contrib to total error]<br><br>
Total rows => NUM_CLASSES => 11<br>
Total columns => 14<br>

In [17]:
# Restore the saved Xception checkpoint (HDF5 file) that the error analysis evaluates.
MODEL_PATH = './models/xception-72-0.80.hdf5'
model = tf.keras.models.load_model(MODEL_PATH)

In [18]:
# Results matrix: one row per class and 14 metric columns, all initialised to zero.
model_analysis = np.zeros(shape=(len(classes_list), 14), dtype=float)

In [19]:
# Store the raw per-class sample counts: column 0 for training, column 7 for validation.
# The dicts were filled in class order, so their values line up with the matrix rows.
for column, counts in ((0, training_samples_class), (7, validation_samples_class)):
    model_analysis[:, column] = list(counts.values())

In [20]:
# Overall number of samples in each split, summed across all classes.
total_training_samples = sum(training_samples_class.values())
total_validation_samples = sum(validation_samples_class.values())

In [21]:
# Each class's share of its split, as a percentage: count * 100 / split total.
# Column 1 is derived from column 0 (training), column 8 from column 7 (validation).
model_analysis[:, 1] = model_analysis[:, 0] * 100 / total_training_samples
model_analysis[:, 8] = model_analysis[:, 7] * 100 / total_validation_samples

In [111]:
# Image pipeline used when scoring the model: no augmentation, only input scaling.
# NOTE(review): per the Keras docs, xception.preprocess_input already scales pixels
# to [-1, 1], and ImageDataGenerator multiplies by `rescale` after running
# `preprocessing_function`, so inputs end up scaled twice. Confirm the model was
# trained with this exact pipeline before changing either argument.
_xception_preprocess = tf.keras.applications.xception.preprocess_input
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255.0,
    preprocessing_function=_xception_preprocess,
)

In [46]:
def correctly_classified(directory):
    """Count, per class, how many images under `directory` the model labels correctly.

    Relies on the notebook-level `datagen` (image preprocessing) and `model`
    (the loaded classifier).

    Parameters
    ----------
    directory : str
        Root folder holding one sub-folder per class, each named with the
        class's integer index (e.g. ``0`` ... ``10``).

    Returns
    -------
    dict
        Maps every class index ``0 .. n_outputs - 1`` to the number of its
        images whose arg-max prediction equals that index. Classes with no
        correct prediction are present with a count of 0.

    Raises
    ------
    ValueError
        If the first path component of a file is not an integer folder name.
    """
    # shuffle=False keeps the predictions aligned with generator.filenames;
    # batch_size=1 with one step per file scores every image exactly once.
    generator = datagen.flow_from_directory(directory, target_size=(224, 224), batch_size=1, shuffle=False)
    # Model.predict accepts the generator directly; predict_generator is deprecated.
    predictions = model.predict(generator, steps=len(generator.filenames))
    preds = np.argmax(predictions, axis=-1)

    # One counter per model output instead of a hard-coded class count.
    hits = {class_index: 0 for class_index in range(predictions.shape[-1])}

    # NOTE(review): flow_from_directory assigns class indices by sorting folder
    # names as strings ("0", "1", "10", "2", ...). If the model was trained on
    # labels taken from generator.class_indices, output index k is NOT folder k
    # for k >= 2 and the comparison below would be wrong — confirm the label
    # convention used at training time.
    for filename, predicted in zip(generator.filenames, preds):
        # Filenames are relative to `directory`, so the first path component is
        # the class folder. Normalise the separator so this works on both
        # Windows ("0\\img.jpg") and POSIX ("0/img.jpg").
        current_class = int(filename.replace("\\", "/").split("/")[0])
        if current_class == predicted:
            hits[current_class] += 1

    return hits