<a href="https://colab.research.google.com/github/aniketsharma00411/sign-language-to-text-translator/blob/main/metric_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initialization

In [None]:
from google.colab import drive

# Mount Google Drive; later cells change into a project folder under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
from google.colab import files
import os

from keras.preprocessing.image import ImageDataGenerator
from keras import models
from keras.applications import efficientnet
from keras.applications import mobilenet
from sklearn.metrics import classification_report

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy.stats import mode

In [None]:
# Work from the project folder on Drive so the relative paths used by later cells
# (the saved `.h5` models and the `asl_alphabets` image folder) resolve.
# The path is absolute so this cell can be re-run safely: the former relative path
# ('./drive/...') only worked while the working directory was still /content.
PROJECT_DIR = '/content/drive/My Drive/Sign Language to Text Translator'
os.chdir(PROJECT_DIR)

# Loading Models

In [None]:
# Saved baseline model; the relative path resolves against the current working directory.
BASELINE_MODEL_FILE = 'asl_basic.h5'
baseline_model = models.load_model(BASELINE_MODEL_FILE)

In [None]:
# Saved baseline model from the data-augmentation run (per the file name).
BASELINE_AUGMENTED_MODEL_FILE = 'asl_basic_data_augmentation.h5'
baseline_model_augmented = models.load_model(BASELINE_AUGMENTED_MODEL_FILE)

In [None]:
# Saved EfficientNet-B0 based model (per the file name).
EFFICIENT_NET_MODEL_FILE = 'asl_efficient_net_b0.h5'
efficient_net = models.load_model(EFFICIENT_NET_MODEL_FILE)

In [None]:
# Saved MobileNet based model from the data-augmentation run (per the file name).
MOBILENET_AUGMENTED_MODEL_FILE = 'asl_mobilenet_data_augmentation.h5'
mobilenet_augmented = models.load_model(MOBILENET_AUGMENTED_MODEL_FILE)

In [None]:
# The ensemble is five saved models, `asl_basic_ensemble_0.h5` .. `asl_basic_ensemble_4.h5`,
# loaded in index order.
ENSEMBLE_SIZE = 5
ensemble = [
    models.load_model(f'asl_basic_ensemble_{member_index}.h5')
    for member_index in range(ENSEMBLE_SIZE)
]

# ASL Dataset

In [None]:
if not os.path.exists(os.path.expanduser('~')+'/.kaggle'):
    ! mkdir ~/.kaggle
if not os.path.exists(os.path.expanduser('~')+'/.kaggle/kaggle.json'):
    kaggle_api_file = files.upload()
    ! mv kaggle.json ~/.kaggle
    ! kaggle datasets download -d grassknoted/asl-alphabet
    ! mv asl-alphabet.zip ~/.kaggle
    ! unzip -q ~/.kaggle/asl-alphabet.zip -d ~/.kaggle
    ! rm -rf ~/.kaggle/asl_alphabet_train/asl_alphabet_train/del

Saving kaggle.json to kaggle.json
Downloading asl-alphabet.zip to /content/drive/My Drive/Sign Language to Text Translator
 99% 1.01G/1.03G [00:06<00:00, 165MB/s]
100% 1.03G/1.03G [00:07<00:00, 156MB/s]


In [None]:
# Ground-truth class indices for the Kaggle ASL images: 28 classes x 3000 images each,
# listed class by class (0, 0, ..., 1, 1, ..., 27).
# NOTE(review): this assumes the generators below yield images in class order with exactly
# 3000 per class - consistent with shuffle=False and the "support: 3000" rows in the reports.
NUM_CLASSES_DATA = 28
IMAGES_PER_CLASS_DATA = 3000
true_labels_data = [
    class_index
    for class_index in range(NUM_CLASSES_DATA)
    for _ in range(IMAGES_PER_CLASS_DATA)
]

## Baseline Model

In [None]:
# Run the baseline model over the Kaggle ASL images.
# Pixels are rescaled by 1/255; class_mode=None makes the generator yield images only,
# and shuffle=False keeps a fixed order so predictions can be compared with true_labels_data.
image_gen = ImageDataGenerator(rescale=1/255)

asl_train_dir = os.path.expanduser('~')+'/.kaggle/asl_alphabet_train/asl_alphabet_train'
flow_options = dict(
    target_size=(224, 224),
    class_mode=None,
    color_mode='rgb',
    shuffle=False,
)
data_gen = image_gen.flow_from_directory(asl_train_dir, **flow_options)

predictions_baseline_model_data = baseline_model.predict(data_gen, verbose=1)

Found 84000 images belonging to 28 classes.


In [None]:
# Per-class precision / recall / F1 of the baseline model on the Kaggle ASL images.
# argmax over axis 1 turns each row of model outputs into a predicted class index.
predicted_baseline_model_data = np.argmax(predictions_baseline_model_data, axis=1)
classification_report(
    true_labels_data,
    predicted_baseline_model_data,
    output_dict=True,
)

{'0': {'f1-score': 0.9770889487870619,
  'precision': 0.9877384196185286,
  'recall': 0.9666666666666667,
  'support': 3000},
 '1': {'f1-score': 0.9656425503799142,
  'precision': 0.9571054354944335,
  'recall': 0.9743333333333334,
  'support': 3000},
 '10': {'f1-score': 0.9918019073113602,
  'precision': 0.9956331877729258,
  'recall': 0.988,
  'support': 3000},
 '11': {'f1-score': 0.9909213180901143,
  'precision': 0.9996607869742198,
  'recall': 0.9823333333333333,
  'support': 3000},
 '12': {'f1-score': 0.9487267721954576,
  'precision': 0.9804409672830725,
  'recall': 0.919,
  'support': 3000},
 '13': {'f1-score': 0.8948929159802306,
  'precision': 0.8846905537459283,
  'recall': 0.9053333333333333,
  'support': 3000},
 '14': {'f1-score': 0.9450363605614747,
  'precision': 0.9591486440096121,
  'recall': 0.9313333333333333,
  'support': 3000},
 '15': {'f1-score': 0.9780146568954031,
  'precision': 0.9773635153129161,
  'recall': 0.9786666666666667,
  'support': 3000},
 '16': {'f1-

## Baseline Model with Data Augmentation

In [None]:
# Run the augmented baseline model over the Kaggle ASL images.
# Same input pipeline as the plain baseline: 1/255 rescale, images only (class_mode=None),
# fixed order (shuffle=False) so predictions can be compared with true_labels_data.
image_gen_aug = ImageDataGenerator(rescale=1/255)

asl_train_dir = os.path.expanduser('~')+'/.kaggle/asl_alphabet_train/asl_alphabet_train'
flow_options = dict(
    target_size=(224, 224),
    class_mode=None,
    color_mode='rgb',
    shuffle=False,
)
data_gen_aug = image_gen_aug.flow_from_directory(asl_train_dir, **flow_options)

predictions_augmented_model_data = baseline_model_augmented.predict(data_gen_aug, verbose=1)

Found 84000 images belonging to 28 classes.


In [None]:
# Per-class precision / recall / F1 of the augmented baseline model on the Kaggle ASL images.
# argmax over axis 1 turns each row of model outputs into a predicted class index.
predicted_augmented_model_data = np.argmax(predictions_augmented_model_data, axis=1)
classification_report(
    true_labels_data,
    predicted_augmented_model_data,
    output_dict=True,
)

{'0': {'f1-score': 0.9833666001330672,
  'precision': 0.9814077025232404,
  'recall': 0.9853333333333333,
  'support': 3000},
 '1': {'f1-score': 0.9905268406182481,
  'precision': 0.9877361617500828,
  'recall': 0.9933333333333333,
  'support': 3000},
 '10': {'f1-score': 0.9853449695372962,
  'precision': 0.9736413927757891,
  'recall': 0.9973333333333333,
  'support': 3000},
 '11': {'f1-score': 0.9973395410708347,
  'precision': 0.9950232249502322,
  'recall': 0.9996666666666667,
  'support': 3000},
 '12': {'f1-score': 0.8769889840881273,
  'precision': 0.8105203619909502,
  'recall': 0.9553333333333334,
  'support': 3000},
 '13': {'f1-score': 0.8658294778585591,
  'precision': 0.8584534731323722,
  'recall': 0.8733333333333333,
  'support': 3000},
 '14': {'f1-score': 0.9885521885521886,
  'precision': 0.998639455782313,
  'recall': 0.9786666666666667,
  'support': 3000},
 '15': {'f1-score': 0.9415354963058143,
  'precision': 0.9085554866707998,
  'recall': 0.977,
  'support': 3000},


## Efficient Net

In [None]:
# Run the EfficientNet model over the Kaggle ASL images.
# Images go through efficientnet.preprocess_input instead of the 1/255 rescale used for
# the baseline models; shuffle=False keeps a fixed order for comparison with true_labels_data.
image_gen_efficient_net = ImageDataGenerator(
    preprocessing_function=efficientnet.preprocess_input
)

asl_train_dir = os.path.expanduser('~')+'/.kaggle/asl_alphabet_train/asl_alphabet_train'
flow_options = dict(
    target_size=(224, 224),
    class_mode=None,
    color_mode='rgb',
    shuffle=False,
)
data_gen_efficient_net = image_gen_efficient_net.flow_from_directory(asl_train_dir, **flow_options)

predictions_efficient_net_data = efficient_net.predict(data_gen_efficient_net, verbose=1)

Found 84000 images belonging to 28 classes.


In [None]:
# Per-class precision / recall / F1 of the EfficientNet model on the Kaggle ASL images.
# argmax over axis 1 turns each row of model outputs into a predicted class index.
predicted_efficient_net_data = np.argmax(predictions_efficient_net_data, axis=1)
classification_report(
    true_labels_data,
    predicted_efficient_net_data,
    output_dict=True,
)

{'0': {'f1-score': 0.9752925877763329,
  'precision': 0.9517766497461929,
  'recall': 1.0,
  'support': 3000},
 '1': {'f1-score': 0.999333777481679,
  'precision': 0.9986684420772304,
  'recall': 1.0,
  'support': 3000},
 '10': {'f1-score': 0.9840106595602931,
  'precision': 0.9833555259653795,
  'recall': 0.9846666666666667,
  'support': 3000},
 '11': {'f1-score': 0.9931219594027849,
  'precision': 0.999662276258021,
  'recall': 0.9866666666666667,
  'support': 3000},
 '12': {'f1-score': 0.9602385685884691,
  'precision': 0.9545454545454546,
  'recall': 0.966,
  'support': 3000},
 '13': {'f1-score': 0.9534612176814011,
  'precision': 0.9542570951585977,
  'recall': 0.9526666666666667,
  'support': 3000},
 '14': {'f1-score': 0.9809001826939047,
  'precision': 0.9774908970539556,
  'recall': 0.9843333333333333,
  'support': 3000},
 '15': {'f1-score': 0.986795921778372,
  'precision': 0.9896077774052967,
  'recall': 0.984,
  'support': 3000},
 '16': {'f1-score': 0.9853111074434725,
  'pr

## Mobilenet

In [None]:
# Run the MobileNet model over the Kaggle ASL images.
# Images go through mobilenet.preprocess_input instead of the 1/255 rescale used for
# the baseline models; shuffle=False keeps a fixed order for comparison with true_labels_data.
image_gen_mobilenet = ImageDataGenerator(
    preprocessing_function=mobilenet.preprocess_input
)

asl_train_dir = os.path.expanduser('~')+'/.kaggle/asl_alphabet_train/asl_alphabet_train'
flow_options = dict(
    target_size=(224, 224),
    class_mode=None,
    color_mode='rgb',
    shuffle=False,
)
data_gen_mobilenet = image_gen_mobilenet.flow_from_directory(asl_train_dir, **flow_options)

predictions_mobilenet_data = mobilenet_augmented.predict(data_gen_mobilenet, verbose=1)

Found 84000 images belonging to 28 classes.


In [None]:
# Per-class precision / recall / F1 of the MobileNet model on the Kaggle ASL images.
# argmax over axis 1 turns each row of model outputs into a predicted class index.
predicted_mobilenet_data = np.argmax(predictions_mobilenet_data, axis=1)
classification_report(
    true_labels_data,
    predicted_mobilenet_data,
    output_dict=True,
)

{'0': {'f1-score': 0.948456888323258,
  'precision': 0.9071819841752891,
  'recall': 0.9936666666666667,
  'support': 3000},
 '1': {'f1-score': 0.9898922949461474,
  'precision': 0.9841845140032949,
  'recall': 0.9956666666666667,
  'support': 3000},
 '10': {'f1-score': 0.9480731548007838,
  'precision': 0.9292573623559539,
  'recall': 0.9676666666666667,
  'support': 3000},
 '11': {'f1-score': 0.9851876234364715,
  'precision': 0.9730169050715215,
  'recall': 0.9976666666666667,
  'support': 3000},
 '12': {'f1-score': 0.9296270232230823,
  'precision': 0.9843517138599106,
  'recall': 0.8806666666666667,
  'support': 3000},
 '13': {'f1-score': 0.9387107018100984,
  'precision': 0.8963007883565798,
  'recall': 0.9853333333333333,
  'support': 3000},
 '14': {'f1-score': 0.9525629887054735,
  'precision': 0.9949183303085299,
  'recall': 0.9136666666666666,
  'support': 3000},
 '15': {'f1-score': 0.938217261436881,
  'precision': 0.8878310026777745,
  'recall': 0.9946666666666667,
  'suppo

## Ensemble

In [None]:
# Hard-voting ensemble on the Kaggle ASL images: every member predicts one class index per
# image and the most frequent index wins (scipy's `mode` returns the smallest value on ties).
image_gen_ensemble = ImageDataGenerator(rescale=1/255)

data_gen_ensemble = image_gen_ensemble.flow_from_directory(
    os.path.expanduser('~')+'/.kaggle/asl_alphabet_train/asl_alphabet_train',
    target_size=(224, 224),
    class_mode=None,
    color_mode='rgb',
    shuffle=False
    )

# One row of predicted class indices per ensemble member, one column per image.
ensemble_votes_data = np.stack([
    np.argmax(model.predict(data_gen_ensemble, verbose=1), axis=1)
    for model in ensemble
])

# Vote across members (axis 0). np.ravel yields a 1-D array of per-image labels whether or
# not the installed SciPy keeps the reduced axis; the former `[0][0]` indexing collapses to a
# single scalar on SciPy versions where `keepdims` defaults to False.
predictions_ensemble_data = np.ravel(mode(ensemble_votes_data, axis=0)[0])

Found 84000 images belonging to 28 classes.


In [None]:
# Per-class precision / recall / F1 of the voting ensemble on the Kaggle ASL images.
# The ensemble predictions are already class indices, so no argmax is needed here.
classification_report(
    y_true=true_labels_data,
    y_pred=predictions_ensemble_data,
    output_dict=True,
)

{'0': {'f1-score': 1.0, 'precision': 1.0, 'recall': 1.0, 'support': 3000},
 '1': {'f1-score': 1.0, 'precision': 1.0, 'recall': 1.0, 'support': 3000},
 '10': {'f1-score': 0.9998333055509252,
  'precision': 1.0,
  'recall': 0.9996666666666667,
  'support': 3000},
 '11': {'f1-score': 1.0, 'precision': 1.0, 'recall': 1.0, 'support': 3000},
 '12': {'f1-score': 0.9998333055509252,
  'precision': 1.0,
  'recall': 0.9996666666666667,
  'support': 3000},
 '13': {'f1-score': 1.0, 'precision': 1.0, 'recall': 1.0, 'support': 3000},
 '14': {'f1-score': 1.0, 'precision': 1.0, 'recall': 1.0, 'support': 3000},
 '15': {'f1-score': 1.0, 'precision': 1.0, 'recall': 1.0, 'support': 3000},
 '16': {'f1-score': 1.0, 'precision': 1.0, 'recall': 1.0, 'support': 3000},
 '17': {'f1-score': 0.9995002498750626,
  'precision': 0.999000999000999,
  'recall': 1.0,
  'support': 3000},
 '18': {'f1-score': 0.9998333611064822,
  'precision': 0.9996667777407531,
  'recall': 1.0,
  'support': 3000},
 '19': {'f1-score': 1.0

# ASL Alphabets

In [None]:
# Ground-truth class indices for the `asl_alphabets` images: 26 classes x 32 images each,
# listed class by class (0, 0, ..., 1, 1, ..., 25).
# NOTE(review): this assumes the generators below yield images in class order with exactly
# 32 per class - consistent with shuffle=False and the "support: 32" rows in the reports.
NUM_CLASSES_ALPHA = 26
IMAGES_PER_CLASS_ALPHA = 32
true_labels_alpha = [
    class_index
    for class_index in range(NUM_CLASSES_ALPHA)
    for _ in range(IMAGES_PER_CLASS_ALPHA)
]

## Baseline Model

In [None]:
# Run the baseline model over the `asl_alphabets` images (path relative to the working directory).
# Pixels are rescaled by 1/255; class_mode=None makes the generator yield images only,
# and shuffle=False keeps a fixed order so predictions can be compared with true_labels_alpha.
image_gen2 = ImageDataGenerator(rescale=1/255)

asl_alphabets_dir = 'asl_alphabets'
flow_options = dict(
    target_size=(224, 224),
    class_mode=None,
    color_mode='rgb',
    shuffle=False,
)
data_gen2 = image_gen2.flow_from_directory(asl_alphabets_dir, **flow_options)

predictions_baseline_model_alpha = baseline_model.predict(data_gen2, verbose=1)

Found 832 images belonging to 26 classes.


In [None]:
# Per-class precision / recall / F1 of the baseline model on the `asl_alphabets` images.
# argmax over axis 1 turns each row of model outputs into a predicted class index.
predicted_baseline_model_alpha = np.argmax(predictions_baseline_model_alpha, axis=1)

# zero_division=0 reports the same 0.0 scores as the default for classes with an undefined
# metric (e.g. never predicted), but without emitting a warning for each of them.
classification_report(
    true_labels_alpha,
    predicted_baseline_model_alpha,
    output_dict=True,
    zero_division=0,
)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '1': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '10': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '11': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '12': {'f1-score': 0.03125,
  'precision': 0.03125,
  'recall': 0.03125,
  'support': 32},
 '13': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '14': {'f1-score': 0.9142857142857143,
  'precision': 0.8421052631578947,
  'recall': 1.0,
  'support': 32},
 '15': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '16': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '17': {'f1-score': 0.27027027027027023,
  'precision': 0.23809523809523808,
  'recall': 0.3125,
  'support': 32},
 '18': {'f1-score': 0.3316062176165803,
  'precision': 0.19875776397515527,
  'recall': 1.0,
  'support': 32},
 '19': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0

## Baseline Model with Data Augmentation

In [None]:
# Run the augmented baseline model over the `asl_alphabets` images.
# Same input pipeline as the plain baseline: 1/255 rescale, images only (class_mode=None),
# fixed order (shuffle=False) so predictions can be compared with true_labels_alpha.
image_gen_aug2 = ImageDataGenerator(rescale=1/255)

asl_alphabets_dir = 'asl_alphabets'
flow_options = dict(
    target_size=(224, 224),
    class_mode=None,
    color_mode='rgb',
    shuffle=False,
)
data_gen_aug2 = image_gen_aug2.flow_from_directory(asl_alphabets_dir, **flow_options)

predictions_augmented_model_alpha = baseline_model_augmented.predict(data_gen_aug2, verbose=1)

Found 832 images belonging to 26 classes.


In [None]:
# Per-class precision / recall / F1 of the augmented baseline model on the `asl_alphabets` images.
# argmax over axis 1 turns each row of model outputs into a predicted class index.
predicted_augmented_model_alpha = np.argmax(predictions_augmented_model_alpha, axis=1)

# zero_division=0 reports the same 0.0 scores as the default for classes with an undefined
# metric (e.g. never predicted), but without emitting a warning for each of them.
classification_report(
    true_labels_alpha,
    predicted_augmented_model_alpha,
    output_dict=True,
    zero_division=0,
)

  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'f1-score': 0.2711864406779661,
  'precision': 0.1568627450980392,
  'recall': 1.0,
  'support': 32},
 '1': {'f1-score': 0.5517241379310345,
  'precision': 0.38095238095238093,
  'recall': 1.0,
  'support': 32},
 '10': {'f1-score': 0.3157894736842105,
  'precision': 0.2727272727272727,
  'recall': 0.375,
  'support': 32},
 '11': {'f1-score': 0.3157894736842105,
  'precision': 1.0,
  'recall': 0.1875,
  'support': 32},
 '12': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '13': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '14': {'f1-score': 1.0, 'precision': 1.0, 'recall': 1.0, 'support': 32},
 '15': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '16': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '17': {'f1-score': 0.18518518518518515,
  'precision': 0.22727272727272727,
  'recall': 0.15625,
  'support': 32},
 '18': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '19': {'f1

## Efficient Net

In [None]:
# Run the EfficientNet model over the `asl_alphabets` images.
# Images go through efficientnet.preprocess_input instead of the 1/255 rescale used for
# the baseline models; shuffle=False keeps a fixed order for comparison with true_labels_alpha.
image_gen_efficient_net2 = ImageDataGenerator(
    preprocessing_function=efficientnet.preprocess_input
)

asl_alphabets_dir = 'asl_alphabets'
flow_options = dict(
    target_size=(224, 224),
    class_mode=None,
    color_mode='rgb',
    shuffle=False,
)
data_gen_efficient_net2 = image_gen_efficient_net2.flow_from_directory(asl_alphabets_dir, **flow_options)

predictions_efficient_net_alpha = efficient_net.predict(data_gen_efficient_net2, verbose=1)

Found 832 images belonging to 26 classes.


In [None]:
# Per-class precision / recall / F1 of the EfficientNet model on the `asl_alphabets` images.
# argmax over axis 1 turns each row of model outputs into a predicted class index.
predicted_efficient_net_alpha = np.argmax(predictions_efficient_net_alpha, axis=1)

# zero_division=0 reports the same 0.0 scores as the default for classes with an undefined
# metric (e.g. never predicted), but without emitting a warning for each of them.
classification_report(
    true_labels_alpha,
    predicted_efficient_net_alpha,
    output_dict=True,
    zero_division=0,
)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'f1-score': 0.45714285714285713,
  'precision': 0.2962962962962963,
  'recall': 1.0,
  'support': 32},
 '1': {'f1-score': 0.8205128205128205,
  'precision': 0.6956521739130435,
  'recall': 1.0,
  'support': 32},
 '10': {'f1-score': 0.35593220338983045,
  'precision': 0.2441860465116279,
  'recall': 0.65625,
  'support': 32},
 '11': {'f1-score': 0.9333333333333333,
  'precision': 1.0,
  'recall': 0.875,
  'support': 32},
 '12': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '13': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '14': {'f1-score': 0.9846153846153847,
  'precision': 0.9696969696969697,
  'recall': 1.0,
  'support': 32},
 '15': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '16': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '17': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '18': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '19': {'f1-sco

## Mobilenet

In [None]:
# Run the MobileNet model over the `asl_alphabets` images.
# Images go through mobilenet.preprocess_input instead of the 1/255 rescale used for
# the baseline models; shuffle=False keeps a fixed order for comparison with true_labels_alpha.
image_gen_mobilenet2 = ImageDataGenerator(
    preprocessing_function=mobilenet.preprocess_input
)

asl_alphabets_dir = 'asl_alphabets'
flow_options = dict(
    target_size=(224, 224),
    class_mode=None,
    color_mode='rgb',
    shuffle=False,
)
data_gen_mobilenet2 = image_gen_mobilenet2.flow_from_directory(asl_alphabets_dir, **flow_options)

predictions_mobilenet_alpha = mobilenet_augmented.predict(data_gen_mobilenet2, verbose=1)

Found 832 images belonging to 26 classes.


In [None]:
# Per-class precision / recall / F1 of the MobileNet model on the `asl_alphabets` images.
# argmax over axis 1 turns each row of model outputs into a predicted class index.
predicted_mobilenet_alpha = np.argmax(predictions_mobilenet_alpha, axis=1)

# zero_division=0 reports the same 0.0 scores as the default for classes with an undefined
# metric (e.g. never predicted), but without emitting a warning for each of them.
classification_report(
    true_labels_alpha,
    predicted_mobilenet_alpha,
    output_dict=True,
    zero_division=0,
)

  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'f1-score': 0.8999999999999999,
  'precision': 0.9642857142857143,
  'recall': 0.84375,
  'support': 32},
 '1': {'f1-score': 0.9846153846153847,
  'precision': 0.9696969696969697,
  'recall': 1.0,
  'support': 32},
 '10': {'f1-score': 0.5057471264367817,
  'precision': 0.4,
  'recall': 0.6875,
  'support': 32},
 '11': {'f1-score': 1.0, 'precision': 1.0, 'recall': 1.0, 'support': 32},
 '12': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '13': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '14': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '15': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '16': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '17': {'f1-score': 0.06060606060606061,
  'precision': 1.0,
  'recall': 0.03125,
  'support': 32},
 '18': {'f1-score': 0.3375,
  'precision': 0.2109375,
  'recall': 0.84375,
  'support': 32},
 '19': {'f1-score': 0.0, 'precision': 0.0, 

## Ensemble

In [None]:
# Hard-voting ensemble on the `asl_alphabets` images: every member predicts one class index
# per image and the most frequent index wins (scipy's `mode` returns the smallest value on ties).
image_gen_ensemble2 = ImageDataGenerator(rescale=1/255)

data_gen_ensemble2 = image_gen_ensemble2.flow_from_directory(
    'asl_alphabets',
    target_size=(224, 224),
    class_mode=None,
    color_mode='rgb',
    shuffle=False
    )

# One row of predicted class indices per ensemble member, one column per image.
ensemble_votes_alpha = np.stack([
    np.argmax(model.predict(data_gen_ensemble2, verbose=1), axis=1)
    for model in ensemble
])

# Vote across members (axis 0). np.ravel yields a 1-D array of per-image labels whether or
# not the installed SciPy keeps the reduced axis; the former `[0][0]` indexing collapses to a
# single scalar on SciPy versions where `keepdims` defaults to False.
predictions_ensemble_alpha = np.ravel(mode(ensemble_votes_alpha, axis=0)[0])

Found 832 images belonging to 26 classes.


In [None]:
# Per-class precision / recall / F1 of the voting ensemble on the `asl_alphabets` images.
# The ensemble predictions are already class indices, so no argmax is needed here.
# zero_division=0 reports the same 0.0 scores as the default for classes with an undefined
# metric (e.g. never predicted), but without emitting a warning for each of them.
classification_report(
    true_labels_alpha,
    predictions_ensemble_alpha,
    output_dict=True,
    zero_division=0,
)

  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'f1-score': 0.9523809523809523,
  'precision': 0.967741935483871,
  'recall': 0.9375,
  'support': 32},
 '1': {'f1-score': 1.0, 'precision': 1.0, 'recall': 1.0, 'support': 32},
 '10': {'f1-score': 0.358974358974359,
  'precision': 1.0,
  'recall': 0.21875,
  'support': 32},
 '11': {'f1-score': 1.0, 'precision': 1.0, 'recall': 1.0, 'support': 32},
 '12': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '13': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '14': {'f1-score': 0.6666666666666666,
  'precision': 0.5,
  'recall': 1.0,
  'support': 32},
 '15': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '16': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 32},
 '17': {'f1-score': 0.15789473684210525,
  'precision': 0.5,
  'recall': 0.09375,
  'support': 32},
 '18': {'f1-score': 0.3516483516483517,
  'precision': 0.21333333333333335,
  'recall': 1.0,
  'support': 32},
 '19': {'f1-score': 0.0, 'precision': 0.0,