# Sign-Language Recognition System

**About :** The dataset is a collection of images of the American Sign Language alphabet, organised into 29 folders, one per class.

**Content :** The training data set contains 87,000 images, each 200x200 pixels. There are 29 classes: 26 for the letters A-Z and 3 for SPACE, DELETE and NOTHING.
These 3 extra classes are very helpful in real-time applications and in classification.
The test data set contains a mere 29 images, to encourage the use of real-world test images.

![IMG](https://i.ibb.co/RD6dqqs/download.png)

In [None]:
# Opens Colab's file-upload widget; the next cell expects a `kaggle.json` API token in the working directory
from google.colab import files
files.upload()

In [None]:
# Getting the dataset using Kaggle API
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download -d grassknoted/asl-alphabet

In [None]:
# List the GPUs visible to this runtime
!nvidia-smi -L

In [None]:
# downloading helper_functions.py
! wget helper_functions.py

## Importing Dependencies

In [None]:
# Importing Dependencies
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from helper_functions import unzip_data, walk_through_dir, plot_loss_curves, confusion_matrix, make_confusion_matrix

In [None]:
# Unzipping our data with the helper from helper_functions.py
# (presumably extracts into the current working directory — confirm in the helper)
unzip_data("/content/asl-alphabet.zip")

In [None]:
# Delete the downloaded archive now that it has been extracted
!rm asl-alphabet.zip

In [None]:
# Walkthrough Directories (Train Dataset)
walk_through_dir("/content/asl_alphabet_train")

In [None]:
# Walkthrough Directories (Test Dataset)
walk_through_dir("/content/asl_alphabet_test")

## Plotting images from our train dataset

In [None]:
# Show one randomly chosen training image, titled with its class name
train_root = "/content/asl_alphabet_train/asl_alphabet_train"
random_class = f"{train_root}/{random.choice(os.listdir(train_root))}"
random_filepath = f"{random_class}/{random.choice(os.listdir(random_class))}"
img = plt.imread(random_filepath)
# The class name is the folder that directly contains the image file
label = random_filepath.split("/")[-2]
# Divide by 255 before display (presumably the image holds 0-255 pixel values — confirm)
plt.imshow(img / 255)
plt.axis(False)
plt.title(label, color="green");

In [None]:
# Show a 3x4 grid of randomly chosen training images, each titled with its class name
train_root = "/content/asl_alphabet_train/asl_alphabet_train"
plt.figure(figsize=(17, 12))
for slot in range(1, 13):
  random_class = f"{train_root}/{random.choice(os.listdir(train_root))}"
  random_filepath = f"{random_class}/{random.choice(os.listdir(random_class))}"
  img = plt.imread(random_filepath)
  # The class name is the folder that directly contains the image file
  label = random_filepath.split("/")[-2]

  plt.subplot(3, 4, slot)
  plt.axis(False)
  plt.title(label, color="green")
  plt.imshow(img / 255);

## Preparing data for training the model

In [None]:
# Generator that holds back 20% of the images so one folder can feed both the "training" and "validation" subsets
data_gen = ImageDataGenerator(validation_split=0.2)

In [None]:
# Both generators read the same folder with the same settings and differ only in `subset`
_flow_kwargs = dict(directory="/content/asl_alphabet_train/asl_alphabet_train",
                    batch_size=32,
                    seed=42,
                    target_size=(200, 200))

# Creating train dataset
train_data = data_gen.flow_from_directory(subset="training", **_flow_kwargs)

# Creating validation dataset
val_data = data_gen.flow_from_directory(subset="validation", **_flow_kwargs)

In [None]:
len(train_data), len(val_data)

## Building the Model

In [None]:
# Creating base model: EfficientNetB0 without its classification head, frozen so its weights are not updated
base_model = tf.keras.applications.EfficientNetB0(include_top=False)
base_model.trainable = False

# Building the model: 200x200x3 input -> frozen base -> global average pooling -> softmax over the classes
inputs = tf.keras.Input(shape=(200, 200, 3), name="input_layer")
# training=False calls the base model in inference mode
x = base_model(inputs, training=False)
x = tf.keras.layers.GlobalAveragePooling2D(name="global_average_pooling")(x)
# One output unit per class found by the training generator
outputs = tf.keras.layers.Dense(len(train_data.class_indices), activation="softmax", name="output_layer")(x)
model_1 = tf.keras.Model(inputs, outputs)

# Compiling the model with the Adam optimizer at its default settings
model_1.compile(loss="categorical_crossentropy",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])

In [None]:
model_1.summary()

In [None]:
# Fit the model
# Keep only the epoch with the best validation accuracy on disk, so the checkpoint
# that is loaded afterwards really is the best model rather than the last epoch.
# NOTE(review): nothing in the visible cells mounts Google Drive — confirm /content/drive is mounted.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint("/content/drive/MyDrive/ASL-Model",
                                                   monitor="val_accuracy",
                                                   save_best_only=True)

# `batch_size` is deliberately not passed: `train_data` already yields batches of 32
history_1 = model_1.fit(train_data,
                        epochs=5,
                        validation_data=val_data,
                        callbacks=[checkpoint_cb])

In [None]:
# Reload the model saved at the ModelCheckpoint path and score it on the validation generator
model = tf.keras.models.load_model("/content/drive/MyDrive/ASL-Model")
model.evaluate(val_data)

In [None]:
plot_loss_curves(history_1)

## Making predictions on the Model

In [None]:
# Validation file paths as reported by the validation generator
val_file_paths = val_data.filepaths
# Later cells use both spellings, so both names are bound to the same list
# (without `val_file_paths` those cells raise NameError)
val_filenames = val_file_paths
val_filenames[:5]

In [None]:
# Generating val labels: the class folder is the 5th "/"-separated component of each path
val_labels = [path.split("/")[4] for path in val_filenames]

val_labels[:5]

In [None]:
# Making a DataFrame that pairs every validation file path with its label
val_df = pd.DataFrame({"filepaths": val_file_paths,
                       "label": val_labels})
val_df.head()

In [None]:
# Rebuild the validation set as an unshuffled generator so predictions line up with the rows of `val_df`
data_gen = ImageDataGenerator()
# directory=None: the "filepaths" column already holds full paths starting at /content,
# so no root directory should be prepended to them
val_data_df = data_gen.flow_from_dataframe(val_df,
                                           directory=None,
                                           x_col="filepaths",
                                           y_col="label",
                                           target_size=(200, 200),
                                           shuffle=False)
len(val_data_df)

In [None]:
# Making predictions with the reloaded model on the unshuffled validation generator
# (presumably one row of class probabilities per image, given the softmax output layer — confirm)
val_pred_prob = model.predict(val_data_df)
val_pred_prob[:5]

In [None]:
# Predicted class index per row: position of the largest value along axis 1
val_preds = tf.argmax(val_pred_prob, axis=1)
val_preds[:5]

In [None]:
# Generating class names: the keys of the training generator's class_indices mapping, in mapping order
classes = list(train_data.class_indices)

len(classes)

In [None]:
classes

In [None]:
# To plot sign-language images of the corresponding characters
def plot_letter(letter_1="A", letter_2="B"):
  """
  Plot one random training image for each of two classes, side by side.

  Args:
    letter_1: name of the class folder shown on the left (default 'A').
    letter_2: name of the class folder shown on the right (default 'B').
      Besides the letters, 'nothing', 'del' and 'space' can be passed.
  """
  train_dir = "/content/asl_alphabet_train/asl_alphabet_train"
  letters = (letter_1, letter_2)

  # Pick and load both images first so a missing class folder fails before any figure is created
  images = []
  for letter in letters:
    class_dir = train_dir + "/" + letter
    images.append(plt.imread(class_dir + "/" + random.choice(os.listdir(class_dir))))

  # Plotting
  plt.figure(figsize=(10, 7))
  for position, (letter, image) in enumerate(zip(letters, images), start=1):
    plt.subplot(1, 2, position)
    plt.imshow(image)
    plt.axis(False)
    plt.title(letter, color="green")

In [None]:
# Plot random letter
plot_letter()

## Plotting Predictions 

In [None]:
# Plotting the predictions made by the model for 12 random validation images
random_arr = random.sample(range(len(val_file_paths)), 12)
plt.figure(figsize=(17, 12))
for slot, idx in enumerate(random_arr, start=1):
  # Gather the true label, the predicted label and the winning probability
  label = val_labels[idx]
  pred = classes[val_preds[idx]]
  prob = np.max(val_pred_prob[idx])
  # Green title for a correct prediction, red for a wrong one
  col = "red" if label != pred else "green"
  _title_ = f"Actual: {label}, \nPred: {pred}, Prob: {prob:.2f}"

  # plotting the imgs
  plt.subplot(3, 4, slot)
  plt.imshow(plt.imread(val_file_paths[idx]))
  plt.axis(False)
  plt.title(_title_, color=col);

## Wrong Images

In [None]:
# Converting our val_preds (class indices) into val_pred_labels (class names)
val_pred_labels = [classes[class_id] for class_id in val_preds]

val_pred_labels[:5]

In [None]:
len(val_pred_labels)

In [None]:
# Creating lists for wrong predictions
# Positions where the true label and the predicted label disagree
wrong_idx = [pos for pos, (truth, guess) in enumerate(zip(val_labels, val_pred_labels))
             if truth != guess]

wrng_file_paths = [val_file_paths[pos] for pos in wrong_idx]
# Winning probability of each wrong prediction
wrng_pred_prob = [np.max(val_pred_prob[pos]) for pos in wrong_idx]
wrng_pred = [val_pred_labels[pos] for pos in wrong_idx]
actual_label = [val_labels[pos] for pos in wrong_idx]

len(wrng_file_paths), len(wrng_pred_prob), len(wrng_pred), len(actual_label)

In [None]:
# DataFrame of the wrongly predicted signs: file path, true label, predicted label
# and the probability the model gave to its (wrong) prediction
wrng_val_df = pd.DataFrame({"File Paths": wrng_file_paths,
                            "Label": actual_label,
                            "Pred": wrng_pred,
                            "Wrong Pred Prob": wrng_pred_prob})
wrng_val_df.head()

In [None]:
# Saving the dataframe as CSV in the current working directory
# NOTE(review): the output file name has no ".csv" extension — confirm this is intended
wrng_val_df.to_csv("Wrong-predictions")

## Plotting wrong images

In [None]:
# Plotting wrongly predicted images (up to 12, chosen at random)
# Sample positions instead of paths: this avoids a lookup per image, and capping the
# sample size avoids the ValueError random.sample raises when fewer than 12
# predictions are wrong.
n_show = min(12, len(wrng_file_paths))
random_arr = random.sample(range(len(wrng_file_paths)), n_show)
plt.figure(figsize=(17, 12))
for slot, index in enumerate(random_arr, start=1):
  filepath = wrng_file_paths[index]
  label = actual_label[index]
  pred = wrng_pred[index]
  prob = wrng_pred_prob[index]
  _title_ = f"Actual: {label}, \nPred: {pred}, Prob: {prob:.2f}"

  # Plotting the image
  plt.subplot(3, 4, slot)
  img = plt.imread(filepath)
  plt.imshow(img)
  plt.axis(False)
  plt.title(_title_, color="red");

In [None]:
# To check how similar the two signs are
plot_letter("Q", "P")

## Prediction on test images

In [None]:
# Creating a preprocessing function
def load_and_preprocess(filepath):
  """
  Read an image file and resize it to the 200x200 size used for the model input.

  Args:
    filepath: path of the image file to load.

  Returns:
    The resized image tensor with 3 colour channels; pixel values are not rescaled.
  """
  img = tf.io.read_file(filepath)
  # channels=3 forces RGB output whatever the file stores (grayscale, alpha, ...);
  # expand_animations=False guarantees a 3-D tensor with a known rank, which
  # tf.image.resize requires when this function is traced (e.g. in a tf.data pipeline).
  img = tf.io.decode_image(img, channels=3, expand_animations=False)
  img = tf.image.resize(img, (200, 200))

  return img

In [None]:
test_dir = "/content/asl_alphabet_test/asl_alphabet_test"
# os.listdir returns entries in arbitrary order; sorting makes the test order
# (and therefore every plot page built from it) reproducible between runs
test_fp = sorted(os.listdir(test_dir))

In [None]:
test_fp[2][0]

In [None]:
test_fp[0].split("_")[0]

In [None]:
# Creating labels of test images: the label is the part of the file name before the first "_"
test_labels = [file_name.split("_")[0] for file_name in test_fp]

len(test_labels)

In [None]:
test_labels[:5]

In [None]:
# Table of test image file names (as listed from test_dir) and their labels
test_df = pd.DataFrame({"filepath": test_fp,
                        "labels": test_labels})

In [None]:
test_df.head()

In [None]:
# Creating test data gen; file names in `test_df` are resolved against the test directory.
# shuffle=False keeps the batches in `test_df` row order so predictions align with `test_labels`.
test_data = data_gen.flow_from_dataframe(test_df,
                                         "/content/asl_alphabet_test/asl_alphabet_test",
                                         x_col="filepath",
                                         y_col="labels",
                                         shuffle=False,
                                         target_size=(200, 200))
len(test_data)

In [None]:
# Predictions on test data with the reloaded model
# (presumably one row of class probabilities per test image — confirm)
test_prob_preds = model.predict(test_data)
test_prob_preds

In [None]:
# Predicted class index per test image: position of the largest value along axis 1
test_preds = tf.argmax(test_prob_preds, axis=1)
test_preds

In [None]:
# Converting our test_preds (class indices) into test_pred_labels (class names)
test_pred_labels = [classes[class_id] for class_id in test_preds]

test_pred_labels[:5]

In [None]:
len(test_labels), len(test_pred_labels)

## Plotting Predictions on all test images

In [None]:
# Plotting predictions on test data (first page: images 0-11)
plt.figure(figsize=(17, 12))
for idx in range(12):
  label = test_labels[idx]
  pred = test_pred_labels[idx]
  prob = np.max(test_prob_preds[idx])
  # Green title for a correct prediction, red for a wrong one
  col = "red" if label != pred else "green"
  _title_ = f"Actual: {label}, \nPred: {pred}, Prob: {prob:.2f}"

  # Plotting 
  plt.subplot(3, 4, idx + 1)
  filepath = test_dir + "/" + test_fp[idx]
  plt.imshow(plt.imread(filepath))
  plt.axis(False)
  plt.title(_title_, color=col);

In [None]:
# Plotting predictions on test data
i_ = 12
plt.figure(figsize=(17, 12))
for i in range(12):
  i_ += 1
  filepath = test_dir + "/" + test_fp[i_]
  prob = np.max(test_prob_preds[i_])
  label = test_labels[i_]
  pred = test_pred_labels[i_]
  _title_ = f"Actual: {label}, \nPred: {pred}, Prob: {prob:.2f}"
  col = "green" if label == pred else "red"

  # Plotting 
  plt.subplot(3, 4, i+1)
  img = plt.imread(filepath)
  plt.imshow(img)
  plt.axis(False)
  plt.title(_title_, color=col);

In [None]:
# Plotting predictions on test data
i_ = 24
plt.figure(figsize=(17, 4))
for i in range(3):
  i_ += 1
  filepath = test_dir + "/" + test_fp[i_]
  prob = np.max(test_prob_preds[i_])
  label = test_labels[i_]
  pred = test_pred_labels[i_]
  _title_ = f"Actual: {label}, \nPred: {pred}, Prob: {prob:.2f}"
  col = "green" if label == pred else "red"

  # Plotting 
  plt.subplot(1, 4, i+1)
  img = plt.imread(filepath)
  plt.imshow(img)
  plt.axis(False)
  plt.title(_title_, color=col);

In [None]:
# Encoding labels in numbers: each test label becomes its position in `classes`
class_array = np.array(classes)
num_labels = [np.where(class_array == name)[0][0] for name in test_labels]

In [None]:
# Confusion matrix for test dataset, drawn with the helper from helper_functions.py:
# true class indices vs. predicted class indices, with `classes` passed as the class names
make_confusion_matrix(y_true=num_labels,
                      y_pred=test_preds.numpy(),
                      classes=classes,
                      figsize=(50, 50))