<a href="https://colab.research.google.com/github/aniketsharma00411/sign-language-to-text-translator/blob/main/ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initialization

In [None]:
from google.colab import files
import os

from keras.preprocessing.image import ImageDataGenerator
from keras import models
from keras import layers
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt

import random
import shutil

In [None]:
if not os.path.exists(os.path.expanduser('~')+'/.kaggle'):
    ! mkdir ~/.kaggle
os.chdir(os.path.expanduser('~')+'/.kaggle')
if not os.path.exists(os.path.expanduser('~')+'/.kaggle/kaggle.json'):
    kaggle_api_file = files.upload()
    ! kaggle datasets download -d grassknoted/asl-alphabet
    ! unzip -q asl-alphabet.zip
    ! rm -rf asl_alphabet_train/asl_alphabet_train/del
    ! rm asl-alphabet.zip

Saving kaggle.json to kaggle.json
Downloading asl-alphabet.zip to /root/.kaggle
 99% 1.02G/1.03G [00:04<00:00, 220MB/s]
100% 1.03G/1.03G [00:04<00:00, 223MB/s]


In [None]:
# Sanity check: the train/test dataset folders and kaggle.json should be listed.
! ls

asl_alphabet_test  asl_alphabet_train  kaggle.json


# Bootstrap Aggregation

In [None]:
# Seed Python's `random` module, which create_data() uses to draw the bootstrap samples.
random.seed(42)

In [None]:
def create_data(source_dir='asl_alphabet_train/asl_alphabet_train/',
                target_dir='bagging_data',
                samples_per_class=3000,
                characters=None):
    """Build one bootstrap sample of the training images on disk.

    For every class, ``samples_per_class`` images are drawn *with replacement*
    from ``source_dir/<class>`` and copied to ``target_dir/<class>`` as
    ``0.jpg``, ``1.jpg``, ... Any existing ``target_dir`` is deleted first.

    The draw uses the global ``random`` state, so seed it beforehand for
    reproducible samples.

    Args:
        source_dir: Directory holding one sub-directory of images per class.
        target_dir: Directory to (re)create for the bootstrap sample.
        samples_per_class: Number of images drawn for each class.
        characters: Iterable of class (sub-directory) names. Defaults to the
            letters A-Z plus 'nothing' and 'space'.

    Raises:
        ValueError: If a class directory contains no files to sample from.
    """
    if characters is None:
        characters = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') + ['nothing', 'space']

    # Start from a clean directory so images from a previous bootstrap round
    # never mix with the new sample.
    if os.path.isdir(target_dir):
        shutil.rmtree(target_dir)
    os.makedirs(target_dir)

    for character in characters:
        class_source = os.path.join(source_dir, character)
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # seeded draw below pick the same files on every machine/run.
        candidates = sorted(os.listdir(class_source))
        if not candidates:
            raise ValueError(f'no images found in {class_source!r}')

        class_target = os.path.join(target_dir, character)
        os.mkdir(class_target)

        # random.choices samples with replacement, so the same source image
        # may be copied several times under different numbered names.
        to_copy = random.choices(candidates, k=samples_per_class)
        for num, image in enumerate(to_copy):
            shutil.copy(
                os.path.join(class_source, image),
                os.path.join(class_target, str(num) + '.jpg')
            )

# Data preprocessing

In [None]:
def get_data_generators():
    """Create a fresh bootstrap sample and return generators over it.

    Calls create_data() first, then reads the 'bagging_data' directory
    through a single ImageDataGenerator that rescales pixel values by 1/255
    and reserves 20% of the images as the validation subset.

    Returns:
        (train_gen, val_gen): directory iterators for the 'training' and
        'validation' subsets, both yielding shuffled batches of 32 RGB
        images resized to 224x224 with categorical labels.

    NOTE(review): create_data() samples with replacement, so copies of the
    same source image may end up in both subsets -- confirm whether the
    resulting validation accuracy is meant to be taken at face value.
    """
    create_data()
    data_root = 'bagging_data'

    loader = ImageDataGenerator(rescale=1/255, validation_split=0.2)

    # Both subsets are read with exactly the same settings; only `subset` differs.
    shared_options = dict(
        target_size=(224,224),
        class_mode='categorical',
        color_mode='rgb',
        shuffle=True,
        batch_size=32,
        seed=0,
    )

    training_batches = loader.flow_from_directory(
        data_root, subset='training', **shared_options
    )
    validation_batches = loader.flow_from_directory(
        data_root, subset='validation', **shared_options
    )

    return training_batches, validation_batches

# Creating the model

In [None]:
def get_model():
    """Assemble the (uncompiled) CNN used for each ensemble member.

    Architecture, in order:
      * four blocks of Conv2D (64 filters, 9x9 kernel, 'same' padding, ReLU)
        followed by 2x2 MaxPooling2D with 'same' padding; the first
        convolution declares the 224x224x3 input shape,
      * Flatten,
      * four Dense layers of 128 ReLU units,
      * a 28-unit softmax output layer (one unit per class).

    Returns:
        The Sequential model, not yet compiled.
    """
    conv_settings = dict(filters=64, kernel_size=9, padding='same', activation='relu')
    network = models.Sequential()

    # Convolution + pooling feature extractor.
    for block_index in range(4):
        if block_index == 0:
            # Only the very first layer needs to be told the input shape.
            conv = layers.Conv2D(input_shape=(224, 224, 3), **conv_settings)
        else:
            conv = layers.Conv2D(**conv_settings)
        network.add(conv)
        network.add(layers.MaxPooling2D(pool_size=2, padding='same'))

    # Fully connected classifier head.
    network.add(layers.Flatten())
    for _ in range(4):
        network.add(layers.Dense(units=128, activation='relu'))

    network.add(layers.Dense(28, activation='softmax'))

    return network

In [None]:
def train_model(model, train_gen=None, val_gen=None):
    """Compile ``model`` and fit it with early stopping.

    The model is compiled with Adam (learning rate 1e-4), categorical
    cross-entropy loss and the accuracy metric, then trained for at most 20
    epochs. Training stops once 'val_accuracy' has not improved for 2 epochs,
    and the best weights seen are restored.

    Args:
        model: Keras model to compile and train (modified in place).
        train_gen: Training data passed to ``model.fit``. When omitted, the
            notebook-level variable ``train_gen`` is used (legacy behaviour).
        val_gen: Validation data passed to ``model.fit``. When omitted, the
            notebook-level variable ``val_gen`` is used (legacy behaviour).

    Returns:
        Whatever ``model.fit`` returns (the Keras training history).

    Raises:
        NameError: If a generator is neither passed in nor defined globally.
    """
    # Prefer explicit arguments; fall back to the notebook globals so existing
    # `train_model(model)` calls keep working.
    notebook_scope = globals()
    if train_gen is None:
        if 'train_gen' not in notebook_scope:
            raise NameError("name 'train_gen' is not defined")
        train_gen = notebook_scope['train_gen']
    if val_gen is None:
        if 'val_gen' not in notebook_scope:
            raise NameError("name 'val_gen' is not defined")
        val_gen = notebook_scope['val_gen']

    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy']
        )

    early_stopping_callback = EarlyStopping(
        monitor='val_accuracy',
        patience=2,
        mode='max',
        restore_best_weights=True
        )

    # Return the history so callers can inspect or plot the training curves.
    return model.fit(
        train_gen,
        epochs=20,
        validation_data=val_gen,
        callbacks=[early_stopping_callback]
        )

# Training the model

In [None]:
# Number of ensemble members; each one is trained on its own bootstrap sample.
NUM_MODELS = 5

for num in range(NUM_MODELS):
    # Re-create 'bagging_data' and build generators over it. These must stay
    # module-level names: train_model() reads `train_gen` and `val_gen` from
    # the notebook's global scope.
    train_gen, val_gen = get_data_generators()
    # Each member starts from a freshly built model.
    model = get_model()
    train_model(model)

    # Saved one directory above the current working directory.
    model.save(f'../asl_basic_ensemble_{num}.h5')

Found 67200 images belonging to 28 classes.
Found 16800 images belonging to 28 classes.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Found 67200 images belonging to 28 classes.
Found 16800 images belonging to 28 classes.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Found 67200 images belonging to 28 classes.
Found 16800 images belonging to 28 classes.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Found 67200 images belonging to 28 classes.
Found 16800 images belonging to 28 classes.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Found 67200 images belonging to 28 classes.
Found 16800 images belonging to 28 classes.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
