# Data loading and preprocessing

In [1]:
import cv2
import os
import numpy as np
import mediapipe as mp
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import time
from torch.utils.data import DataLoader,TensorDataset, SubsetRandomSampler, ConcatDataset, random_split
from torchvision import transforms
from torchvision.transforms import ToTensor
from torchvision import datasets
import torch
from torch import nn
from PIL import Image

# Class index -> sign name; the position of each name in the list is its index.
labels = dict(enumerate([
    'A', 'B', 'C', 'D', 'del', 'E', 'F',
    'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'space',
    'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
]))
# Hand detection module
# NOTE(review): this opens capture device 0 as soon as the cell runs and the
# handle is not used or released in the code visible here - confirm it is needed.
cap = cv2.VideoCapture(0)

# Short aliases for the MediaPipe solution modules
mpHands = mp.solutions.hands
mpDraw = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Detector limited to one hand, configured for still images
mp_model = mp.solutions.hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.5,
    static_image_mode=True,
)



def get_data(folder, samples_per_class=300, image_size=75):
    """Load a fixed number of images from every class folder into tensors.

    `folder` must contain one sub-directory per class, each named after one
    of the values of the module-level `labels` mapping. From every
    sub-directory the first `samples_per_class` entries returned by
    `os.listdir` are read with OpenCV, resized to `image_size` x `image_size`
    and converted with `ToTensor`.

    Args:
        folder: Path of the dataset root directory.
        samples_per_class: Number of images taken from each class folder.
        image_size: Side length in pixels of the square resized images.

    Returns:
        A tuple `(x, y)`. `x` is a float32 tensor of shape
        `(num_class_folders * samples_per_class, 3, image_size, image_size)`;
        `y` is a `LongTensor` holding the `labels` key of every image.

    Raises:
        ValueError: If a sub-directory name is not a known label, a class
            folder holds fewer than `samples_per_class` entries, or an image
            cannot be decoded.
    """
    to_tensor = ToTensor()
    # Invert index -> name once instead of rebuilding two lists per image.
    index_of = {name: index for index, name in labels.items()}

    class_dirs = os.listdir(folder)
    # Size the buffer from the folders actually present so that no row is
    # left uninitialised and x always has as many rows as y has labels.
    x = torch.empty(
        (len(class_dirs) * samples_per_class, 3, image_size, image_size),
        dtype=torch.float32,
    )
    y = []
    for class_pos, class_name in enumerate(class_dirs):
        if class_name not in index_of:
            raise ValueError(
                f"Unknown class folder {class_name!r} in {folder!r}")
        class_index = index_of[class_name]
        class_path = os.path.join(folder, class_name)
        file_names = os.listdir(class_path)
        if len(file_names) < samples_per_class:
            raise ValueError(
                f"Class folder {class_path!r} holds {len(file_names)} entries, "
                f"expected at least {samples_per_class}")

        first_row = class_pos * samples_per_class
        for sample_pos, file_name in enumerate(file_names[:samples_per_class]):
            path = os.path.join(class_path, file_name)
            img = cv2.imread(path)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise ValueError(f"Could not read image {path!r}")
            img = cv2.resize(img, (image_size, image_size))
            # NOTE: the image is not converted to RGB, so the channels stay in
            # the order delivered by cv2.imread.
            x[first_row + sample_pos] = to_tensor(img)
            y.append(class_index)

    return x, torch.LongTensor(y)

In [2]:
def get_box(image, landmarks, padding=10):
    """Return a padded bounding box around hand landmarks, clipped to the image.

    Args:
        image: Array-like whose `shape` starts with `(height, width)`.
        landmarks: Object with a `landmark` iterable whose items expose `x`
            and `y` as fractions of the image width and height.
        padding: Margin in pixels added on every side of the tight box.

    Returns:
        `[x_min, y_min, x_max, y_max]` as Python ints, with
        `0 <= x_min`, `0 <= y_min`, `x_max <= width` and `y_max <= height`.

    Raises:
        ValueError: If `landmarks.landmark` is empty.
    """
    image_height, image_width = image.shape[0], image.shape[1]

    # Convert normalised coordinates to pixels, capped at the last row/column.
    xs = [min(int(point.x * image_width), image_width - 1)
          for point in landmarks.landmark]
    ys = [min(int(point.y * image_height), image_height - 1)
          for point in landmarks.landmark]

    # Tight rectangle with inclusive pixel extents (width = max - min + 1),
    # i.e. the rectangle cv2.boundingRect reports for integer points.
    x, y = min(xs), min(ys)
    w = max(xs) - x + 1
    h = max(ys) - y + 1

    # Clip every edge independently so that clamping the left/top edge never
    # pushes the right/bottom edge outwards.
    return [
        max(x - padding, 0),
        max(y - padding, 0),
        min(x + w + padding, image_width),
        min(y + h + padding, image_height),
    ]

In [3]:
# Load every image/label pair and wrap them in a single dataset
x, y = get_data('data_processed')
dataset = TensorDataset(x, y)

# Random 6800 / 1600 split into training and test subsets
train_set, test_set = random_split(dataset, lengths=[6800, 1600])

# Both loaders serve shuffled mini-batches of 32 samples
train_dataloader = DataLoader(dataset=train_set, batch_size=32, shuffle=True)
test_dataloader = DataLoader(dataset=test_set, batch_size=32, shuffle=True)

# Use the GPU when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# CNN

In [5]:
#definition of CNN
class myCNN(nn.Module):
    """Small CNN mapping 3-channel images to 28 class scores.

    Two 3x3 convolutions (with one 2x2 max-pool between them) feed three
    fully connected layers. `fc1` expects 15 * 34 * 34 flattened features,
    which is what the convolutional part produces for 75x75 inputs.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=15, kernel_size=3)
        # Classifier head
        self.fc1 = nn.Linear(in_features=15 * 34 * 34, out_features=100)
        self.fc2 = nn.Linear(in_features=100, out_features=60)
        self.fc3 = nn.Linear(in_features=60, out_features=28)
        # One shared activation module for every hidden layer
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch `x`."""
        # conv -> ReLU -> pool, then conv -> ReLU (no pooling after conv2)
        features = self.pool(self.relu(self.conv1(x)))
        features = self.relu(self.conv2(features))

        # Keep the batch dimension, flatten everything else
        flat = torch.flatten(features, 1)

        hidden = self.relu(self.fc1(flat))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [6]:
# Build the network and move it to the selected device
model = myCNN().to(device)

# Training hyper-parameters
learning_rate = 1e-3
epochs = 5

# Loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=learning_rate)

In [7]:
def trainingLoop(train_dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over `train_dataloader`.

    Args:
        train_dataloader: Iterable yielding `(inputs, targets)` batches.
        model: Module being trained; updated in place.
        loss_fn: Callable computing the loss from `(predictions, targets)`.
        optimizer: Optimizer that owns the parameters of `model`.

    The loss of every 10th batch (starting with the first) is printed.
    Batches are moved to the module-level `device` before the forward pass.
    """
    # testingLoop() switches the model to eval mode, so training mode has to
    # be restored explicitly at the start of every pass.
    model.train()

    for batch, (X, y) in enumerate(train_dataloader):
        # move data to the device the model lives on
        X = X.to(device).float()
        y = y.to(device)

        pred = model(X)
        loss = loss_fn(pred, y)

        # backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            print(f"The loss is {loss.item()}")

In [8]:
def testingLoop(test_dataloader, model, loss_fn):
    """Evaluate `model` on `test_dataloader` and print accuracy and mean loss.

    The model is switched to eval mode and gradients are disabled for the
    whole pass. Accuracy is the share of correctly classified samples (in
    percent); the loss is averaged over the number of batches. Batches are
    moved to the module-level `device` before the forward pass.
    """
    model.eval()
    sample_count = len(test_dataloader.dataset)
    batch_count = len(test_dataloader)
    loss_sum, hits = 0.0, 0

    with torch.no_grad():
        for inputs, targets in test_dataloader:
            inputs = inputs.to(device).float()
            targets = targets.to(device)

            logits = model(inputs)
            loss_sum += loss_fn(logits, targets).item()
            # argmax over the class dimension gives the predicted label
            hits += (logits.argmax(1) == targets).sum().item()

    test_loss = loss_sum / batch_count
    correct = hits / sample_count

    print(f"Testing accuracy: {correct * 100}, Average loss: {test_loss}")

# Training and testing

In [None]:
# One training pass followed by one evaluation pass per epoch
for epoch_index in range(epochs):
    trainingLoop(
        train_dataloader=train_dataloader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
    )
    testingLoop(
        test_dataloader=test_dataloader,
        model=model,
        loss_fn=loss_fn,
    )

In [10]:
# Persist only the learned parameters (state dict), not the module object
trained_weights = model.state_dict()
torch.save(trained_weights, 'ASL_model.pth')