# YOLO

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!ln -s /content/drive/My\ Drive/runs /content/runs

In [None]:
!ln -s /content/drive/My\ Drive/asl /content/asl

In [None]:
!ln -s /content/drive/MyDrive/object-detection /content/object-detection

In [None]:
!unzip /content/object-detection/hands.v2i.yolov8.zip

In [None]:
!pip install ultralytics

from ultralytics import YOLO

In [None]:
yolo_model = YOLO('yolov10n.pt')
results = yolo_model.train(data='/content/object-detection/data.yaml', epochs=100, imgsz=640, device=0)

In [None]:
yolo_model = YOLO('/content/runs/detect/train10/weights/best.pt')

In [None]:
yolo_model.val()

In [None]:
yolo_model.export(format='tflite')

# CNN

In [1]:
import torch
from torchvision import transforms
from torchvision import datasets
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import torchvision
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import random
import os

In [None]:
!unzip asl.zip

In [3]:
train_dir = '/content/asl_alphabet_train/asl_alphabet_train'

In [4]:
# The "nothing" class was made for the classification task; it is not needed if we use YOLO, so remove it.
!rm -rf '/content/asl_alphabet_train/asl_alphabet_train/nothing'

In [5]:
transform = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor()
])

In [6]:
dataset = datasets.ImageFolder(root=train_dir, transform=transform)

In [7]:
# 90% of the images go to training; the held-out rest is divided roughly 2:1
# between validation and test.
train_ratio = 0.9
train_size = int(train_ratio * len(dataset))
test_size = (len(dataset) - train_size) // 3
# Validation takes whatever is left, so the three sizes always add up to
# len(dataset) even when the held-out count is not divisible by 3
# (random_split rejects lengths that do not sum to the dataset length).
val_size = len(dataset) - train_size - test_size

In [8]:
# Seed the split so the same images land in train/val/test on every run.
# Without a fixed generator, a checkpoint reloaded in a fresh session would be
# evaluated on a "test" subset that overlaps the data it was trained on.
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

In [None]:
num_train_samples = len(train_dataset)
num_val_samples = len(val_dataset)
num_test_samples = len(test_dataset)

print(f'Number of training samples: {num_train_samples}')
print(f'Number of validation samples: {num_val_samples}')
print(f'Number of test samples: {num_test_samples}')

In [10]:
num_classes = len(dataset.classes)
print(num_classes)

28


In [11]:
# Batch size: use 128 or 256.
batch_size = 128
# Settings shared by all three loaders.
loader_kwargs = dict(batch_size=batch_size, num_workers=2, pin_memory=True)

# Only the training data is reshuffled every epoch; the validation and test
# losses are averages over all batches, so their order does not matter.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image

class SimpleCNN(nn.Module):
    """Small three-stage convolutional classifier for 3-channel 200x200 images.

    Each stage is ``Conv2d(kernel 3, stride 1, no padding) -> BatchNorm2d ->
    ReLU -> MaxPool2d(2, 2)``. For a 200x200 input the spatial size shrinks
    200 -> 198 -> 99 -> 97 -> 48 -> 46 -> 23, so the flattened feature vector
    has ``128 * 23 * 23`` elements. Inputs of any other spatial size do not
    match the first fully connected layer and raise a ``RuntimeError``.

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    # Flattened size of the last feature map for a 200x200 input (see above).
    _FLAT_FEATURES = 128 * 23 * 23

    def __init__(self, num_classes=28):
        super().__init__()
        # Attribute names are part of the checkpoint format (state_dict keys);
        # keep them stable so saved weights remain loadable.
        self.conv1 = nn.Conv2d(3, 32, 3, 1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, 1)
        self.bn3 = nn.BatchNorm2d(128)

        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``."""
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, N)``, this can never merge several samples into one row
        # when the input has an unexpected spatial size; the mismatch surfaces
        # as a shape error in ``fc1`` instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class EarlyStopping:
    """Track validation loss, checkpoint improvements, and signal when to stop.

    Call the instance once per epoch with the validation loss and the model.
    When the loss improves by at least ``min_delta`` the model's state dict is
    written to ``path`` and the patience counter is reset; otherwise the
    counter is incremented and ``early_stop`` becomes ``True`` once it reaches
    ``patience``.

    Args:
        patience: Number of consecutive non-improving calls tolerated.
        min_delta: Minimum decrease of the loss that counts as an improvement.
        verbose: Print a message on every checkpoint and counter increment.
        path: File the best state dict is saved to.
    """

    def __init__(self, patience=10, min_delta=0, verbose=False, path='checkpoint.pt'):
        self.patience = patience
        self.min_delta = min_delta
        self.verbose = verbose
        self.counter = 0
        # Lowest validation loss seen so far; None until the first checkpoint.
        self.best_loss = None
        self.early_stop = False
        self.path = path
        # Reference to the model object passed at the last checkpoint. This is
        # the live model, not a frozen copy; the best weights are in `path`.
        self.best_model = None

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and checkpoint on improvement."""
        import math

        # NaN/inf losses must never count as an improvement: every comparison
        # with NaN is False, which would otherwise overwrite the best
        # checkpoint with the weights of a diverged model.
        improved = math.isfinite(val_loss) and (
            self.best_loss is None
            or val_loss <= self.best_loss - self.min_delta
        )

        if improved:
            # Save before updating best_loss so the message shows old --> new.
            self.save_checkpoint(val_loss, model)
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.verbose:
            print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        """Write ``model.state_dict()`` to ``self.path`` and remember the model."""
        if self.verbose:
            # No previous best exists on the very first checkpoint.
            previous = float("inf") if self.best_loss is None else self.best_loss
            print(f"Validation loss decreased ({previous:.6f} --> {val_loss:.6f}). Saving model ...")
        torch.save(model.state_dict(), self.path)
        self.best_model = model

def train_model(model, train_loader, val_loader, criterion, optimizer, patience=10, n_epochs=100, verbose=True):
    """Train ``model`` with early stopping on the validation loss.

    Every epoch runs one optimisation pass over ``train_loader`` and one
    gradient-free pass over ``val_loader``, prints the batch-averaged losses,
    and lets an ``EarlyStopping`` instance checkpoint the model whenever the
    validation loss improves. When training ends, whether by early stopping or
    by reaching ``n_epochs``, the best checkpoint is loaded back into ``model``.

    Args:
        model: Network to train; batches are moved to the device of its
            parameters.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        patience: Non-improving epochs tolerated before stopping.
        n_epochs: Maximum number of epochs.
        verbose: Forwarded to ``EarlyStopping`` to control its messages.

    Returns:
        None. ``model`` is updated in place.
    """
    early_stopping = EarlyStopping(patience=patience, verbose=verbose)
    # Follow the model's own device instead of relying on a global variable.
    device = next(model.parameters()).device

    for epoch in range(n_epochs):
        # Training
        model.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        val_loss /= len(val_loader)

        print(f"Epoch {epoch+1}/{n_epochs}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

        # Check for early stopping
        early_stopping(val_loss, model)

        if early_stopping.early_stop:
            print("Early stopping triggered. Loading the best model...")
            break

    # Restore the best weights in every case: the final epoch is not
    # necessarily the best one even when the epoch budget ran out. best_loss
    # stays None only if no checkpoint was ever written (e.g. n_epochs == 0).
    if early_stopping.best_loss is not None:
        model.load_state_dict(torch.load(early_stopping.path, map_location=device))

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Classifier, loss function and optimizer used by the training cell below.
cnn_model = SimpleCNN(num_classes=28)
cnn_model = cnn_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=cnn_model.parameters(), lr=1e-3)

In [None]:
train_model(cnn_model, train_loader, val_loader, criterion, optimizer, patience=5, n_epochs=50)

Epoch 1/50, Training Loss: 3.0733, Validation Loss: 2.0526
Validation loss decreased (2.052573 --> 2.052573). Saving model ...
Epoch 2/50, Training Loss: 2.3600, Validation Loss: 1.8489
Validation loss decreased (2.052573 --> 1.848851). Saving model ...
Epoch 3/50, Training Loss: 2.2153, Validation Loss: 1.5198
Validation loss decreased (1.848851 --> 1.519756). Saving model ...
Epoch 4/50, Training Loss: 2.1012, Validation Loss: 1.3869
Validation loss decreased (1.519756 --> 1.386910). Saving model ...
Epoch 5/50, Training Loss: 2.0141, Validation Loss: 1.3127
Validation loss decreased (1.386910 --> 1.312657). Saving model ...
Epoch 6/50, Training Loss: 1.9310, Validation Loss: 1.1645
Validation loss decreased (1.312657 --> 1.164485). Saving model ...
Epoch 7/50, Training Loss: 1.8645, Validation Loss: 1.1090
Validation loss decreased (1.164485 --> 1.108969). Saving model ...
Epoch 8/50, Training Loss: 1.7695, Validation Loss: 1.0810
Validation loss decreased (1.108969 --> 1.081029). S

In [None]:
# Rebuild the network and restore the weights saved by early stopping.
# map_location='cpu' lets a checkpoint written from a CUDA model be loaded on a
# runtime without a GPU; the model is moved to `device` before testing.
cnn_model = SimpleCNN(num_classes=28)
cnn_model.load_state_dict(torch.load('checkpoint.pt', map_location='cpu'))

In [15]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

def calculate_metrics(y_true, y_pred):
    """Return ``(accuracy, precision, recall)`` for the given predictions.

    Precision and recall use ``average='weighted'`` and ``zero_division=0``.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
    """
    weighted = {"average": "weighted", "zero_division": 0}
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, **weighted),
        recall_score(y_true, y_pred, **weighted),
    )

In [17]:
def test_model(model, test_loader, criterion, device='cpu'):
    """Evaluate ``model`` on ``test_loader`` and print loss and metrics.

    Runs in eval mode without gradients, averages the loss over batches, takes
    the arg-max logit as the predicted class, and prints the test loss together
    with the accuracy, precision and recall from ``calculate_metrics``.

    Args:
        model: Network to evaluate; expected to already be on ``device``.
        test_loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.
    """
    model.eval()
    total_loss = 0.0
    targets, predictions = [], []

    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_inputs)
            total_loss += criterion(logits, batch_labels).item()
            # Index of the largest logit per row is the predicted class.
            predicted = torch.max(logits, 1)[1]
            targets.extend(batch_labels.cpu().numpy())
            predictions.extend(predicted.cpu().numpy())

    test_loss = total_loss / len(test_loader)
    accuracy, precision, recall = calculate_metrics(targets, predictions)

    print(f"Test Loss: {test_loss:.4f}")
    print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

cnn_model.to(device)
test_model(cnn_model, test_loader, criterion, device=device)

Test Loss: 0.0034
Accuracy: 0.9996, Precision: 0.9996, Recall: 0.9996


# Conversion from PyTorch to TFLite

In [None]:
!pip install ai-edge-torch-nightly torchvision

In [None]:
import ai_edge_torch
import numpy
import torch
import torchvision

cnn_model.eval()

In [None]:
# The sample inputs are CPU tensors, while the model was moved to `device`
# (possibly CUDA) for testing. Bring it back to the CPU and keep it in eval
# mode so the converter sees model and inputs on the same device.
cnn_model = cnn_model.cpu().eval()
sample_inputs = (torch.randn(1, 3, 200, 200),)
edge_model = ai_edge_torch.convert(cnn_model, sample_inputs)

In [None]:
edge_model.export('cnn_model.tflite')