<a href="https://colab.research.google.com/github/emanalytic/Sign-Language-Detection/blob/main/resnet50_signdetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: some Kaggle data sources are private.
# Run this cell first so the Kaggle data sources used below can be imported.
import kagglehub
# Authenticate this session through kagglehub.
kagglehub.login()


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Preview the files available under the input directory. Each path used to be
# built and then thrown away, so nothing was ever shown; print a capped sample
# plus a total instead, since listing every image would flood the cell output.
MAX_LISTED_FILES = 10
listed_files = 0
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        if listed_files < MAX_LISTED_FILES:
            print(os.path.join(dirname, filename))
        listed_files += 1
print(f'{listed_files} file(s) found under /kaggle/input')

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import glob
import time
from tempfile import TemporaryDirectory

import cv2
import matplotlib.pyplot as plt
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import torchvision
from PIL import *
# Explicit import: the wildcard above does not guarantee that `Image` is bound,
# and the dataset / prediction code below calls Image.fromarray and Image.open.
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms
from tqdm import tqdm

# Turn on cuDNN's benchmark flag for the whole session.
# NOTE(review): presumably safe because every input is resized to one fixed size — confirm.
cudnn.benchmark = True
plt.ion() ## interactive mode

In [None]:
### Signs: report how many entries each class directory holds
base_dir = '/kaggle/input/sign-language-detection-using-images/data'
class_dirs = (entry for entry in os.scandir(base_dir) if entry.is_dir())
for class_dir in class_dirs:
    image_count = len(os.listdir(class_dir.path))
    print(f'{class_dir.name}: {image_count} Images')

In [None]:
## visualizing images from each class ###

def display_img(base_dir, num_images=3):
    """Show the first few images of every class in a grid, one row per class.

    Args:
        base_dir: Directory whose sub-directories are the classes; each one
            holds that class's image files.
        num_images: Number of columns, i.e. images shown per class.

    Files that OpenCV cannot decode are skipped, and grid cells that end up
    without an image are left blank.
    """
    # Only sub-directories are classes; a stray file in base_dir would make
    # os.listdir() fail below. Sorting keeps the row order stable across runs.
    class_names = sorted(
        entry for entry in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, entry))
    )
    if not class_names or num_images < 1:
        print(f'Nothing to display for {base_dir}')
        return

    # squeeze=False keeps axs two-dimensional even with a single class or a
    # single column, so axs[i, j] is always valid.
    fig, axs = plt.subplots(len(class_names), num_images,
                            figsize=(15, len(class_names) * 3), squeeze=False)
    for ax in axs.ravel():
        ax.axis('off')  # also blanks cells that never receive an image

    for i, class_name in enumerate(class_names):
        class_path = os.path.join(base_dir, class_name)
        shown = 0
        for file_name in sorted(os.listdir(class_path)):
            if shown == num_images:
                break
            img = cv2.imread(os.path.join(class_path, file_name))
            if img is None:  # cv2.imread returns None for unreadable files
                continue
            axs[i, shown].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            axs[i, shown].set_title(class_name)
            shown += 1

    plt.tight_layout()
    plt.show()

In [None]:
# Preview the default three images per class from the dataset root.
display_img(base_dir)

In [None]:
# Collect every file that sits one level below a class directory. glob returns
# paths in arbitrary filesystem order, so sort first and then shuffle with a
# fixed seed: the row order, and therefore the order in which labels first
# appear, is the same on every run.
files = sorted(glob.glob(os.path.join(base_dir, '*', '*')))
np.random.default_rng(42).shuffle(files)
# An image's label is the name of the directory that contains it.
labels = [os.path.basename(os.path.dirname(f)) for f in files]
data = list(zip(files, labels))
df = pd.DataFrame(data, columns=['images', 'labels'])
df

In [None]:
# Number of distinct class labels in the dataset.
df['labels'].nunique()

In [None]:
# Distinct labels of the frame; a label's position in this array is the
# integer class index used as the training target.
class_names = np.array(df['labels'].unique())
print(class_names)

# Lookup in the other direction: label string -> integer class index.
class_to_idx = dict(zip(class_names, range(len(class_names))))
print(class_to_idx)

In [None]:
# Sanity check: load the first image listed in the frame and display it.
sample_bgr = cv2.imread(df['images'][0])
sample_rgb = cv2.cvtColor(sample_bgr, cv2.COLOR_BGR2RGB)  # matplotlib expects RGB

print(sample_rgb.shape)  # size on disk, before resizing
img = cv2.resize(sample_rgb, (256, 256))
plt.imshow(img)
plt.show()

In [None]:
# Hold out 20% of the rows for testing, then carve 30% of the remainder off as
# the validation set. Both splits use the same fixed seed.
# NOTE: the name `train` is rebound to the training function defined further
# down, so the cells that use this frame must run before that definition.
train_val, test = train_test_split(df, test_size=0.2, random_state=42)
train, val = train_test_split(train_val, test_size=0.3, random_state=42)

print(*(part.shape for part in (train, val, test)))

In [None]:
class CustomImageDataset(Dataset):
    """Dataset that serves (image, class index) pairs from a DataFrame.

    The frame is read by position: column 0 holds the image path and column 1
    the label string. Labels are converted to integers through the
    module-level ``class_to_idx`` mapping.
    """

    def __init__(self, df, transform=None):
        """Store the frame and the optional transform.

        Args:
            df: DataFrame with the image path in column 0 and the label in
                column 1.
            transform: Callable applied to the PIL image, or None to return
                the PIL image unchanged.
        """
        self.df = df
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the sample at position ``idx`` and return ``(image, class_index)``.

        Raises:
            ValueError: if the row's label is not a key of ``class_to_idx``.
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        img_path = self.df.iloc[idx, 0]
        label = self.df.iloc[idx, 1]
        if label not in class_to_idx:
            raise ValueError(f"Label {label} not found in class_to_idx dictionary.")
        label = class_to_idx[label]

        # cv2.imread returns None instead of raising when a file is missing or
        # undecodable; fail here with the offending path rather than with an
        # opaque error from cvtColor.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR; convert to RGB before handing over to PIL.
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        if self.transform is not None:
            img = self.transform(img)
        return img, label

In [None]:
def _build_transform():
    """Resize to 224x224, convert to a tensor, then normalize each channel."""
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])


# Training and validation use the same deterministic pipeline (no
# augmentation); each split still gets its own Compose instance.
data_transforms = {split: _build_transform() for split in ('train', 'val')}


In [None]:
# Wrap the training and validation frames in datasets, each with its own transform.
train_dataset = CustomImageDataset(train, transform=data_transforms['train'])
val_dataset = CustomImageDataset(val, transform=data_transforms['val'])

In [None]:
# One loader per phase, keyed by the phase names the training loop iterates over.
split_datasets = {'train': train_dataset, 'val': val_dataset}
dataloaders = {
    phase: DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4)
    for phase, dataset in split_datasets.items()
}

# Sample counts per phase, used to average loss and accuracy over an epoch.
dataset_sizes = {phase: len(dataset) for phase, dataset in split_datasets.items()}

# Class names indexed by class id, used to label plots and reports.
class_names = df['labels'].unique()
class_names

In [None]:
def imshow(inp, title=None):
    """Display a normalized, channel-first image tensor with matplotlib.

    Args:
        inp: CPU tensor laid out as (channels, height, width) that was
            normalized with the mean/std constants below.
        title: Optional title for the plot.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Move channels last for matplotlib, undo the normalization, and clamp
    # the result to the displayable [0, 1] range.
    hwc = inp.numpy().transpose((1, 2, 0))
    restored = np.clip(channel_std * hwc + channel_mean, 0, 1)

    plt.imshow(restored)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


### pull a single batch from the training loader
inputs, classes = next(iter(dataloaders['train']))

### tile the batch into one image and caption it with the batch's class names
out = torchvision.utils.make_grid(inputs)
batch_titles = [class_names[label_idx] for label_idx in classes]

imshow(out, title=batch_titles)

## **Training the model**

In [None]:
def train(model, criterion, optimizer, scheduler, num_epochs=24):
    """Fine-tune ``model`` and return it loaded with its best validation weights.

    Every epoch runs a training pass followed by a validation pass over the
    module-level ``dataloaders`` (also reading ``dataset_sizes`` and
    ``device``). The weights with the highest validation accuracy are
    checkpointed to a temporary directory and restored before returning.

    Args:
        model: Network to train; it is not moved here, so it must already be
            on ``device``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after the training pass.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, holding the best-scoring weights.
    """
    since = time.time()

    # temporary dir to save training checkpoints
    with TemporaryDirectory() as tempdir:
        best_model_params = os.path.join(tempdir, 'best_model_params.pt')

        # Checkpoint the starting weights so there is always a file to restore,
        # even if validation accuracy never rises above zero.
        torch.save(model.state_dict(), best_model_params)
        best_acc = 0.0

        for epoch in range(num_epochs):
            # 1-based, matching the progress-bar label below
            print(f'Epoch {epoch + 1}/{num_epochs}')
            print('='*10)

            for phase in ['train', 'val']:
                is_training = phase == 'train'
                model.train(is_training)  # train mode or eval mode, per phase

                running_loss = 0.0
                running_corrects = 0

                with tqdm(total=len(dataloaders[phase]), desc=f'{phase} Epoch {epoch + 1}', leave=False) as pbar:
                    for inputs, labels in dataloaders[phase]:
                        inputs = inputs.to(device)
                        labels = labels.to(device)

                        optimizer.zero_grad()

                        # forward; gradients are tracked only while training
                        with torch.set_grad_enabled(is_training):
                            outputs = model(inputs)
                            _, preds = torch.max(outputs, 1)
                            loss = criterion(outputs, labels)

                            # backward + optimizer step only in the training phase
                            if is_training:
                                loss.backward()
                                optimizer.step()

                        # statistics: weight the loss by batch size (assumes the
                        # criterion returns a batch mean) and keep the running
                        # totals as plain Python numbers.
                        running_loss += loss.item() * inputs.size(0)
                        running_corrects += (preds == labels).sum().item()

                        pbar.update(1)

                if is_training:
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects / dataset_sizes[phase]

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                # keep the weights of the best validation epoch so far
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), best_model_params)

            print()

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        # ".4f" = four decimals; the previous "4f" was a field width of 4
        print(f'Best val Acc: {best_acc:.4f}')

        # load best model weights (must happen before the temp dir is removed)
        model.load_state_dict(torch.load(best_model_params, weights_only=True))
    return model


## **Visualizing Model Predictions**

In [None]:
def visualize_model(model, num_images=6):
    """Plot validation images captioned with the model's predicted class.

    Reads batches from the module-level ``dataloaders['val']`` and stops after
    ``num_images`` images (or when the loader is exhausted). The model's
    train/eval mode is restored before returning.

    Args:
        model: Network to run; must already be on ``device``.
        num_images: Maximum number of images to plot, laid out in two columns.
    """
    if num_images < 1:
        return

    was_training = model.training
    model.eval()
    images_so_far = 0
    # Two columns; round the row count up so an odd num_images still fits
    # (num_images // 2 rows left no slot for the last image).
    n_rows = (num_images + 1) // 2
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, _ in dataloaders['val']:
                inputs = inputs.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size(0)):
                    images_so_far += 1
                    ax = plt.subplot(n_rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title(f'predicted: {class_names[preds[j].item()]}')
                    imshow(inputs[j].cpu())

                    if images_so_far == num_images:
                        return
    finally:
        # Restore the caller's train/eval mode on every exit path.
        model.train(mode=was_training)

## **Fine-tuning the ConvNet (ResNet-50)**

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

In [None]:
# Start from a ResNet-50 with the IMAGENET1K_V1 weights and replace its final
# fully connected layer with one sized for this dataset.
model_ft = models.resnet50(weights='IMAGENET1K_V1')
num_ftrs = model_ft.fc.in_features

# One output per class. The count comes from class_names instead of a
# hard-coded 35, so the head always matches the labels found in the data.
model_ft.fc = nn.Linear(num_ftrs, len(class_names))

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
# Every parameter is passed to the optimizer, so the whole network is fine-tuned.
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.0001, momentum=0.9)

# decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
# Fine-tune for 10 epochs; train() returns the model with its best validation weights loaded.
model_ft = train(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=10)

In [None]:
# Show the default six validation images with the fine-tuned model's predictions.
visualize_model(model_ft)

In [None]:
### helper function to display the images
# NOTE(review): this re-defines imshow() with the same statements as the
# definition earlier in this notebook; once this cell runs, this is the
# binding that later calls use. Consider keeping a single definition.
def imshow(inp, title=None):
    """Display a normalized, channel-first image tensor with matplotlib.

    Args:
        inp: Tensor laid out as (channels, height, width); ``.numpy()`` is
            called on it directly, so it has to be a CPU tensor.
        title: Optional title for the plot.
    """
    # channels-first -> channels-last, the layout plt.imshow expects
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean  ### denormalize
    inp = np.clip(inp, 0, 1)  # clipping values to [0, 1]

    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)

## **Model Evaluation**

In [None]:
def evaluate_and_visualize(model, dataloader, class_names, num_images=8):
    """Run ``model`` over ``dataloader``, plot a few predictions, and score it.

    Args:
        model: Network to evaluate; must already be on ``device``. It is left
            in eval mode.
        dataloader: Iterable of ``(inputs, labels)`` batches.
        class_names: Sequence mapping a class index to its display name.
        num_images: How many images to plot with true and predicted labels.

    Returns:
        ``(all_labels, all_preds, accuracy, f1)``: the true and predicted
        class indices for every sample, the accuracy, and the weighted F1
        score.
    """
    model.eval()
    all_preds = []
    all_labels = []
    images_so_far = 0
    # Four columns and at least two rows (the previous fixed 2x4 layout); grow
    # the row count when more than 8 images are requested instead of failing.
    n_cols = 4
    n_rows = max(2, (num_images + n_cols - 1) // n_cols)
    plt.figure()

    with torch.no_grad():  # disable gradient calculation
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            # Plot only until num_images is reached, but keep iterating so the
            # metrics cover the whole dataloader.
            for i in range(inputs.size(0)):
                if images_so_far >= num_images:
                    break

                images_so_far += 1
                ax = plt.subplot(n_rows, n_cols, images_so_far)
                ax.axis('off')

                label = labels[i].item()
                pred = preds[i].item()

                imshow(inputs[i].cpu(), title=f'True: {class_names[label]}\nPred: {class_names[pred]}')

    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return all_labels, all_preds, accuracy, f1

In [None]:
# Evaluate on the held-out test frame, using the validation transform and an
# unshuffled loader.
test_dataset = CustomImageDataset(test, data_transforms['val'])
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=4)

# Plots eight test images and collects labels, predictions, accuracy and F1.
all_labels, all_preds, accuracy, f1 = evaluate_and_visualize(model_ft, test_loader, class_names, num_images=8)

In [None]:
# Pin the label set to every known class index so the report rows and the
# confusion-matrix axes line up with class_names even if a class has no
# samples in the test split (classification_report raises when target_names
# and the inferred label set differ in length).
label_ids = list(range(len(class_names)))

print('Classification Report:')
print(classification_report(all_labels, all_preds, labels=label_ids, target_names=class_names))
print('='*40)
print('Confusion Matrix:')
print(confusion_matrix(all_labels, all_preds, labels=label_ids))
print('='*40)

print('Accuracy:', accuracy)

print('F1 Score: ', f1)

## **Prediction on a Sample Image (Not Included in the Dataset)**

In [None]:
def predict_image(model, image_path, transform, class_names):
    """Classify one image file, display it with the prediction, and return the label.

    Args:
        model: Network to run; must already be on ``device``. It is left in
            eval mode.
        image_path: Path of the image file to classify.
        transform: Callable turning an RGB PIL image into a tensor; a batch
            dimension is added here.
        class_names: Sequence mapping a class index to its display name.

    Returns:
        The predicted class name.
    """
    model.eval()

    # Open the file once and release the handle right away; convert() yields
    # an in-memory RGB copy that is used for both inference and display.
    with Image.open(image_path) as source:
        image = source.convert('RGB')

    batch = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(batch)
        _, predicted = torch.max(outputs, 1)

    predicted_class = class_names[predicted.item()]

    plt.imshow(image)
    plt.title(f'Predicted: {predicted_class}')
    plt.axis('off')
    plt.show()

    return predicted_class

In [None]:
# Classify a sample image stored outside the training dataset directory,
# preprocessing it with the validation transform.
image_path = '/kaggle/input/sampleimage/images.jpg'
predict_image(model_ft, image_path, data_transforms['val'], class_names)

---
---