<a href="https://colab.research.google.com/github/hendrikyong/CVNL_Assignment_1/blob/main/CVNL_P02_GP01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [53]:
#getting dataset
import kagglehub
import os

#download the latest version of the ASL alphabet dataset through kagglehub;
#`path` holds whatever dataset_download returns (the location of the files)
path = kagglehub.dataset_download("grassknoted/asl-alphabet")

#report the dataset location and the entries found directly inside it
print(f"Path to dataset files: {path}")
files = os.listdir(path)
print(f"Files in the dataset: {files}")

Path to dataset files: /root/.cache/kagglehub/datasets/grassknoted/asl-alphabet/versions/1
Files in the dataset: ['asl_alphabet_train', 'asl_alphabet_test']


In [54]:
#imports
import os
import matplotlib.pyplot as plt
import torch
from sklearn.metrics import accuracy_score
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np

In [55]:
#define the paths to the training and testing data directories
#they are built from `path` (returned by kagglehub.dataset_download above) instead of
#a hard-coded cache location, so they stay valid if the dataset version or the
#kagglehub cache directory changes; each split sits in a folder nested under a
#folder of the same name
train_dir = os.path.join(path, 'asl_alphabet_train', 'asl_alphabet_train')
test_dir = os.path.join(path, 'asl_alphabet_test', 'asl_alphabet_test')

In [56]:
#data preprocessing
#1. Load and organize data into a usable format.
#   - data is already in a usable format
#2. Resize and normalize the images to ensure consistency and optimal input for CNN.
#3. Apply data augmentation on the training data to avoid overfitting and enhance generalization.
#4. Split data into batches using DataLoader to handle larger datasets and speed up training.

IMAGE_SIZE = 224  #side length (in pixels) of the square images fed to the network
BATCH_SIZE = 32   #number of images per batch for both loaders

#per-channel mean / std used for normalization
#(these are the values commonly used with ImageNet-pretrained models)
mean = torch.tensor([0.485, 0.456, 0.406])
std = torch.tensor([0.229, 0.224, 0.225])

#data augmentation (training data only)
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),  #randomly flip the image horizontally
    transforms.RandomRotation(20),  #randomly rotate by a degree (-20 to 20)
    #randomly crop and resize; the output is already IMAGE_SIZE x IMAGE_SIZE,
    #so no additional Resize step is needed afterwards
    transforms.RandomResizedCrop(IMAGE_SIZE),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  #random color adjustments
    transforms.ToTensor(),  #convert image to a tensor
    transforms.Normalize(mean=mean, std=std),  #normalize with mean and std
])

#deterministic transformation (no augmentation) used for evaluation:
#resize, convert to a tensor, and normalize
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),  #resize images to IMAGE_SIZE x IMAGE_SIZE
    transforms.ToTensor(),  #convert the image to a pytorch tensor
    transforms.Normalize(mean=mean, std=std),  #normalization
])

#load and transform dataset
#the training set must use train_transform, otherwise the augmentation defined
#above is never applied; the test set keeps the deterministic transform
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

#create DataLoader for batching the data (only the training data is shuffled)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

#get one image and its label from the training dataset
#(the image is randomly augmented now, but its shape is still fixed)
x_example, y_example = train_dataset[0]
print(x_example.shape)

torch.Size([3, 224, 224])
