In [None]:
import torch
from torch import nn

import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt
# Prefer the GPU when one is visible to PyTorch; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'


## Import dataset

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by
# path from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import numpy as np
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader, random_split
import torch

class CustomImageDataset(Dataset):
  """In-memory grayscale image dataset built from a class-per-folder tree.

  Every sub-directory of ``folder_path`` is one class. Each ``.png``/``.jpg``/
  ``.jpeg`` file inside it is converted to grayscale, resized to
  ``image_size`` and scaled to the ``[0, 1]`` range. All images are loaded
  eagerly in ``__init__`` and kept in memory.

  Attributes:
      data: numpy array of the loaded images, one ``image_size`` array each.
      labels: integer-encoded class labels aligned with ``data``.
      folder_names: sorted names of the class sub-directories.
      label_encoder: ``LabelEncoder`` fitted on ``folder_names``.
      image_size: the size passed to ``PIL.Image.resize``.
  """

  def __init__(self, folder_path, image_size=(28, 28)):
      """Load every supported image found under ``folder_path``.

      Args:
          folder_path: root directory containing one sub-directory per class.
          image_size: target size handed to ``PIL.Image.resize``.

      Files that cannot be decoded are reported with ``print`` and skipped.
      """
      self.data = []
      self.labels = []
      self.image_size = image_size
      self.label_encoder = LabelEncoder()

      # Only sub-directories are classes. Fitting the encoder on every entry
      # returned by os.listdir would turn stray files in the root into
      # phantom classes and shift the integer label of the real ones.
      self.folder_names = sorted(
          name for name in os.listdir(folder_path)
          if os.path.isdir(os.path.join(folder_path, name))
      )
      self.label_encoder.fit(self.folder_names)

      for label_name in self.folder_names:
          label_folder = os.path.join(folder_path, label_name)

          # os.listdir order is arbitrary; sorting keeps sample indices
          # stable from run to run.
          for filename in sorted(os.listdir(label_folder)):
              img_path = os.path.join(label_folder, filename)
              if not img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                  continue

              try:
                  # The context manager closes the underlying file handle as
                  # soon as the pixels have been decoded by convert().
                  with Image.open(img_path) as raw_img:
                      img = raw_img.convert('L')  # Convert to grayscale
                  img = img.resize(self.image_size)
                  img_array = np.array(img)/255.0
                  self.data.append(img_array)
                  self.labels.append(label_name)
              except Exception as e:
                  # Best effort: report the unreadable file and keep going.
                  print(f"Error loading image {img_path}: {e}")

      # Convert lists to numpy arrays and transform labels to numerical values
      self.data = np.array(self.data)
      self.labels = self.label_encoder.transform(self.labels)

  def __len__(self):
      """Return the number of successfully loaded images."""
      return len(self.data)

  def __getitem__(self, idx):
      """Return ``(image, label)`` for sample ``idx``.

      ``image`` is a float32 tensor with a leading channel dimension added by
      ``unsqueeze(0)``; ``label`` is an int64 tensor of shape ``(1,)``.
      """
      image = torch.tensor(self.data[idx], dtype=torch.float32).unsqueeze(0)
      label = torch.tensor(self.labels[idx], dtype=torch.long).unsqueeze(0)
      return image, label


In [None]:
def load_images_from_drive(folder_path, image_size=(28, 28)):
    """Load a class-per-folder image tree into flat numpy feature vectors.

    Every sub-directory of ``folder_path`` is one class. Each ``.png``/
    ``.jpg``/``.jpeg`` file inside it is converted to grayscale, resized to
    ``image_size``, flattened to 1-D and scaled to the ``[0, 1]`` range.

    Args:
        folder_path: root directory containing one sub-directory per class.
        image_size: target size handed to ``PIL.Image.resize``.

    Returns:
        ``(data, labels, label_encoder)``: ``data`` is a numpy array with one
        flattened image per row, ``labels`` holds the integer-encoded class of
        each row, and ``label_encoder`` is the fitted ``LabelEncoder`` that
        maps between folder names and integers.

    Files that cannot be decoded are reported with ``print`` and skipped.
    """
    print(f"Loading images from {folder_path}...")
    data, labels = [], []
    label_encoder = LabelEncoder()

    # Only sub-directories are classes. Fitting the encoder on every entry
    # returned by os.listdir would turn stray files in the root into phantom
    # classes and inflate the class count reported below.
    folder_names = sorted(
        name for name in os.listdir(folder_path)
        if os.path.isdir(os.path.join(folder_path, name))
    )
    label_encoder.fit(folder_names)  # Fit the encoder with folder names as labels

    for label_name in folder_names:
        label_folder = os.path.join(folder_path, label_name)

        # os.listdir order is arbitrary; sorting keeps row order stable
        # from run to run.
        for filename in sorted(os.listdir(label_folder)):
            img_path = os.path.join(label_folder, filename)
            if not img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue

            try:
                # The context manager closes the underlying file handle as
                # soon as the pixels have been decoded by convert().
                with Image.open(img_path) as raw_img:
                    img = raw_img.convert('L')  # Convert to grayscale
                img = img.resize(image_size)  # Resize to the target size
                img_array = np.array(img).flatten() / 255.0  # Flatten and normalize
                data.append(img_array)
                labels.append(label_name)
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error loading image {img_path}: {e}")

    data = np.array(data)
    labels = label_encoder.transform(labels)  # Encode labels as integers
    print(f"Loaded {len(data)} images from {len(folder_names)} classes.")
    return data, labels, label_encoder

In [None]:
# Root of the class-per-folder image dataset on the mounted Drive.
folder_path = '/content/drive/MyDrive/DATASET'
dataset = None
class_names = []
# NOTE(review): get_dataloader is not defined in the visible part of this
# notebook; it must exist in the kernel before this cell runs. class_names is
# passed in as an empty list and is printed non-empty further down, so it is
# presumably filled in place by get_dataloader — confirm.
train_dataloader, val_dataloader, label_encoder = get_dataloader(
    dataset,
    folder_path,
    image_size=(28, 28),
    batch_size=32,
    class_names=class_names,
)

Train DataLoader created with 1084 images.
Validation DataLoader created with 271 images.


In [None]:
# Print the label tensor shape of every batch in the training loader
# (the early `break` below is commented out, so all batches are visited).
for images, labels in train_dataloader:
  # print(images.shape)  # e.g., torch.Size([32, 1, 28, 28]) for a batch of 32 single-channel 28x28 images
  print(labels.shape)  # e.g., torch.Size([32, 1]): one label per image with a trailing singleton dimension
  # plt.imshow(images[0].squeeze(), cmap = 'grey')
  # print(labels[0])
  # break  # Only check the first batch

torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([32, 1])
torch.Size([28, 1])


In [None]:
# Show the class-name list that was handed to get_dataloader above.
print(class_names)


['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'Q', 'R', 'T', 'W', 'Y', 'i', 'o', 'p', 's', 'u', 'v', 'x', 'z']


In [None]:
# Pull one batch from the training loader and display its tensor shapes.
train_features_batch, train_labels_batch = next(iter(train_dataloader)) # iter() builds an iterator over the loader's batches; next() takes the first one
train_features_batch.shape, train_labels_batch.shape

(torch.Size([32, 1, 28, 28]), torch.Size([32, 1]))

In [None]:
def accuracy_fn(y_true, y_pred):
  """Return the percentage (0-100) of predictions that equal their target.

  Both tensors are flattened before comparison. Without that, comparing a
  ``(N, 1)`` target with an ``(N,)`` prediction silently broadcasts to an
  ``(N, N)`` matrix and can report accuracies above 100%.

  Args:
      y_true: tensor of target class indices, any shape with N elements.
      y_pred: tensor of predicted class indices, any shape with N elements.

  Returns:
      Accuracy as a Python float in percent; ``0.0`` when there are no samples.

  Raises:
      ValueError: if the two tensors hold a different number of elements.
  """
  y_true = y_true.reshape(-1)
  y_pred = y_pred.reshape(-1)
  if y_true.numel() != y_pred.numel():
    raise ValueError(
        f"y_true has {y_true.numel()} elements but y_pred has {y_pred.numel()}"
    )
  if y_pred.numel() == 0:
    # Avoid 0/0 -> NaN, which would poison any running average built on this.
    return 0.0
  correct = (y_true == y_pred).float()  # Convert boolean to float (True=1, False=0)
  acc = correct.sum() / y_pred.numel() * 100  # Percentage accuracy
  return acc.item()

In [None]:
class TinyVGG(nn.Module):
    """
    Model architecture copying TinyVGG from:
    https://poloclub.github.io/cnn-explainer/

    Two convolutional blocks (conv -> ReLU -> conv -> ReLU -> 2x2 max-pool)
    followed by a flatten + linear classifier.

    Args:
        input_shape: number of input channels.
        hidden_units: number of channels produced by every conv layer.
        output_shape: number of output classes (logits).
        image_size: spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to
            ``(28, 28)``, which reproduces the original ``hidden_units*7*7``
            classifier input.

    Raises:
        ValueError: if ``image_size`` is too small to survive two 2x2 pools.
    """
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int,
                 image_size=(28, 28)):
        super().__init__()
        if isinstance(image_size, int):
            image_size = (image_size, image_size)
        height, width = image_size
        # The 3x3 convs use padding=1 / stride=1 and keep the spatial size.
        # Each 2x2 max-pool floors it by half, so two pools give size // 4.
        pooled_height, pooled_width = height // 4, width // 4
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"image_size {image_size} is too small for two 2x2 max-pool layers"
            )

        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3, # how big is the square that's going over the image?
                      stride=1, # default
                      padding=1),# options = "valid" (no padding) or "same" (output has same shape as input) or int for specific number
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,
                         stride=2) # default stride value is same as kernel_size
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # in_features is the flattened size of block_2's output:
            # hidden_units channels of pooled_height x pooled_width each
            # (7 x 7 for the default 28 x 28 input).
            nn.Linear(in_features=hidden_units*pooled_height*pooled_width,
                      out_features=output_shape)
        )

    def forward(self, x: torch.Tensor):
        """Map a ``(batch, channels, height, width)`` tensor to class logits."""
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.classifier(x)
        return x

# Seed the RNG right before construction so the initial weights are repeatable.
torch.manual_seed(42)
model_2 = TinyVGG(
    input_shape=1,
    hidden_units=10,
    output_shape=len(class_names),  # one logit per class
).to(device)
model_2

TinyVGG(
  (block_1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=490, out_features=26, bias=True)
  )
)

In [None]:
def eval_model(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               accuracy_fn,
               device: torch.device = device):
    """Evaluate ``model`` on ``data_loader`` and summarise the result.

    Args:
        model: network to evaluate; it is switched to eval mode.
        data_loader: yields ``(X, y)`` batches. ``y`` may be shaped ``(N,)``
            or ``(N, 1)``.
        loss_fn: callable taking ``(logits, targets)`` and returning a scalar
            tensor.
        accuracy_fn: callable taking ``y_true=`` / ``y_pred=`` and returning a
            number.
        device: device every batch is moved to before the forward pass.

    Returns:
        Dict with ``"model_name"`` (class name of ``model``), ``"model_loss"``
        and ``"model_acc"``. Both metrics are the mean of the per-batch
        values, so a smaller final batch carries the same weight as a full one.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no batches.
    """
    loss, acc = 0.0, 0.0
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            # Send data to the target device
            X, y = X.to(device), y.to(device)
            # Flatten targets to 1-D. A bare squeeze() would collapse a batch
            # of one to a 0-dim tensor that no longer matches the (1, C) logits.
            targets = y.reshape(-1)
            y_pred = model(X)
            # Accumulate plain floats rather than tensors.
            loss += loss_fn(y_pred.float(), targets).item()
            acc += accuracy_fn(y_true=targets, y_pred=y_pred.argmax(dim=1))

    # Scale loss and acc
    loss /= len(data_loader)
    acc /= len(data_loader)
    return {"model_name": model.__class__.__name__, # only works when model was created with a class
            "model_loss": loss,
            "model_acc": acc}

In [None]:
def train_step1(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device):
    """Run one training epoch over ``data_loader`` and print the mean metrics.

    Args:
        model: network to train; moved to ``device`` and put in train mode.
        data_loader: yields ``(X, y)`` batches. ``y`` may be shaped ``(N,)``
            or ``(N, 1)``.
        loss_fn: callable taking ``(logits, targets)`` and returning a scalar
            tensor.
        optimizer: optimizer stepped once per batch.
        accuracy_fn: callable taking ``y_true=`` / ``y_pred=`` and returning a
            number.
        device: device every batch is moved to before the forward pass.

    The printed loss and accuracy are the mean of the per-batch values.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no batches.
    """
    train_loss, train_acc = 0.0, 0.0
    model.to(device)
    # test_step1 leaves the model in eval mode, so switch back explicitly.
    # Otherwise every epoch after the first would train in eval mode.
    model.train()
    for batch, (X, y) in enumerate(data_loader):
        # Send data to GPU
        X, y = X.to(device), y.to(device)
        # Flatten targets to 1-D. A bare squeeze() would collapse a batch of
        # one to a 0-dim tensor that no longer matches the (1, C) logits.
        targets = y.reshape(-1)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate loss
        loss = loss_fn(y_pred.float(), targets)
        # .item() detaches the value so the running total does not keep
        # every batch's autograd graph alive.
        train_loss += loss.item()
        train_acc += accuracy_fn(y_true=targets,
                                 y_pred=y_pred.argmax(dim=1)) # Go from logits -> pred labels

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

    # Calculate loss and accuracy per epoch and print out what's happening
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%")

def test_step1(data_loader: torch.utils.data.DataLoader,
              model: torch.nn.Module,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device = device):
    """Evaluate ``model`` on ``data_loader`` and print the mean metrics.

    Args:
        data_loader: yields ``(X, y)`` batches. ``y`` may be shaped ``(N,)``
            or ``(N, 1)``.
        model: network to evaluate; moved to ``device`` and put in eval mode.
        loss_fn: callable taking ``(logits, targets)`` and returning a scalar
            tensor.
        accuracy_fn: callable taking ``y_true=`` / ``y_pred=`` and returning a
            number.
        device: device every batch is moved to before the forward pass.

    The printed loss and accuracy are the mean of the per-batch values.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no batches.
    """
    test_loss, test_acc = 0.0, 0.0
    model.to(device)
    model.eval() # put model in eval mode
    # Turn on inference context manager
    with torch.inference_mode():
        for X, y in data_loader:
            # Send data to GPU
            X, y = X.to(device), y.to(device)
            # Flatten targets to 1-D. A bare squeeze() would collapse a batch
            # of one to a 0-dim tensor that no longer matches the (1, C) logits.
            targets = y.reshape(-1)

            # 1. Forward pass
            test_pred = model(X)

            # 2. Calculate loss and accuracy
            test_loss += loss_fn(test_pred.float(), targets).item()
            test_acc += accuracy_fn(y_true=targets,
                y_pred=test_pred.argmax(dim=1) # Go from logits -> pred labels
            )

    # Adjust metrics and print out
    test_loss /= len(data_loader)
    test_acc /= len(data_loader)
    print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%\n")

In [None]:
# Multi-class classification: cross-entropy over the logits, plain SGD updates.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_2.parameters(), lr=0.1)

In [None]:
# Pull one batch from the training loader and display its tensor shapes
# (same check as the earlier cell).
train_features_batch, train_labels_batch = next(iter(train_dataloader))
train_features_batch.shape, train_labels_batch.shape

(torch.Size([32, 1, 28, 28]), torch.Size([32, 1]))

In [None]:
epochs = 10
for epoch in range(epochs):
    print(f"Epoch: {epoch}\n---------")
    # One pass over the training batches, then a pass over the validation batches.
    train_step1(model=model_2,
                data_loader=train_dataloader,
                loss_fn=loss_fn,
                optimizer=optimizer,
                accuracy_fn=accuracy_fn,
                device=device)
    test_step1(model=model_2,
               data_loader=val_dataloader,
               loss_fn=loss_fn,
               accuracy_fn=accuracy_fn,
               device=device)

Epoch: 0
---------
Train loss: 1.00550 | Train accuracy: 69.47%
9
613.1250038146973
Test loss: 1.03670 | Test accuracy: 68.13%

Epoch: 1
---------
Train loss: 0.61707 | Train accuracy: 82.31%
9
671.6666679382324
Test loss: 0.79877 | Test accuracy: 74.63%

Epoch: 2
---------
Train loss: 0.44840 | Train accuracy: 86.52%
9
695.625
Test loss: 0.72804 | Test accuracy: 77.29%

Epoch: 3
---------
Train loss: 0.34634 | Train accuracy: 90.24%
9
693.5416679382324
Test loss: 0.88867 | Test accuracy: 77.06%

Epoch: 4
---------
Train loss: 0.23820 | Train accuracy: 93.08%
9
741.2500038146973
Test loss: 0.62175 | Test accuracy: 82.36%

Epoch: 5
---------
Train loss: 0.13338 | Train accuracy: 96.59%
9
766.6666717529297
Test loss: 0.59729 | Test accuracy: 85.19%

Epoch: 6
---------
Train loss: 0.12203 | Train accuracy: 96.59%
9
743.9583358764648
Test loss: 0.65093 | Test accuracy: 82.66%

Epoch: 7
---------
Train loss: 0.06940 | Train accuracy: 97.98%
9
756.0416679382324
Test loss: 0.70566 | Test accu

In [None]:
# Calculate model_2 results on the validation dataloader
model_2_results = eval_model(
    model=model_2,
    data_loader=val_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
)
model_2_results

{'model_name': 'TinyVGG',
 'model_loss': 1.1380828619003296,
 'model_acc': 203.44907463921442}

In [None]:
# Length of the validation loader; this is the divisor used when averaging
# the per-batch metrics in eval_model / test_step1.
len(val_dataloader)

9