In [15]:
# Load libraries
import os
import pandas as pd
from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms


In [16]:
# Dataset loader function

class ImageDataset(torch.utils.data.Dataset):
    """
    Map-style dataset pairing image files in a directory with labels from a CSV.

    The CSV is read once at construction time. Columns are accessed by
    position: column 0 is the image identifier, column 1 is the label.

    The class lives at module level (instead of inside the factory function)
    so it has a stable qualified name; classes defined inside a function body
    cannot be pickled.
    """

    def __init__(self, csv_path, img_dir, transform=None):
        self.labels_df = pd.read_csv(csv_path)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.labels_df)

    def _image_filename(self, idx):
        """Return the file name for row `idx`, adding ".png" only when it is missing."""
        name = str(self.labels_df.iloc[idx, 0])
        if not name.lower().endswith(".png"):
            name += ".png"
        return name

    def __getitem__(self, idx):
        """Return `(image, label)` for row `idx`; the image is loaded as 3-channel RGB."""
        label = int(self.labels_df.iloc[idx, 1])  # Ensure label is a plain integer
        img_path = os.path.join(self.img_dir, self._image_filename(idx))

        # convert() produces a new, fully loaded image, so the file handle can
        # be released as soon as the with-block ends.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        # Explicit None check: a transform object that happens to be falsy
        # should still be applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, label


def load_training_dataset(csv_path, img_dir, transform=None):
    """
    Loads training images and labels from a CSV and image directory,
    and returns a PyTorch Dataset object.

    Parameters:
        csv_path (str): Path to the labels CSV file. Column 0 holds the image
            identifier (with or without a ".png" suffix), column 1 the label.
        img_dir (str): Path to the folder containing image files.
        transform (callable, optional): Transformation applied to each image
            after it has been converted to RGB.

    Returns:
        torch.utils.data.Dataset: A dataset object to use with DataLoader.
            Each item is an `(image, label)` pair with an integer label.
    """
    return ImageDataset(csv_path, img_dir, transform)

In [17]:
# Load the dataset using the function above 

from torchvision import transforms

# Paths
csv_path = "data/train/labels_train.csv"
img_dir = "data/train/"

# Preprocessing: convert each image to a tensor, then normalise each of the
# three RGB channels with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Build the dataset from the label CSV and the image folder
train_dataset = load_training_dataset(csv_path, img_dir, transform=transform)

# Serve it in shuffled mini-batches of 32
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)


In [18]:
# CNN architecture

import torch
import torch.nn as nn

class CNNModel(nn.Module):
    """
    Small three-stage convolutional classifier.

    Each stage is a 3x3 convolution (stride 1, padding 1, so height and width
    are preserved) followed by ReLU and a 2x2 max-pool with stride 2 that
    halves height and width. After three stages the feature map has 64
    channels at 1/8 of the input resolution; it is flattened and fed through
    two fully connected layers. The output is raw logits (no softmax), as
    expected by CrossEntropyLoss.

    Parameters:
        input_channels (int): Number of channels in the input images
            (e.g. 3 for RGB).
        num_classes (int): Number of output classes.
        input_size (int or tuple[int, int]): Expected input height/width in
            pixels; a single int means a square image. Defaults to 64, which
            gives the original 64 * 8 * 8 flattened feature size.

    Raises:
        ValueError: If `input_size` is so small that the feature map would be
            empty after the three pooling stages.
    """

    def __init__(self, input_channels, num_classes, input_size=64):
        super().__init__()

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # Three stride-2 pools (floor rounding) shrink each side by a factor of 8.
        feat_h, feat_w = height // 8, width // 8
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: each side must be at least 8"
            )
        self.flat_features = 64 * feat_h * feat_w

        # Convolutional stage 1: input_channels -> 16 feature maps
        self.conv1 = nn.Conv2d(in_channels=input_channels, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Convolutional stage 2: 16 -> 32 feature maps
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Convolutional stage 3: 32 -> 64 feature maps
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Classifier head: flattened features -> 512 hidden units -> num_classes logits
        self.fc1 = nn.Linear(self.flat_features, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch_size, num_classes) for an image batch `x`."""
        x = self.pool1(torch.relu(self.conv1(x)))  # conv1 -> ReLU -> pool
        x = self.pool2(torch.relu(self.conv2(x)))  # conv2 -> ReLU -> pool
        x = self.pool3(torch.relu(self.conv3(x)))  # conv3 -> ReLU -> pool

        # Flatten per sample while keeping the batch dimension fixed. With a
        # hard-coded view(-1, N), an unexpected input size would silently be
        # folded into the batch dimension; this way fc1 raises a shape error.
        x = x.view(x.size(0), -1)

        x = torch.relu(self.fc1(x))
        x = self.fc2(x)  # No activation: CrossEntropyLoss expects raw logits

        return x
    
# Build the network: 3 input channels (RGB) and a 26-way output layer.
# Change num_classes if the label set differs.
model = CNNModel(num_classes=26, input_channels=3)


In [19]:
from torch.utils.data import DataLoader, Subset
import numpy as np

# Draw a small random subset of the full training dataset for a quick trial run.
SUBSET_SIZE = 500
SUBSET_SEED = 0  # fixed seed so the same subset is drawn on every run

# Cap the request at the dataset length: sampling without replacement raises
# ValueError when more items are requested than are available.
subset_rng = np.random.default_rng(SUBSET_SEED)
subset_indices = subset_rng.choice(
    len(train_dataset),
    size=min(SUBSET_SIZE, len(train_dataset)),
    replace=False,
)
subset = Subset(train_dataset, subset_indices)

# Create a DataLoader for the subset
subset_loader = DataLoader(subset, batch_size=32, shuffle=True)

In [20]:
# Train the model on a small subset of the dataset as a quick sanity check

import torch.optim as optim

# Loss for multi-class classification; it takes raw logits and integer class
# labels and, by default, returns the mean loss over the batch.
criterion = torch.nn.CrossEntropyLoss()

# Adam optimizer over all model parameters (learning rate can be tuned)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Number of passes over the subset
num_epochs = 5

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    num_samples = 0

    for inputs, labels in subset_loader:
        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass and loss
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # `loss` is the batch mean, so scale it by the batch size. Averaging
        # the batch means directly would over-weight a shorter final batch.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        num_samples += batch_size

    # True per-sample average loss for this epoch
    avg_loss = running_loss / num_samples

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

Epoch 1/5, Loss: 3.2700
Epoch 2/5, Loss: 3.2563
Epoch 3/5, Loss: 3.2493
Epoch 4/5, Loss: 3.2367
Epoch 5/5, Loss: 3.2232
