In [55]:
import torch
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch import optim
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm

from torch.utils.data import Dataset

import os
import pandas as pd
from torchvision.io import read_image

import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# PyTorch uses object-oriented programming and classes to build models. Here, we define a simple convolutional neural network (CNN).

class CNN(nn.Module):
    """
    Small convolutional classifier.

    Layout: conv 3x3 -> ReLU -> max-pool 2x2 -> conv 3x3 -> ReLU -> max-pool 2x2
    -> flatten -> fully connected layer producing one raw score per class.
    """

    def __init__(self, in_channels, num_classes=3, image_size=28):
        """
        Define the layers of the convolutional neural network.

        Parameters:
            in_channels: int
                The number of channels in the input image.
            num_classes: int
                The number of classes we want to predict, in our case 3.
            image_size: int or (int, int)
                Height and width of the input images, either as one number for
                square images or as a (height, width) pair. Defaults to 28,
                which gives the 16 * 7 * 7 flattened size used originally.

        Raises:
            ValueError: if the image is too small to survive the two pooling steps.
        """
        super().__init__()

        # Accept either a single side length or an explicit (height, width) pair.
        try:
            height, width = image_size
        except TypeError:
            height = width = image_size
        height, width = int(height), int(width)

        # The 3x3 convolutions use padding 1 and stride 1, so they keep the spatial
        # size; each 2x2 / stride-2 max-pool halves it, rounding down.
        pooled_height = (height // 2) // 2
        pooled_width = (width // 2) // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"image_size {image_size!r} is too small for two 2x2 pooling layers"
            )

        # First convolutional layer: in_channels input channels, 8 output channels, 3x3 kernel, stride 1, padding 1
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=8, kernel_size=3, stride=1, padding=1)
        # Max pooling layer: 2x2 window, stride 2 (shared by both pooling steps; it has no weights)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Second convolutional layer: 8 input channels, 16 output channels, 3x3 kernel, stride 1, padding 1
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)
        # Fully connected layer: 16 feature maps of the pooled size -> one score per class
        self.fc1 = nn.Linear(16 * pooled_height * pooled_width, num_classes)

    def forward(self, x):
        """
        Define the forward pass of the neural network.

        Parameters:
            x: torch.Tensor
                The input batch, laid out as [batch, in_channels, height, width]
                with the height/width the model was constructed for.

        Returns:
            torch.Tensor
                Raw class scores of shape [batch, num_classes]; no softmax is applied.
        """
        x = F.relu(self.conv1(x))  # Apply first convolution and ReLU activation
        x = self.pool(x)           # Apply max pooling
        x = F.relu(self.conv2(x))  # Apply second convolution and ReLU activation
        x = self.pool(x)           # Apply max pooling
        x = x.reshape(x.shape[0], -1)  # Flatten everything except the batch dimension
        x = self.fc1(x)            # Apply fully connected layer
        return x

In [127]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [None]:
# Training settings. The learning rate, batch size and number of epochs can all
# be changed - see if you can increase the accuracy!

num_classes = 3           # number of classes the model predicts
input_size = 28 * 28      # pixels in a 28x28 image (not directly used in the CNN)
learning_rate = 1e-3      # step size handed to the optimizer
batch_size = 64           # images per batch
num_epochs = 10           # full passes over the training data

In [None]:
# We again use a class to create a Dataset object which we will use to load our images in

class ImageDataset(Dataset):
    """
    Dataset that pairs image files on disk with labels listed in a CSV file.

    The annotations file is read with ``pandas.read_csv`` using its default
    settings. Column 0 of each row is used as the image file name (relative to
    ``img_dir``) and column 1 as the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None, image_mode='RGBA'):
        """
        Parameters:
            annotations_file: str
                Path of the CSV file listing image names and labels.
            img_dir: str
                Directory that the image names are relative to.
            transform: callable, optional
                Applied to each loaded PIL image before it is returned.
            target_transform: callable, optional
                Applied to each label before it is returned.
            image_mode: str
                PIL mode every image is converted to. Defaults to 'RGBA'
                (4 channels), the mode that was previously hard-coded.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        self.image_mode = image_mode

    def __len__(self):
        """Return the number of rows in the annotations file."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """
        Load the image and label stored at row ``idx``.

        Returns:
            (image, label): the converted (and, if configured, transformed)
            image, and the label from column 1 (transformed if configured).
        """
        img_name = self.img_labels.iloc[idx, 0]
        label = self.img_labels.iloc[idx, 1]
        img_path = os.path.join(self.img_dir, img_name)

        # Open inside a context manager so the file handle is released right away;
        # convert() returns a separate in-memory image that stays valid afterwards.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert(self.image_mode)

        # Compare against None instead of relying on truthiness, so a transform
        # object that happens to evaluate as False is still applied.
        if self.transform is not None:
            image = self.transform(image)

        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label


In [None]:
# Preprocessing applied to every image so that it is compatible with the model
# architecture we made.

transform = transforms.Compose(
    [
        transforms.Resize(size=(28, 28)),  # Resize to match model expectations
        transforms.ToTensor(),             # Convert to tensor with shape [C, H, W]
    ]
)

In [None]:
# Load the data: labels come from the CSV file, images from img_dir.
# Make sure the img_dir path is correct!

particle_data = ImageDataset(
    annotations_file='labels.csv',
    img_dir='aws_deep_learning/aws_hackathon_data/all_images',
    transform=transform,
)

In [None]:
# The DataLoader feeds our dataset to the model in batches.
# batch_size sets how many images the model sees per step; shuffle=True
# reorders the images every epoch so the batches differ between passes.

train_loader = DataLoader(particle_data, batch_size=batch_size, shuffle=True)

In [133]:
# Build the network for 4-channel input (the dataset converts images to RGBA)
# and move its parameters to the selected device.
model = CNN(in_channels=4, num_classes=num_classes)
model = model.to(device)
model

CNN(
  (conv1): Conv2d(4, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=784, out_features=3, bias=True)
)

In [134]:
# Cross-entropy loss on the raw class scores, minimised with the Adam optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
#Now we train our model!

# Put the model in training mode explicitly instead of relying on whatever mode
# an earlier cell (e.g. an interrupted evaluation) left it in.
model.train()

# Mean training loss of each epoch, kept so convergence can be inspected or
# plotted after the run (nothing extra is printed during training).
epoch_losses = []

for epoch in range(num_epochs):
    print(f"Epoch [{epoch + 1}/{num_epochs}]")
    running_loss = 0.0
    samples_seen = 0
    for data, targets in tqdm(train_loader):
        # Move data and targets to the device (GPU/CPU)
        data = data.to(device)
        targets = targets.to(device)

        # Forward pass: compute the model output
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass: clear old gradients, then compute new ones
        optimizer.zero_grad()
        loss.backward()

        # Optimization step: update the model parameters
        optimizer.step()

        # The criterion averages over the batch, so weight by the batch size to
        # get a per-sample mean even when the last batch is smaller.
        running_loss += loss.item() * data.size(0)
        samples_seen += data.size(0)

    # max(..., 1) guards against an empty loader
    epoch_losses.append(running_loss / max(samples_seen, 1))

Epoch [1/10]


100%|██████████| 24/24 [00:01<00:00, 14.01it/s]


Epoch [2/10]


100%|██████████| 24/24 [00:01<00:00, 14.43it/s]


Epoch [3/10]


100%|██████████| 24/24 [00:01<00:00, 14.06it/s]


Epoch [4/10]


100%|██████████| 24/24 [00:01<00:00, 14.33it/s]


Epoch [5/10]


100%|██████████| 24/24 [00:01<00:00, 14.18it/s]


Epoch [6/10]


100%|██████████| 24/24 [00:01<00:00, 14.27it/s]


Epoch [7/10]


100%|██████████| 24/24 [00:01<00:00, 14.38it/s]


Epoch [8/10]


100%|██████████| 24/24 [00:01<00:00, 14.33it/s]


Epoch [9/10]


100%|██████████| 24/24 [00:01<00:00, 14.38it/s]


Epoch [10/10]


100%|██████████| 24/24 [00:01<00:00, 14.39it/s]


In [136]:
def check_accuracy(loader, model):
    """
    Prints the classification accuracy of the model over the given loader.

    The model is switched to evaluation mode for the duration of the check and
    is put back into whichever mode (training or evaluation) it was in before,
    even if an error occurs part-way through.

    Parameters:
        loader: DataLoader
            Yields (inputs, labels) batches for the dataset to check accuracy on.
        model: nn.Module
            The neural network model.

    Returns:
        None. The result is printed. For a loader that yields no samples a
        notice is printed instead of dividing by zero.
    """
    # Evaluate on the device the model's own parameters live on, so the function
    # does not depend on a global variable. A model without parameters leaves the
    # batches where they are.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0
    was_training = model.training  # remembered so the caller's mode can be restored
    model.eval()  # Set the model to evaluation mode

    try:
        with torch.no_grad():  # Disable gradient calculation
            for x, y in loader:
                if model_device is not None:
                    x = x.to(model_device)
                    y = y.to(model_device)

                # Forward pass: compute the model output
                scores = model(x)
                _, predictions = scores.max(1)  # Index of the highest score per sample
                num_correct += int((predictions == y).sum())  # Count correct predictions
                num_samples += predictions.size(0)  # Count total samples
    finally:
        model.train(was_training)  # Restore the mode the model was in before the check

    if num_samples == 0:
        print("Got 0/0 samples: accuracy is undefined for an empty loader")
        return

    # Calculate accuracy
    accuracy = num_correct / num_samples * 100
    print(f"Got {num_correct}/{num_samples} with accuracy {accuracy:.2f}%")

# Final accuracy check on the training set. Only the training data is evaluated:
# the test-set check below stays disabled because no test_loader is created in
# the cells shown here, so this number measures fit on data the model has seen.
check_accuracy(loader=train_loader, model=model)
#check_accuracy(test_loader, model)

Got 780/1500 with accuracy 52.00%
