# RNN for classifying images into two classes

## The Model

This website is useful for some understanding: https://medium.com/dair-ai/building-rnns-is-fun-with-pytorch-and-google-colab-3903ea9a3a79

## Building the model

Not gonna lie, this model and code was written by ChatGPT - I wanted to try it out and it was pretty good!

On my small dataset, this model has test accuracy of 59.52%. By increasing the size of the dataset and potentially have more neurons (let's say, up to 300).


In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
image_size = 150
input_size = image_size * image_size * 3  # Image size: 32x32 with 3 color channels
hidden_size = 128
num_layers = 2
num_classes = 2
batch_size = 100
num_epochs = 10
learning_rate = 0.001

# Data transforms
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),     # Resize the image to 32x32
    transforms.ToTensor(),           # Convert image to tensor
    transforms.Normalize(            # Normalize image channels
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Load the dataset
#train_dataset = ImageFolder("../foreign_vs_clear/train", transform=transform)
#test_dataset = ImageFolder("../foreign_vs_clear/test", transform=transform)

train_dataset = ImageFolder("C:/Users/hanna/Desktop/git/interiorcardamage/Data2/raw/train", transform=transform)
test_dataset = ImageFolder("C:/Users/hanna/Desktop/git/interiorcardamage/Data2/raw/test", transform=transform)

# Data loader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Recurrent neural network (RNN)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forwardOLD(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        out, _ = self.rnn(x, h0.reshape(self.num_layers, x.size(0), self.hidden_size))
        out = self.fc(out[:, -1, :])

        return out

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        out, _ = self.rnn(x.unsqueeze(1), h0)
        out = self.fc(out[:, -1, :])

        return out


# Initialize the model
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Reshape images
        images = images.view(-1, input_size)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch}], Loss: {loss.item():.4f}")


Epoch [0], Loss: 0.7440
Epoch [1], Loss: 0.6795
Epoch [2], Loss: 0.6554
Epoch [3], Loss: 0.6202
Epoch [4], Loss: 0.6141
Epoch [5], Loss: 0.6089
Epoch [6], Loss: 0.5977
Epoch [7], Loss: 0.5798
Epoch [8], Loss: 0.5698
Epoch [9], Loss: 0.5597


In [13]:

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        images = images.view(-1, input_size)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")


Test Accuracy: 59.52%


In [7]:
image_size * image_size * 3

67500

In [9]:
5467500/(150*150)

243.0