In [None]:
import os

import pandas as pd
import PIL
import PIL.Image
import PIL.ImageEnhance
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, random_split
from torchvision import transforms

In [None]:
data = pd.read_csv('image_output.csv') # Change to your csv file with the file_paths and labels
data['file_path'] = data['file_path'].apply(lambda x: os.path.basename(x))
data['label'] = data['label'].replace(0, 5)
data['label'] = data['label'].astype(str)
data.rename(columns = {'file_path':'filename'}, inplace = True)
data.head(5)

In [None]:
data.to_csv('cnn_filename_labels.csv', index=False)

In [None]:
from torchvision import transforms
input_size = 224

transform = transforms.Compose([
    transforms.Resize((input_size, input_size)),  # Resize all images to the same size
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize images
])

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, csv_path, images_folder, transform = None):
        self.df = pd.read_csv(csv_path)
        self.images_folder = images_folder
        self.transform = transform

    def __len__(self):
        return len(self.df)
    def __getitem__(self, index):
        filename = self.df.iloc[index]["filename"]
        label = self.df.iloc[index]["label"] - 1
        
        image = PIL.Image.open(os.path.join(self.images_folder, filename)).convert('RGB')
        
        if self.transform is not None:
            image = self.transform(image)
            
        return image, label

In [None]:
train_dataset = CustomDataset("cnn_filename_labels.csv", "./image_outputs", transform=transform)

In [None]:
from torch.utils.data import random_split


train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size

train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

In [None]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes, input_size=input_size):
        super(SimpleCNN, self).__init__()
        # Convolutional Layer 1
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.act1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Convolutional Layer 2
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.act2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Convolutional Layer 3
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.act3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Convolutional Layer 4
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.act4 = nn.ReLU()
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Convolutional Layer 5
        self.conv5 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.act5 = nn.ReLU()
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Adjusted size calculation after 5 pooling layers
        size_after_conv = input_size // 32  # Each pooling layer halves the dimension
        self.fc1 = nn.Linear(256 * size_after_conv * size_after_conv, num_classes)

    def forward(self, x):
        x = self.pool1(self.act1(self.conv1(x)))
        x = self.pool2(self.act2(self.conv2(x)))
        x = self.pool3(self.act3(self.conv3(x)))
        x = self.pool4(self.act4(self.conv4(x)))
        x = self.pool5(self.act5(self.conv5(x)))
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        return x

In [None]:
%%time
model = SimpleCNN(num_classes=5, input_size=input_size) 
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

num_epochs = 25  # Number of epochs

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    for images, labels in train_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in val_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    print(f'Epoch {epoch+1}, Accuracy: {100 * correct / total}%')

In [None]:
img = PIL.Image.open(f"./labeled_images/0001.png")
imgCropped = img.crop(box= (205, 96, 820, 713))
enhancer = PIL.ImageEnhance.Contrast(imgCropped)
enhanced_image = enhancer.enhance(4.0)
gray_image = enhanced_image.convert("L")
img = transform(gray_image.convert('RGB'))
img = img.unsqueeze(0)

In [None]:
# Test model
with torch.no_grad():
    output = model(img)
    _, predicted = torch.max(output, 1)
    print(f'Predicted class: {predicted.item() + 1}') # Labels for classifier are range 0-4

In [None]:
torch.save(model, './outputs/CNN/CNN_classify_attention_patterns.pth')