##You will need a CUDA enabled GPU installed to run this code##

In [1]:
import torchmetrics
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet50
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import ast
from PIL import Image
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

# Define the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the CSV file
labels_df = pd.read_csv('encoded_labels.csv')

# Columns in your CSV
file_column = 'index'  # Replace with your actual filename column name
label_column = 'labels'  # Replace with your actual labels column name

# Directory where your images are stored
image_directory = './images/'  # Ensure this path ends with a '/'

# Function to convert string representation to list of integers
def parse_labels(label_str):
    label_str = label_str.replace(' ', ',')
    return list(map(int, ast.literal_eval(label_str)))

# Create full paths to images and parse labels
image_paths = [os.path.join(image_directory, fname) for fname in labels_df[file_column]]
labels = [parse_labels(label) for label in labels_df[label_column]]

# Split the dataset into training and validation sets
train_image_paths, val_image_paths, train_labels, val_labels = train_test_split(
    image_paths,
    labels,
    test_size=0.2,
    random_state=42
)

# Custom Dataset Class for Grayscale Images
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label = torch.tensor(self.labels[idx], dtype=torch.float)

        image = Image.open(image_path).convert('L')  # Convert to grayscale
        if self.transform:
            image = self.transform(image)

        return image, label

# Define the image transformations for grayscale images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Create datasets and dataloaders
train_dataset = CustomDataset(train_image_paths, train_labels, transform=transform)
val_dataset = CustomDataset(val_image_paths, val_labels, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Define the modified ResNet-50 model for grayscale images
class ModifiedResNet(nn.Module):
    def __init__(self):
        super(ModifiedResNet, self).__init__()
        self.resnet = resnet50(pretrained=False)
        self.resnet.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=64,
            kernel_size=(7, 7),
            stride=(2, 2),
            padding=(3, 3),
            bias=False
        )
        self.resnet.fc = nn.Linear(
            in_features=self.resnet.fc.in_features,
            out_features=14
        )

    def forward(self, x):
        return self.resnet(x)

model = ModifiedResNet().to(device)

# Define loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.7)

# Define the metric for validation
metric = torchmetrics.classification.MultilabelAccuracy(num_labels=14).to(device)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Update learning rate
    scheduler.step()

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

# Validation loop
model.eval()
metric.reset()

with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predictions = torch.sigmoid(outputs)
        predicted_labels = (predictions > 0.5).float()

        # Update metric
        metric.update(predicted_labels, labels)

# Compute final metric results
accuracy_per_class = metric.compute()
average_accuracy = accuracy_per_class.mean().item()

print(f'Validation Accuracy per class: {accuracy_per_class.tolist()}')
print(f'Average Validation Accuracy: {100 * average_accuracy}%')

# Save the model
torch.save(model.state_dict(), 'resnet50_scratch.pth')




Epoch 1, Loss: 0.19078092192504423
Epoch 2, Loss: 0.18046236524345183
Epoch 3, Loss: 0.17848887912770536
Epoch 4, Loss: 0.17633838301643412
Epoch 5, Loss: 0.17460506670969597
Epoch 6, Loss: 0.17353926239706946
Epoch 7, Loss: 0.1743860848195164
Epoch 8, Loss: 0.17296187373551916
Epoch 9, Loss: 0.17042738870314672
Epoch 10, Loss: 0.17023769681546705
Validation Accuracy per class: 0.9495798349380493
Average Validation Accuracy: 94.95798349380493%
