In [56]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from jupyter_server.transutils import trans
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from PIL import Image
import pandas as pd
from sklearn.metrics import accuracy_score
from torch.utils.data import WeightedRandomSampler

# https://stackoverflow.com/questions/72371859/attributeerror-module-collections-has-no-attribute-iterable
import collections
collections.Iterable = collections.abc.Iterable

# import the data

In [49]:
class ImageCSVDataset(Dataset):
    def __init__(self, image_dir, csv_file, transform=None, test_set=False):
        """
        Args:
            image_dir (string): Directory with all the images.
            csv_file (string): Path to the csv file with labels.
            transform (callable, optional): Optional transform to be applied
                on a sample.

        """
        self.image_dir = image_dir
        self.labels = pd.read_csv(csv_file)
        self.transform = transform
        self.test_set = test_set

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = self.labels.iloc[idx]['filename']  # Assuming image name is in the first column
        image = Image.open(f"{self.image_dir}/{img_name}").convert('RGB')

        if self.transform:
            image = self.transform(image)

        if self.test_set:
          return image

        label = self.labels.iloc[idx]['label']  # Assuming label is in the second column
        return image, label

# Define dataset and data loader.
image_dir = 'train/'
csv_file = 'train.csv'
image_dir_test = 'test/'
csv_file_test = 'test.csv'

In [50]:
# Define transformations (optional)
# https://pytorch.org/vision/main/transforms.html
# https://www.kaggle.com/datasets/abhinavnayak/catsvdogs-transformed
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images
    # transforms.RandomResizedCrop((224, 224)),
    transforms.RandomRotation(15),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),          # Convert to PyTorch tensor
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.225, 0.225, 0.225])  # Normalize
])

In [61]:
dataset = ImageCSVDataset(image_dir=image_dir, csv_file=csv_file, transform=transform)

dataset_size = len(dataset)
train_size = int(0.8 * dataset_size)
val_size = dataset_size - train_size
print(dataset_size, train_size, val_size)
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create DataLoaders for training and validation sets
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

8653 6922 1731


# fine tune pre-trained model

https://github.com/pytorch/examples/blob/main/imagenet/main.py

In [52]:
# Load a pre-trained model
model = models.resnet50(pretrained=True)

# Modify the final fully connected layer
num_classes = dataset.labels['label'].max() + 1 
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)  # Replace with your number of classes

# Freeze early layers (optional)
for param in model.parameters():
    param.requires_grad = False

# Unfreeze later layers (optional)
for layer in list(model.children())[-3:]:  # Unfreeze last 3 layers
    for param in layer.parameters():
        param.requires_grad = True

# 4. Define optimizer and loss function
# optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001, weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()

# 6. Train the model (similar to previous example)
num_epochs = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if i % 100 == 99:  # Print every 100 mini-batches
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

[1, 100] loss: 1.201
[1, 200] loss: 0.900
[2, 100] loss: 0.739
[2, 200] loss: 0.724
[3, 100] loss: 0.629
[3, 200] loss: 0.627
[4, 100] loss: 0.559
[4, 200] loss: 0.577
[5, 100] loss: 0.525
[5, 200] loss: 0.524
[6, 100] loss: 0.466
[6, 200] loss: 0.511
[7, 100] loss: 0.475
[7, 200] loss: 0.445
[8, 100] loss: 0.433
[8, 200] loss: 0.422
[9, 100] loss: 0.392
[9, 200] loss: 0.406
[10, 100] loss: 0.379
[10, 200] loss: 0.365
[11, 100] loss: 0.337
[11, 200] loss: 0.367
[12, 100] loss: 0.327
[12, 200] loss: 0.341
[13, 100] loss: 0.304
[13, 200] loss: 0.316
[14, 100] loss: 0.309
[14, 200] loss: 0.280
[15, 100] loss: 0.263
[15, 200] loss: 0.260
[16, 100] loss: 0.262
[16, 200] loss: 0.276
[17, 100] loss: 0.245
[17, 200] loss: 0.242
[18, 100] loss: 0.229
[18, 200] loss: 0.218
[19, 100] loss: 0.208
[19, 200] loss: 0.180
[20, 100] loss: 0.179
[20, 200] loss: 0.193


In [53]:
# Validation
model.eval() # toggle evaluation mode
val_loss = 0.0
all_preds = []
all_labels = []

with torch.no_grad():
    for data in val_loader:
        inputs, labels = data[0].to(device), data[1].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        val_loss += loss.item()

        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

model.train() # toggle back training mode

# Calculate validation accuracy
accuracy = accuracy_score(all_labels, all_preds)
print(f"Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {accuracy:.4f}")

Validation Loss: 0.7601, Accuracy: 0.8163


In [54]:
# Use model to make predictions.

# Create test dataloader that doesn't have labels.
dataset_test = ImageCSVDataset(image_dir=image_dir_test, csv_file=csv_file_test,
                               transform=transform, test_set=True)
dataloader_test = DataLoader(dataset_test, batch_size=32, shuffle=False)

predictions = []
for i, data in enumerate(dataloader_test, 0):
    inputs = data.to(device)
    outputs = model(inputs)
    predictions.append(outputs.argmax(axis=1).detach().cpu().numpy())
predictions = np.concatenate(predictions)

# Write predictions to a submission file.
df_predictions = pd.read_csv(csv_file_test)
df_predictions['predictions'] = predictions
df_predictions[['id', 'predictions']].to_csv('submission.csv', index=False)

display(df_predictions)

Unnamed: 0,id,filename,predictions
0,0,MV1012-BC-12_obj00007.jpg,1
1,1,MV1012-BC-12_obj00009.jpg,3
2,2,MV1012-BC-12_obj00011.jpg,1
3,3,MV1012-BC-12_obj00012.jpg,3
4,4,MV1012-BC-12_obj00013.jpg,5
...,...,...,...
2169,2169,MV1012-BC-8_obj01887.jpg,0
2170,2170,MV1012-BC-8_obj01892.jpg,0
2171,2171,MV1012-BC-8_obj01893.jpg,0
2172,2172,MV1012-BC-8_obj01899.jpg,1
