In [44]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.optim as optim

In [45]:
DATA_DIR = "C:/Users/15105/Downloads/aptos2019-blindness-detection"

train_csv_path = os.path.join(DATA_DIR, "train.csv")
test_csv_path = os.path.join(DATA_DIR, "test.csv")
train_img_dir = os.path.join(DATA_DIR, "train_images")
test_img_dir = os.path.join(DATA_DIR, "test_images")
train_df = pd.read_csv(train_csv_path)
test_df = pd.read_csv(test_csv_path)
print(train_df.head())
print(test_df.head())

        id_code  diagnosis
0  000c1434d8d7          2
1  001639a390f0          4
2  0024cdab0c1e          1
3  002c21358ce6          0
4  005b95c28852          0
        id_code
0  0005cfc8afb6
1  003f0afdcd15
2  006efc72b638
3  00836aaacf06
4  009245722fa4


In [46]:
class RetinopathyDataset(Dataset):
    def __init__(self, df, img_dir, transform=None, has_labels=True):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.has_labels = has_labels

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # ALWAYS add the extension
        img_name = row["id_code"] + ".png"
        image_path = os.path.join(self.img_dir, img_name)

        # Debug if file missing
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image not found: {image_path}")

        # Open image
        img = Image.open(image_path).convert("RGB")

        if self.transform:
            img = self.transform(img)

        if self.has_labels:
            label = int(row["diagnosis"])
            return img, img_name, label

        return img, img_name

In [52]:
train_transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

test_transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

In [53]:
# TRAIN
train_dataset = RetinopathyDataset(
    df=train_df,
    img_dir=train_img_dir,
    transform=train_transforms,
    has_labels=True
)

train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True
)

# TEST (no labels)
test_dataset = RetinopathyDataset(
    df=test_df,
    img_dir=test_img_dir,
    transform=test_transforms,
    has_labels=False
)

test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False
)

In [54]:
class SimpleDRCNN(nn.Module):
    def __init__(self, num_classes=5):
        super(SimpleDRCNN, self).__init__()
        
        # Conv layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)  # -> 32 x 128 x 128
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1) # -> 64 x 64 x 64
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1) # -> 128 x 32 x 32
        
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.3)
        
        # Compute flattened feature size: 128 channels, 32x32 after pooling
        self.fc1 = nn.Linear(128 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, num_classes)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)  # -> 32 x 64 x 64
        
        x = F.relu(self.conv2(x))
        x = self.pool(x)  # -> 64 x 32 x 32
        
        x = F.relu(self.conv3(x))
        x = self.pool(x)  # -> 128 x 16 x 16
        
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [55]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = SimpleDRCNN(num_classes=5).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [57]:
num_epochs = 3

for epoch in range(num_epochs):
    model.train()
    running_loss = 0
    for i, (imgs, _, labels) in enumerate(train_loader):
        imgs = imgs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        print(i)
        if (i+1) % 10 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, len(train_loader), loss.item()))

0
1
2
3
4
5
6
7
8
9
Epoch [1/3], Step [10/58], Loss: 1.0090
10
11
12
13
14
15
16
17
18
19
Epoch [1/3], Step [20/58], Loss: 0.8146
20
21
22
23
24
25
26
27
28
29
Epoch [1/3], Step [30/58], Loss: 0.7759
30
31
32
33
34
35
36
37
38
39
Epoch [1/3], Step [40/58], Loss: 0.7499
40
41
42
43
44
45
46
47
48
49
Epoch [1/3], Step [50/58], Loss: 1.0296
50
51
52
53
54
55
56
57
0
1
2
3
4
5
6
7
8
9
Epoch [2/3], Step [10/58], Loss: 0.6152
10
11
12
13
14
15
16
17
18
19
Epoch [2/3], Step [20/58], Loss: 0.9305
20
21
22
23
24
25
26
27
28
29
Epoch [2/3], Step [30/58], Loss: 0.7312
30
31
32
33
34
35
36
37
38
39
Epoch [2/3], Step [40/58], Loss: 0.7935
40
41
42
43
44
45
46
47
48
49
Epoch [2/3], Step [50/58], Loss: 0.7265
50
51
52
53
54
55
56
57
0
1
2
3
4
5
6
7
8
9
Epoch [3/3], Step [10/58], Loss: 0.6292
10
11
12
13
14
15
16
17
18
19
Epoch [3/3], Step [20/58], Loss: 0.6239
20
21
22
23
24
25
26
27
28
29
Epoch [3/3], Step [30/58], Loss: 0.7629
30
31
32
33
34
35
36
37
38
39
Epoch [3/3], Step [40/58], Loss: 0.8084
40