In [1]:
import numpy as np 
import pandas as pd 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.io as io
import torchvision.transforms as transforms
from torchvision.transforms import v2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import os
import torchvision.models as models
from torch.optim import lr_scheduler



In [2]:
%cd bhw1

/home/jupyter/work/resources/bhw1


In [3]:
labels_df = pd.read_csv('labels.csv')
labels_df

Unnamed: 0,Id,Category
0,trainval_00000.jpg,7
1,trainval_00001.jpg,198
2,trainval_00002.jpg,161
3,trainval_00003.jpg,131
4,trainval_00004.jpg,107
...,...,...
99995,trainval_99995.jpg,72
99996,trainval_99996.jpg,194
99997,trainval_99997.jpg,6
99998,trainval_99998.jpg,29


In [4]:
train_df, val_df = train_test_split(labels_df, test_size=0.05, random_state=42)
img_dir = 'trainval' 

In [5]:
class MyDataset(Dataset):
    def __init__(self, img_dir, labels_df, transform=None):
        self.img_dir = img_dir
        self.labels_df = labels_df
        self.transform = transform

    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        img_name = self.labels_df.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_name)
        image = io.read_image(img_path)  
        label = self.labels_df.iloc[idx, 1]

        if self.transform:
            image = self.transform(image)

        return image, label


In [6]:
transform = v2.Compose([
    v2.ConvertImageDtype(torch.float),
    v2.RandomHorizontalFlip(p=0.5),  
    v2.RandomApply([v2.RandomCrop(40, padding=4)], p=0.5),
    v2.RandomApply([v2.RandomRotation(degrees=15)], p=0.5),
    v2.RandomApply([v2.Grayscale()], p=0.5),
    v2.RandomApply([v2.GaussianBlur(kernel_size=3)], p=0.5),
    v2.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3)),
    v2.Normalize((0.5692, 0.5448, 0.4934), (0.1823, 0.1810, 0.1854)) 
])

In [7]:
train_dataset = MyDataset(img_dir=img_dir, labels_df=train_df, transform=transform)
val_dataset = MyDataset(img_dir=img_dir, labels_df=val_df, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False)

In [8]:
#https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py
class Bottleneck(nn.Module):
    expansion = 4
    def __init__(self, in_channels, out_channels, i_downsample=None, stride=1):
        super(Bottleneck, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)
        
        self.conv3 = nn.Conv2d(out_channels, out_channels*self.expansion, kernel_size=1, stride=1, padding=0)
        self.batch_norm3 = nn.BatchNorm2d(out_channels*self.expansion)
        
        self.i_downsample = i_downsample
        self.stride = stride
        self.relu = nn.ReLU()
        
    def forward(self, x):
        identity = x.clone()
        x = self.relu(self.batch_norm1(self.conv1(x)))
        
        x = self.relu(self.batch_norm2(self.conv2(x)))
        
        x = self.conv3(x)
        x = self.batch_norm3(x)
        
        if self.i_downsample is not None:
            identity = self.i_downsample(identity)

        x+=identity
        x=self.relu(x)
        
        return x

class Block(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, i_downsample=None, stride=1):
        super(Block, self).__init__()
       

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=False)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        self.i_downsample = i_downsample
        self.stride = stride
        self.relu = nn.ReLU()

    def forward(self, x):
      identity = x.clone()

      x = self.relu(self.batch_norm2(self.conv1(x)))
      x = self.batch_norm2(self.conv2(x))

      if self.i_downsample is not None:
          identity = self.i_downsample(identity)
      print(x.shape)
      print(identity.shape)
      x += identity
      x = self.relu(x)
      return x


        
        
class ResNet(nn.Module):
    def __init__(self, ResBlock, layer_list, num_classes, num_channels=3):
        super(ResNet, self).__init__()
        self.in_channels = 64
        
        self.conv1 = nn.Conv2d(num_channels, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
       
        # self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(ResBlock, layer_list[0], planes=64, stride=1)
        self.layer2 = self._make_layer(ResBlock, layer_list[1], planes=128, stride=2)
        self.layer3 = self._make_layer(ResBlock, layer_list[2], planes=256, stride=2)
        self.layer4 = self._make_layer(ResBlock, layer_list[3], planes=512, stride=2)
        
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.dropout = nn.Dropout(0.75)
        self.fc = nn.Linear(512 * ResBlock.expansion, num_classes)
        

    def forward(self, x):
        x = self.relu(self.batch_norm1(self.conv1(x)))
        # x = self.max_pool(x)
        
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)

        return x
        
    def _make_layer(self, ResBlock, blocks, planes, stride=1):
        ii_downsample = None
        layers = []
        
        if stride != 1 or self.in_channels != planes*ResBlock.expansion:
            ii_downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, planes*ResBlock.expansion, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes*ResBlock.expansion)
            )
            
        layers.append(ResBlock(self.in_channels, planes, i_downsample=ii_downsample, stride=stride))
        self.in_channels = planes*ResBlock.expansion
        
        for i in range(blocks-1):
            layers.append(ResBlock(self.in_channels, planes))
            
        return nn.Sequential(*layers)

        
        
def ResNet50(num_classes, channels=3):
    return ResNet(Bottleneck, [3,4,6,3], num_classes, channels)

In [9]:
def train(model, loader, criterion, optimizer):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for inputs, labels in loader:
        inputs, labels = inputs.cuda(), labels.cuda()
        
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * inputs.size(0)
        
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [10]:
def validate(model, loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.cuda(), labels.cuda()
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item() * inputs.size(0)
            
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [11]:
model = ResNet50(num_classes=200).cuda()

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
#scheduler = CosineAnnealingLR(optimizer, T_max=50)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
#scheduler = CyclicLR(optimizer, base_lr=1e-4, max_lr=0.1, step_size_up=400, mode='triangular')

In [None]:
num_epochs = 80
best_val_acc = 0.0
train_loss_history = []
train_acc_history = []

val_loss_history = []
val_acc_history = []
save_dir = "./"

for epoch in tqdm(range(num_epochs)):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer)
    val_loss, val_acc = validate(model, val_loader, criterion)

    train_loss_history.append(train_loss)
    train_acc_history.append(train_acc)

    val_loss_history.append(val_loss)
    val_acc_history.append(val_acc)

    scheduler.step(val_loss)

    print(f'Epoch [{epoch+1}/{num_epochs}]')
    print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}%')
    print(f'Val Loss: {val_loss:.4f} | Val Acc: {val_acc*100:.2f}%')
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_path = os.path.join(save_dir, 'best_model.pth')
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'train_loss_history': train_loss_history,
            'val_loss_history': val_loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history
        }, best_model_path)
        print(f'Best model updated and saved to {best_model_path}')

  0%|          | 0/80 [00:00<?, ?it/s]

Epoch [1/80]
Train Loss: 5.2912 | Train Acc: 0.71%
Val Loss: 5.2502 | Val Acc: 1.12%


  1%|▏         | 1/80 [06:20<8:20:52, 380.41s/it]

Best model updated and saved to ./best_model.pth
Epoch [2/80]
Train Loss: 5.2375 | Train Acc: 1.05%
Val Loss: 5.1926 | Val Acc: 1.54%


  2%|▎         | 2/80 [12:38<8:12:59, 379.22s/it]

Best model updated and saved to ./best_model.pth
Epoch [3/80]
Train Loss: 5.1775 | Train Acc: 1.52%
Val Loss: 5.1237 | Val Acc: 1.84%


  4%|▍         | 3/80 [18:58<8:07:13, 379.66s/it]

Best model updated and saved to ./best_model.pth
Epoch [4/80]
Train Loss: 5.1032 | Train Acc: 2.12%
Val Loss: 6.3105 | Val Acc: 2.16%


  5%|▌         | 4/80 [25:18<8:00:54, 379.66s/it]

Best model updated and saved to ./best_model.pth
Epoch [5/80]
Train Loss: 5.0828 | Train Acc: 2.33%
Val Loss: 5.0219 | Val Acc: 2.68%


  6%|▋         | 5/80 [31:39<7:54:58, 379.98s/it]

Best model updated and saved to ./best_model.pth
Epoch [6/80]
Train Loss: 5.0037 | Train Acc: 3.05%
Val Loss: 4.9402 | Val Acc: 3.40%


  8%|▊         | 6/80 [38:00<7:49:03, 380.32s/it]

Best model updated and saved to ./best_model.pth
Epoch [7/80]
Train Loss: 4.9265 | Train Acc: 3.85%
Val Loss: 4.8260 | Val Acc: 5.02%


  9%|▉         | 7/80 [44:20<7:42:39, 380.26s/it]

Best model updated and saved to ./best_model.pth
Epoch [8/80]
Train Loss: 4.8411 | Train Acc: 5.00%
Val Loss: 4.7826 | Val Acc: 5.84%


 10%|█         | 8/80 [50:39<7:35:53, 379.91s/it]

Best model updated and saved to ./best_model.pth
Epoch [9/80]
Train Loss: 4.7569 | Train Acc: 6.03%
Val Loss: 4.7080 | Val Acc: 6.48%


 11%|█▏        | 9/80 [56:59<7:29:28, 379.84s/it]

Best model updated and saved to ./best_model.pth
Epoch [10/80]
Train Loss: 4.6693 | Train Acc: 7.17%
Val Loss: 4.6012 | Val Acc: 8.60%


 12%|█▎        | 10/80 [1:03:18<7:22:54, 379.64s/it]

Best model updated and saved to ./best_model.pth
Epoch [11/80]
Train Loss: 4.5664 | Train Acc: 8.64%
Val Loss: 4.5407 | Val Acc: 9.28%


 14%|█▍        | 11/80 [1:09:38<7:16:46, 379.80s/it]

Best model updated and saved to ./best_model.pth
Epoch [12/80]
Train Loss: 4.4575 | Train Acc: 10.27%
Val Loss: 4.3883 | Val Acc: 11.50%


 15%|█▌        | 12/80 [1:15:56<7:09:53, 379.32s/it]

Best model updated and saved to ./best_model.pth
Epoch [13/80]
Train Loss: 4.3861 | Train Acc: 11.28%
Val Loss: 4.3024 | Val Acc: 13.46%


 16%|█▋        | 13/80 [1:22:16<7:03:43, 379.45s/it]

Best model updated and saved to ./best_model.pth
Epoch [14/80]
Train Loss: 4.3093 | Train Acc: 12.72%
Val Loss: 4.2551 | Val Acc: 13.54%


 18%|█▊        | 14/80 [1:28:35<6:57:11, 379.27s/it]

Best model updated and saved to ./best_model.pth
Epoch [15/80]
Train Loss: 4.2461 | Train Acc: 13.83%
Val Loss: 4.1993 | Val Acc: 14.88%


 19%|█▉        | 15/80 [1:34:55<6:51:02, 379.42s/it]

Best model updated and saved to ./best_model.pth
Epoch [16/80]
Train Loss: 4.1982 | Train Acc: 14.57%
Val Loss: 4.1657 | Val Acc: 16.56%


 20%|██        | 16/80 [1:41:15<6:45:08, 379.82s/it]

Best model updated and saved to ./best_model.pth


 21%|██▏       | 17/80 [1:47:34<6:38:35, 379.61s/it]

Epoch [17/80]
Train Loss: 4.1395 | Train Acc: 15.74%
Val Loss: 4.1053 | Val Acc: 16.42%
Epoch [18/80]
Train Loss: 4.0888 | Train Acc: 16.75%
Val Loss: 4.0088 | Val Acc: 18.38%


 22%|██▎       | 18/80 [1:53:54<6:32:19, 379.66s/it]

Best model updated and saved to ./best_model.pth


 24%|██▍       | 19/80 [2:00:12<6:25:17, 378.98s/it]

Epoch [19/80]
Train Loss: 4.0441 | Train Acc: 17.65%
Val Loss: 4.1447 | Val Acc: 16.34%


 25%|██▌       | 20/80 [2:06:29<6:18:28, 378.47s/it]

Epoch [20/80]
Train Loss: 4.0068 | Train Acc: 18.32%
Val Loss: 4.1962 | Val Acc: 15.04%
Epoch [21/80]
Train Loss: 3.9628 | Train Acc: 19.18%
Val Loss: 3.9538 | Val Acc: 20.50%


 26%|██▋       | 21/80 [2:12:48<6:12:19, 378.64s/it]

Best model updated and saved to ./best_model.pth
Epoch [22/80]
Train Loss: 3.9414 | Train Acc: 19.63%
Val Loss: 3.9105 | Val Acc: 20.80%


 28%|██▊       | 22/80 [2:19:08<6:06:16, 378.91s/it]

Best model updated and saved to ./best_model.pth


 29%|██▉       | 23/80 [2:25:25<5:59:28, 378.39s/it]

Epoch [23/80]
Train Loss: 3.8845 | Train Acc: 20.86%
Val Loss: 3.9207 | Val Acc: 20.00%
Epoch [24/80]
Train Loss: 3.8394 | Train Acc: 21.76%
Val Loss: 3.8604 | Val Acc: 22.12%


 30%|███       | 24/80 [2:31:43<5:53:06, 378.34s/it]

Best model updated and saved to ./best_model.pth


 31%|███▏      | 25/80 [2:38:00<5:46:28, 377.97s/it]

Epoch [25/80]
Train Loss: 3.8013 | Train Acc: 22.63%
Val Loss: 3.8516 | Val Acc: 21.86%


 32%|███▎      | 26/80 [2:44:19<5:40:29, 378.33s/it]

Epoch [26/80]
Train Loss: 3.7673 | Train Acc: 23.33%
Val Loss: 3.9552 | Val Acc: 21.26%
Epoch [27/80]
Train Loss: 3.7377 | Train Acc: 24.12%
Val Loss: 3.8057 | Val Acc: 22.94%


 34%|███▍      | 27/80 [2:50:39<5:34:27, 378.63s/it]

Best model updated and saved to ./best_model.pth
Epoch [28/80]
Train Loss: 3.6980 | Train Acc: 24.73%
Val Loss: 3.7639 | Val Acc: 24.36%


 35%|███▌      | 28/80 [2:56:58<5:28:25, 378.95s/it]

Best model updated and saved to ./best_model.pth
Epoch [29/80]
Train Loss: 3.6640 | Train Acc: 25.71%
Val Loss: 3.7375 | Val Acc: 24.38%


 36%|███▋      | 29/80 [3:03:17<5:22:03, 378.88s/it]

Best model updated and saved to ./best_model.pth
Epoch [30/80]
Train Loss: 3.6318 | Train Acc: 26.36%
Val Loss: 3.7396 | Val Acc: 24.82%


 38%|███▊      | 30/80 [3:09:37<5:16:04, 379.29s/it]

Best model updated and saved to ./best_model.pth


 39%|███▉      | 31/80 [3:15:54<5:09:12, 378.61s/it]

Epoch [31/80]
Train Loss: 3.5994 | Train Acc: 27.22%
Val Loss: 3.8232 | Val Acc: 23.68%
Epoch [32/80]
Train Loss: 3.5720 | Train Acc: 27.85%
Val Loss: 3.6720 | Val Acc: 26.02%


 40%|████      | 32/80 [3:22:13<5:02:52, 378.59s/it]

Best model updated and saved to ./best_model.pth


 41%|████▏     | 33/80 [3:28:30<4:56:15, 378.21s/it]

Epoch [33/80]
Train Loss: 3.5413 | Train Acc: 28.41%
Val Loss: 3.6975 | Val Acc: 25.46%


 42%|████▎     | 34/80 [3:34:48<4:49:55, 378.17s/it]

Epoch [34/80]
Train Loss: 3.5128 | Train Acc: 29.02%
Val Loss: 3.7268 | Val Acc: 24.88%
Epoch [35/80]
Train Loss: 3.4882 | Train Acc: 29.88%
Val Loss: 3.5781 | Val Acc: 27.42%


 44%|████▍     | 35/80 [3:41:07<4:43:42, 378.27s/it]

Best model updated and saved to ./best_model.pth
Epoch [36/80]
Train Loss: 3.4676 | Train Acc: 30.24%
Val Loss: 3.5925 | Val Acc: 27.92%


 45%|████▌     | 36/80 [3:47:24<4:37:17, 378.12s/it]

Best model updated and saved to ./best_model.pth


 46%|████▋     | 37/80 [3:53:43<4:31:00, 378.15s/it]

Epoch [37/80]
Train Loss: 3.4359 | Train Acc: 30.92%
Val Loss: 3.6778 | Val Acc: 26.50%


 48%|████▊     | 38/80 [4:00:00<4:24:31, 377.90s/it]

Epoch [38/80]
Train Loss: 3.4161 | Train Acc: 31.48%
Val Loss: 3.5846 | Val Acc: 27.56%
Epoch [39/80]
Train Loss: 3.3923 | Train Acc: 32.18%
Val Loss: 3.5706 | Val Acc: 29.42%


 49%|████▉     | 39/80 [4:06:19<4:18:26, 378.20s/it]

Best model updated and saved to ./best_model.pth


 50%|█████     | 40/80 [4:12:37<4:12:04, 378.11s/it]

Epoch [40/80]
Train Loss: 3.3679 | Train Acc: 32.89%
Val Loss: 3.5827 | Val Acc: 27.80%


In [None]:
epochs = np.arange(1, num_epochs + 1, 1)

In [None]:
plt.figure(figsize=(14, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs, train_loss_history, label='train_loss', marker='o')
plt.plot(epochs, val_loss_history, label='val_loss', marker='o')
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(epochs, train_acc_history, label='train_acc', marker='o')
plt.plot(epochs, val_acc_history, label='val_acc', marker='o')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()


plt.show()

In [13]:
class TestDataset(Dataset):
    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        self.image_names = sorted([f for f in os.listdir(img_dir) if os.path.isfile(os.path.join(img_dir, f))])
        
    def __len__(self):
        return len(self.image_names)
    
    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path)
        
        if self.transform:
            image = self.transform(image)
        
        return img_name, image

In [14]:
test_transform = transforms.Compose([
    transforms.ToTensor(), 
    transforms.Normalize((0.5692, 0.5448, 0.4934), (0.1823, 0.1810, 0.1854)) 
])
test_img_dir = 'test'
test_dataset = TestDataset(img_dir=test_img_dir, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

In [None]:
results = []
with torch.no_grad():
    for img_names, images in test_loader:
        images = images.cuda()
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        preds = preds.cpu().numpy()
        for img_name, pred in zip(img_names, preds):
            results.append((img_name, int(pred)))  

ans = pd.DataFrame(results, columns=['Id', 'Category'])
ans.to_csv('labels_test.csv', index=False)

In [None]:
torch.save(model, 'model_after_80_epochs.pth')