- [Training](#Training)

# Training

In [1]:
import os
from PIL import Image

import numpy as np
import pandas as pd

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from torchvision.models import resnet18

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

## Hyperparameters

In [2]:
IMAGE_SIZE = 128

BATCH_SIZE = 64

LR = 0.0001
EPOCHS = 10

In [3]:
# Reading the Datafarme
data = pd.read_csv('./dataset/train_challenge.csv')
data.head()

Unnamed: 0,0,1
0,0.jpg,Adhered
1,1.jpg,Adhered
2,2.jpg,Concrete
3,3.jpg,Concrete
4,4.jpg,Plastic & fabric


In [4]:
# Setup Classes
CLASSES = ['Adhered', 'Ballasted', 'Concrete', 'Plastic & fabric', 'Shingle', 'Steel']

classes_to_idx = {cls: idx for idx, cls in enumerate(CLASSES)}
idx_to_classes = {idx: cls for idx, cls in enumerate(CLASSES)}

In [5]:
# replace '1' column with classes_to_idx dict
data = data.replace({'1': classes_to_idx})
data.head()

Unnamed: 0,0,1
0,0.jpg,0
1,1.jpg,0
2,2.jpg,2
3,3.jpg,2
4,4.jpg,3


In [6]:
# Make X and Y from dataframe
X = data['0'].values
Y = data['1'].values

In [7]:
# Split into train, validation and test sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1)

print(f"Train size: {len(X_train)}\nValidation size: {len(X_val)}\nTest size: {len(X_test)}")

Train size: 6223
Validation size: 692
Test size: 1729


## Torch Dataset and Dataloader

In [8]:
class SatelliteDataset(Dataset):
    def __init__(self, dataroot: str, X_array: np.array, Y_array: np.array, transform = None, target_transform = None):
        self.dataroot = dataroot
        self.X_array = X_array
        self.Y_array = Y_array
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        file_name = self.X_array[index]
        img = Image.open(os.path.join(self.dataroot, file_name)).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
            
        label = np.array(self.Y_array[index])
        if self.target_transform is not None:
            label = self.target_transform(label)
        else:
            label = torch.from_numpy(label)

        return {'image': img, 'label': label, 'image_name': file_name}

    def __len__(self):
        return len(self.X_array)

### Datasets

In [9]:
train_dataset = SatelliteDataset(dataroot='./dataset/train/', X_array=X_train, Y_array=y_train, 
                                 transform=transforms.Compose([transforms.RandomResizedCrop(IMAGE_SIZE),
                                                               transforms.ToTensor(),
                                                               transforms.Normalize([0.4728, 0.4762, 0.4692],
                                                                                    [0.2558, 0.2532, 0.2457])]))

val_dataset = SatelliteDataset(dataroot='./dataset/train/', X_array=X_train, Y_array=y_train, 
                               transform=transforms.Compose([transforms.Resize(IMAGE_SIZE, IMAGE_SIZE),
                                                             transforms.ToTensor(),
                                                             transforms.Normalize([0.4728, 0.4762, 0.4692],
                                                                                  [0.2558, 0.2532, 0.2457])]))

test_dataset = SatelliteDataset(dataroot='./dataset/train/', X_array=X_train, Y_array=y_train, 
                                transform=transforms.Compose([transforms.Resize(IMAGE_SIZE, IMAGE_SIZE),
                                                              transforms.ToTensor(),
                                                              transforms.Normalize([0.4728, 0.4762, 0.4692],
                                                                                   [0.2558, 0.2532, 0.2457])]))

### Dataloaders

In [10]:
trainloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
testloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

## CUDA

In [11]:
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
if torch.cuda.is_available():
    print(f"Using {torch.cuda.get_device_name()} for Training")
else:
    print("Using CPU for Training")

Using GeForce 940MX for Training


## Models

In [12]:
class ResnetModel(nn.Module):
    def __init__(self):
        super(ResnetModel, self).__init__()
        r = resnet18(pretrained=True)
        fc = nn.Linear(r.fc.in_features, len(CLASSES))
        r.fc = fc
        self.model = r
        
    def forward(self, x):
        x = self.model(x)
        x = F.log_softmax(x, dim=1)
        return x

In [13]:
model = ResnetModel()
model = model.to(device)
model

ResnetModel(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_r

## Loss and Optimizer

In [14]:
# Loss
criterion = nn.NLLLoss().cuda() if torch.cuda.is_available() else nn.NLLLoss()

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=LR)

## LR scheduler

In [15]:
scheduler = None

## Training

In [16]:
train_losses = []
test_losses = []

for epoch in range(1, EPOCHS + 1):
    #Training
    train_loss = 0.0
    test_loss = 0.0
    for data in trainloader:
        x = data['image'].to(device)
        y = data['label'].to(device)
        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * x.size(0)
    for data in valloader:
        x = data['image'].to(device)
        y = data['label'].to(device)
        out = model(x)
        loss = criterion(out, y)
        test_loss += loss.item() * x.size(0)
        
    train_loss /= len(trainloader)
    test_loss /= len(valloader)
    train_losses.append(train_loss)
    test_losses.append(test_loss)
    
    if scheduler is None:
        print("No Schedular found. LR will not change")
    else:
        if isinstance(scheduler, lr_scheduler.ReduceLROnPlateau):
            scheduler.step(test_loss)
        else:
            scheduler.step()
        
    print(f"Epoch: {epoch}/{EPOCHS}, training loss: {train_loss} validation loss: {test_loss}")

RuntimeError: stack expects each tensor to be equal size, but got [3, 128, 146] at entry 0 and [3, 128, 144] at entry 1