
## Imports
1. time - Record start, stop, elapsed time. Good for evaluating model training times
2. tqdm - Shows progress bar for training loops
3. cv2  - Open source computer vision used to process image files
4. PIL Image - Open and modify image files
5. numpy, pandas - data analysis, manipulation
6. torch - pytorch, deep learning python library
7. albumentations - Image augmentation for deep learning datasets
8. ToTensorV2 - Converts image to a tensor
9. torch.utils.data DataLoader, Dataset - prebuilt data set and data loading code
10. timm - PyTorch Image Models, a library of (pretrained) computer-vision model architectures

In [1]:
%%capture
!pip3 install timm
from IPython.display import Image as ImageIpython
from IPython.core.display import HTML 

import time
from tqdm import tqdm_notebook as tqdm
import cv2
from PIL import Image
import numpy as np
import pandas as pd
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader, Dataset
from torch.optim import lr_scheduler
import timm
import warnings
warnings.filterwarnings('ignore')

## View Data Files
train.csv - training data  
test.csv - test data  
metadata.csv - actual class names  
test, train - folders of images  

In [2]:
!ls -l ../input/midsw251birds

total 1244
-rw-r--r-- 1 nobody nogroup   5062 Jun  7 17:05 metadata.csv
-rw-r--r-- 1 nobody nogroup 197871 Jun  7 17:05 sample_submission.csv
drwxr-xr-x 2 nobody nogroup      0 Jun  7 17:05 test
-rw-r--r-- 1 nobody nogroup 181377 Jun  7 17:05 test.csv
drwxr-xr-x 2 nobody nogroup      0 Jun  7 17:06 train
-rw-r--r-- 1 nobody nogroup 876817 Jun  7 17:05 train.csv


## Define Hyperparameters
lr - learning rate  
epochs - number of epochs  
batch size - number of training examples used to estimate the error gradient  
num_workers - number of processes that generate batches in parallel  
folds - every `folds`-th row is held out for validation; with 5 folds this gives roughly a 4:1 train-to-validation split  

In [3]:
class args:
    """Run settings read by the split, data-loader, optimizer and training cells."""
    # Optimisation
    lr = 1e-4
    epochs = 5
    # Data loading
    batch_size = 32
    num_workers = 8
    # Validation split: every `folds`-th row of train.csv is held out
    folds = 5

## Divide the data into train, validation, and test data
alldf - All examples  
trndf - Training examples  
valdf - Validation examples  
tstdf - Testing examples  
metadf - Metadata

In [4]:
# Every labelled example lives in train.csv.
alldf = pd.read_csv('../input/midsw251birds/train.csv')

# Hold out every `args.folds`-th row for validation; all rows whose filename
# is not in the held-out set form the training split.
valdf = alldf.iloc[::args.folds]
trndf = alldf.loc[~alldf['filename'].isin(valdf['filename'])]

# Unlabelled test images, plus the metadata table keyed by label.
tstdf = pd.read_csv('../input/midsw251birds/test.csv')
metadf = pd.read_csv('../input/midsw251birds/metadata.csv').set_index('label')

print(f'File shapes -- train : {trndf.shape}, valid : {valdf.shape}, test : {tstdf.shape}')
trndf.head()

File shapes -- train : (26379, 2), valid : (6595, 2), test : (8244, 1)


Unnamed: 0,filename,label
1,train/bb99f4bea973.jpg,147
2,train/50923ceb3ffd.jpg,147
3,train/f9fc3c6da5d7.jpg,147
4,train/dfe8cb1855fe.jpg,147
6,train/4fe53a096533.jpg,147


## Define Transformations for Train and Test data
Perform transformations using albumentations (A):  
  
Train - horizontal flip and transpose, each applied independently with probability 0.5  
Test - Simply converts image to tensor  
  
Other possibilities include gaussian noise, motion blur, optical distortion

In [5]:
# Per-channel mean / std statistics.
# NOTE(review): no transform visible in this notebook references these values,
# and despite the names they differ from the usual ImageNet statistics — confirm origin.
imgnetmeans = [
    0.22363983,
    0.18190407,
    0.2523437,
]
imgnetstds = [
    0.32451536,
    0.2956294,
    0.31335256,
]
# Built with albumentations; more example pipelines: https://albumentations.readthedocs.io/en/latest/examples.html
def trntransforms():
    """Return the training pipeline: random flip and transpose, then tensor conversion."""
    augmentations = [
        A.HorizontalFlip(p=0.5),  # applied to roughly half of the samples
        A.Transpose(p=0.5),       # applied independently of the flip, also with p=0.5
        ToTensorV2(),
    ]
    return A.Compose(augmentations)

def tsttransforms():
    """Return the evaluation pipeline: tensor conversion only, no augmentation."""
    steps = [ToTensorV2()]
    return A.Compose(steps)

class BirdDataset(Dataset):
    """Map-style dataset that loads the images listed in a DataFrame.

    Args:
        df: DataFrame with a 'filename' column (path relative to the dataset
            root) and, unless ``mode`` is 'test', a 'label' column.
        mode: 'test' makes every returned label -1; any other value reads
            the label from ``df``.
        transform: Optional callable invoked as ``transform(image=array)``
            that returns a mapping whose 'image' entry is a tensor. When
            None, the decoded array is converted to a channels-first
            tensor directly.
    """

    def __init__(self, df, mode, transform=None):
        self.data = df
        self.img_dir = '../input/midsw251birds/'
        self.transform = transform
        self.mode = mode

    def __len__(self):
        """Return the number of rows (images) in the backing DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        The image is a float tensor scaled to [0, 1].

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        # Look the row up once and reuse it for both filename and label.
        row = self.data.iloc[idx]
        fname = row['filename']
        path = f'{self.img_dir}/{fname}'

        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside a transform.
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {path}')
        # NOTE(review): cv2.imread yields BGR channel order; pretrained weights
        # are usually trained on RGB — consider converting. Left unchanged here.

        if self.transform is not None:
            image = self.transform(image = image)['image']
        else:
            # Without a transform the array is still HxWxC NumPy data; make it
            # a CxHxW tensor so the scaling below works for the default argument.
            image = torch.from_numpy(image).permute(2, 0, 1)
        image = image.float() / 255.
        label = -1 if self.mode=='test' else row['label']

        return image, label

## Define + Transform the Dataset

In [6]:
# Wrap each split in a BirdDataset; only the training split gets augmentation.
trndataset = BirdDataset(df=trndf, mode='train', transform=trntransforms())
valdataset = BirdDataset(df=valdf, mode='valid', transform=tsttransforms())
tstdataset = BirdDataset(df=tstdf, mode='test', transform=tsttransforms())

# Settings shared by all three loaders.
loaderargs = dict(
    num_workers=args.num_workers,
    batch_size=args.batch_size,
    pin_memory=False,
    drop_last=False,
)

# Only training batches are shuffled; validation and test keep DataFrame order
# so predictions can be matched back to valdf / tstdf rows by position.
trnloader = DataLoader(trndataset, shuffle=True, **loaderargs)
valloader = DataLoader(valdataset, shuffle=False, **loaderargs)
tstloader = DataLoader(tstdataset, shuffle=False, **loaderargs)

## Define the Model Architecture

In [7]:
# Create a pretrained MixNet-XL from timm and move it to the training device.
# NOTE(review): no num_classes is passed, so the classifier head presumably keeps
# the size of the pretrained checkpoint — confirm it covers every label in train.csv.
# Fall back to CPU when no GPU is present instead of failing on a hard-coded device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = timm.create_model('mixnet_xl', pretrained = True)
model = model.to(device)

# Define optimizer and criterion
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
criterion = torch.nn.CrossEntropyLoss()
# Baseline step scheduler; the scheduler cell further down reassigns `scheduler`.
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)
num_epochs = args.epochs

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_xl_ra-aac3c00c.pth" to /root/.cache/torch/hub/checkpoints/mixnet_xl_ra-aac3c00c.pth


In [8]:
#scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

In [9]:
# Create a scheduler that warms the learning rate up over the first
# `n_warmup_epochs` epochs and then cosine-decays it until the end of training.
# All durations are expressed in optimizer steps (batches), not epochs.
# The import must be live: with it commented out this cell raises NameError
# on a fresh kernel.
from timm.scheduler.cosine_lr import CosineLRScheduler

n_epochs = args.epochs        # keep the schedule length in sync with the training loop
n_warmup_epochs = 2
n_steps = len(trnloader)      # batches per epoch

scheduler = CosineLRScheduler(
            optimizer,
            t_initial= n_steps * n_epochs + 1,
            lr_min=0.00001,
            warmup_lr_init=0.00001,
            warmup_t= n_steps * n_warmup_epochs + 1)

## Train and Validate the Model

In [10]:
# Number of optimizer updates performed so far, across all epochs; it is the
# position handed to the learning-rate scheduler.
global_step = 0

since = time.time()
for epoch in range(num_epochs):
    print(f'Epoch {epoch}/{num_epochs - 1}')

    # ---- training pass ----
    model.train()
    running_loss = 0.0
    tk0 = tqdm(trnloader, total=len(trnloader))
    for step, batch in enumerate(tk0):
        inputs = batch[0].to(device, dtype=torch.float)
        labels = batch[1].to(device).long()

        optimizer.zero_grad()
        # Position the schedule for this update before the forward/backward pass.
        scheduler.step(global_step)
        global_step += 1

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Show the mean training loss over the batches seen so far this epoch.
        running_loss += loss.item()
        tk0.set_postfix(train_loss=running_loss / (step + 1))

    # ---- validation pass ----
    valpreds = []
    model.eval()
    running_loss = 0.0
    tkval = tqdm(valloader, total=len(valloader))
    for step, batch in enumerate(tkval):
        inputs = batch[0].to(device, dtype=torch.float)
        labels = batch[1].to(device).long()
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
        valpreds.append(outputs)
        running_loss += loss.item()
        tkval.set_postfix(valid_loss=running_loss / (step + 1))

    # valloader is not shuffled, so predictions line up with valdf rows.
    preds = torch.cat(valpreds).argmax(1).detach().cpu().numpy()
    accuracy = (valdf.label.values == preds).mean()
    print(f'Valid accuracy {accuracy:.4f}')
    

Epoch 0/4


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/207 [00:00<?, ?it/s]

Valid accuracy 0.8881
Epoch 1/4


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/207 [00:00<?, ?it/s]

Valid accuracy 0.9318
Epoch 2/4


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/207 [00:00<?, ?it/s]

Valid accuracy 0.9409
Epoch 3/4


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/207 [00:00<?, ?it/s]

Valid accuracy 0.9404
Epoch 4/4


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/207 [00:00<?, ?it/s]

Valid accuracy 0.9437


## Improvement 1: Changing the Model Architecture

In [11]:
# Report the CUDA caching-allocator statistics for the current device.
# Guarded so the cell still runs on a machine without a GPU, where there is
# nothing to report.
if torch.cuda.is_available():
    print(torch.cuda.memory_summary(device=None, abbreviated=False))
else:
    print('CUDA is not available; no GPU memory summary to report.')

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |  219092 KB |    6592 MB |   80431 GB |   80431 GB |
|       from large pool |  121124 KB |    6491 MB |   79924 GB |   79924 GB |
|       from small pool |   97968 KB |     133 MB |     507 GB |     507 GB |
|---------------------------------------------------------------------------|
| Active memory         |  219092 KB |    6592 MB |   80431 GB |   80431 GB |
|       from large pool |  121124 KB |    6491 MB |   79924 GB |   79924 GB |
|       from small pool |   97968 KB |     133 MB |     507 GB |     507 GB |
|---------------------------------------------------------------

In [None]:
# Submit: predict a label for every test image and write submission.csv.
# Switch to eval mode explicitly so the predictions do not depend on which
# cell last touched the model (layers such as dropout and batch norm behave
# differently in train mode).
model.eval()
tstpreds = []
tktst = tqdm(tstloader, total=len(tstloader))
for step, batch in enumerate(tktst):
    inputs = batch[0].to(device, dtype=torch.float)
    with torch.no_grad():
        outputs = model(inputs)
        tstpreds.append(outputs)
# tstloader is not shuffled, so row i of the predictions belongs to row i of tstdf.
predicted_labels = torch.cat(tstpreds).argmax(1).detach().cpu().numpy()
tstdf['label'] = predicted_labels

tstdf.to_csv('submission.csv', index = False)

In [12]:
# torch.cuda.empty_cache()
# args.batch_size = 10
# # creates mixnet_xl architecture
# device = torch.device("cuda:0")
# model = timm.create_model('mixnet_xl', pretrained = True)
# model = model.to(device)

# # Define optimizer and criterion
# optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
# criterion = torch.nn.CrossEntropyLoss()
# scheduler = lr_scheduler.StepLR(optimizer, step_size=10)
# num_epochs = args.epochs

# #Reduce batch size for memory considerations
# # args.batch_size = 6

# since = time.time()
# for epoch in range(num_epochs):
#     print('Epoch {}/{}'.format(epoch, num_epochs - 1))
#     scheduler.step()
#     model.train()
#     running_loss = 0.0
#     tk0 = tqdm(trnloader, total=int(len(trnloader)))
#     for step, batch in enumerate(tk0):
#         inputs = batch[0].to(device, dtype=torch.float)
#         labels = batch[1].to(device).long()
#         optimizer.zero_grad()
#         outputs = model(inputs)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()
#         running_loss += loss.item()
#         tk0.set_postfix(train_loss=(running_loss / (step+1)))
        
#     valpreds = []
#     model.eval()
#     running_loss = 0.0
#     tkval = tqdm(valloader, total=int(len(valloader)))
#     for step, batch in enumerate(tkval):
#         inputs = batch[0].to(device, dtype=torch.float)
#         labels = batch[1].to(device).long()
#         with torch.no_grad():
#             outputs = model(inputs)
#             loss = criterion(outputs, labels)
#         valpreds .append(outputs)
#         running_loss += loss.item()
#         tkval.set_postfix(valid_loss=(running_loss / (step+1)))
#     preds = torch.cat(valpreds).argmax(1).detach().cpu().numpy()
#     print(f'Valid accuracy {(valdf.label.values == preds).mean():.4f}')