##### Implementation of MoCov3 to detect dog emotions.
- Batch Size : 128
- Image resolution : 224 x 224
- Status : Building

In [3]:
import os
import math
import shutil
import random
import time

In [4]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

In [5]:
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as torchvision_models
from torch.utils.tensorboard import SummaryWriter

In [6]:
import moco.builder
import moco.loader
import moco.LARS

In [7]:
# Data-pipeline settings shared by the transform and DataLoader cells.
BATCH_SIZE = 128  # batch_size passed to the training DataLoader
RESOLUTION = 224  # output size given to RandomResizedCrop for both views
DISTRIBUTED = False  # only referenced by the commented-out DistributedSampler code
NUM_WORKERS = 4  # num_workers passed to the training DataLoader

##### Data Generator.
- Transforms yanked from the BYOL paper https://arxiv.org/abs/2006.07733.

In [8]:
# First view: crop, colour jitter / grayscale, always-on blur, flip, then
# tensor conversion and per-channel normalisation.
augmentation1 = [
    transforms.RandomResizedCrop(RESOLUTION, scale=(0.8, 1.)),
    transforms.RandomApply(
        [transforms.ColorJitter(brightness=0.4, contrast=0.4,
                                saturation=0.2, hue=0.1)],  # not strengthened
        p=0.8,
    ),
    transforms.RandomGrayscale(p=0.2),
    # Blur is applied to every image in this view (p=1.0).
    transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])], p=1.0),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]

# Second view: same crop / colour pipeline as augmentation1, but blur is rare
# (p=0.1) and solarisation is added (p=0.2).
# NOTE(review): the Normalize mean/std here differ from augmentation1's
# values - confirm that normalising the two views differently is intended.
augmentation2 = [
    transforms.RandomResizedCrop(RESOLUTION, scale=(0.8, 1.)),
    transforms.RandomApply(
        [transforms.ColorJitter(brightness=0.4, contrast=0.4,
                                saturation=0.2, hue=0.1)],  # not strengthened
        p=0.8,
    ),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])], p=0.1),
    transforms.RandomApply([moco.loader.Solarize()], p=0.2),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.473, 0.440, 0.386],
        std=[0.269, 0.260, 0.262],
    ),
]

In [9]:
# Dataset split locations (only the training split is loaded in this cell).
traindir = 'data/Daniel_Shan_Balico/train/'
testdir = 'data/Daniel_Shan_Balico/test/'
valdir = 'data/Daniel_Shan_Balico/val/'

# Each sample is passed through both augmentation pipelines, so the loader
# yields a pair of views per image.
train_dataset = datasets.ImageFolder(
    root=traindir,
    transform=moco.loader.TwoCropsTransform(
        transforms.Compose(augmentation1),
        transforms.Compose(augmentation2),
    ),
)

##### ! Issue Faced ! If DISTRIBUTED = True, the following error occurs:  
- Default process group has not been initialized, please make sure to call init_process_group.
- SOLVED : Commented out the distributed-sampler code below (it is only needed when training on multiple GPUs).

In [10]:
# Distributed sampling is disabled for this single-process run; re-enable it
# together with process-group initialisation when training on multiple GPUs.
# if DISTRIBUTED:
#     train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
# else:
#     train_sampler = None

# Shuffle the training data every epoch. With shuffle=False and no sampler the
# loader walks the ImageFolder in its fixed on-disk order, so every epoch sees
# identical batches that are grouped by class folder.
# drop_last=True keeps every batch at exactly BATCH_SIZE samples.
train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=BATCH_SIZE, shuffle=True,
        num_workers=NUM_WORKERS, pin_memory=True, drop_last=True)

##### Service Functions

In [17]:
# Schedule hyper-parameters read by adjust_learning_rate / adjust_moco_momentum.
EPOCHS = 150  # total epochs; both cosine schedules end here
LEARNING_RATE = 1.5e-4  # peak learning rate, reached when warmup ends
WARMUP_EPOCHS = 40  # epochs of linear learning-rate warmup
# Value returned by adjust_moco_momentum at epoch 0; the schedule rises to 1.0 at EPOCHS.
# NOTE(review): 0.2 looks low for a momentum-encoder coefficient - confirm it is intended.
MOCO_MOMENTUM = 0.2

In [18]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar',
                    best_filename='model_best.pth.tar'):
    """Serialize ``state`` to disk and optionally keep a copy as the best model.

    Args:
        state: Object handed to ``torch.save`` (typically a dict of state dicts).
        is_best: When true, the freshly written checkpoint is also copied to
            ``best_filename``.
        filename: Destination path of the checkpoint.
        best_filename: Destination path of the best-model copy. Defaults to
            ``'model_best.pth.tar'`` in the current working directory, which
            matches the previous hard-coded behaviour.
    """
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)


class AverageMeter:
    """Track the latest value and the running weighted mean of a metric."""

    def __init__(self, name, fmt=':f'):
        # `fmt` is a format-spec suffix (e.g. ':6.3f') spliced into __str__.
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val`, weighted as if it had been observed `n` times."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

    def __str__(self):
        # Renders as "<name> <val> (<avg>)" with both numbers using self.fmt.
        template = '{name} {val%s} ({avg%s})' % (self.fmt, self.fmt)
        return template.format_map(vars(self))


class ProgressMeter:
    """Print one tab-separated status line made of a batch counter and meters."""

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the prefix, the "[batch/total]" counter and every meter."""
        header = self.prefix + self.batch_fmtstr.format(batch)
        print('\t'.join([header, *(str(meter) for meter in self.meters)]))

    def _get_batch_fmtstr(self, num_batches):
        # Pad the running index to the width of the total so columns line up.
        width = len(str(num_batches // 1))
        slot = f'{{:{width}d}}'
        return f'[{slot}/{slot.format(num_batches)}]'


def adjust_learning_rate(optimizer, epoch):
    """Set and return the learning rate for a (possibly fractional) epoch.

    The rate grows linearly from 0 to LEARNING_RATE over WARMUP_EPOCHS, then
    follows a half-cosine decay that reaches 0 at EPOCHS. The value is written
    into every parameter group of ``optimizer``.
    """
    in_warmup = epoch < WARMUP_EPOCHS
    if in_warmup:
        new_lr = LEARNING_RATE * epoch / WARMUP_EPOCHS
    else:
        angle = math.pi * (epoch - WARMUP_EPOCHS) / (EPOCHS - WARMUP_EPOCHS)
        new_lr = LEARNING_RATE * 0.5 * (1. + math.cos(angle))

    for group in optimizer.param_groups:
        group['lr'] = new_lr
    return new_lr


def adjust_moco_momentum(epoch):
    """Return the MoCo momentum for a (possibly fractional) epoch.

    Half-cosine ramp: equals MOCO_MOMENTUM at epoch 0 and reaches 1.0 at EPOCHS.
    """
    cosine_term = 1. + math.cos(math.pi * epoch / EPOCHS)
    return 1. - 0.5 * cosine_term * (1. - MOCO_MOMENTUM)

In [11]:
# Sanity check: pull the first (index, batch) pair out of the loader.
train_l = enumerate(train_loader)
a = next(train_l)

In [12]:
# Only the last expression of a notebook cell is echoed, so the output shown
# below is len(train_loader), i.e. the number of batches per epoch.
type(a)
len(train_loader)

25

##### Training Loop
- 150 epochs.

In [21]:
# Flags read inside train().
NUM_GPU = 1  # train() moves both views to CUDA whenever this is not None
MOCO_COS = True  # when True, train() recomputes the MoCo momentum every iteration

In [23]:
def train(train_loader, model, optimizer, scaler, summary_writer, epoch, args):
    """Run one training epoch with mixed precision and per-iteration schedules.

    Args:
        train_loader: Iterable yielding ``(images, _)`` where ``images[0]`` and
            ``images[1]`` are the two augmented views of the batch.
        model: Called as ``model(view1, view2, moco_m)``; must return the loss.
        optimizer: Optimizer whose parameter groups receive the scheduled
            learning rate.
        scaler: Gradient scaler providing ``scale`` / ``step`` / ``update``.
        summary_writer: Receives the per-iteration loss via ``add_scalar``
            when ``args.rank == 0``.
        epoch: Index of the current epoch.
        args: Namespace providing ``moco_m``, ``gpu``, ``rank`` and
            ``print_freq``.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    learning_rates = AverageMeter('LR', ':.4e')
    losses = AverageMeter('Loss', ':.4e')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, learning_rates, losses],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    iters_per_epoch = len(train_loader)
    moco_m = args.moco_m
    for i, (images, _) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # adjust learning rate and momentum coefficient per iteration, using a
        # fractional epoch so both schedules move smoothly within an epoch.
        # The helpers in this notebook read their hyper-parameters from
        # module-level constants and take no `args` argument.
        lr = adjust_learning_rate(optimizer, epoch + i / iters_per_epoch)
        learning_rates.update(lr)
        if MOCO_COS:
            moco_m = adjust_moco_momentum(epoch + i / iters_per_epoch)

        if NUM_GPU is not None:
            images[0] = images[0].cuda(args.gpu, non_blocking=True)
            images[1] = images[1].cuda(args.gpu, non_blocking=True)

        # compute output
        with torch.cuda.amp.autocast(True):
            loss = model(images[0], images[1], moco_m)

        losses.update(loss.item(), images[0].size(0))
        if args.rank == 0:
            summary_writer.add_scalar("loss", loss.item(), epoch * iters_per_epoch + i)

        # compute gradient and do SGD step; the scaler scales the loss before
        # backward and drives the optimizer step
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)

In [26]:
# Re-declared before the epoch loop; same value as the EPOCHS defined with the schedule constants.
EPOCHS = 150

In [29]:
# Placeholder epoch loop: the body is intentionally empty for now.
for epoch in range(EPOCHS):
    pass
    # TODO: will require GPU manipulation and the architecture of the down-stream models