In [1]:
# %matplotlib notebook
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import torch
import random
from torch import nn
from torch.nn import functional as F
import os
from pathlib import Path
import numpy as np
import torch.optim as optim
from torchvision.transforms.transforms import RandomApply
from core.transforms import GaussianBlur, TwoCropsTransform
from core.custom_dataset import DatasetFolderSorted, ImageFolderSorted
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Dataset
from agents.net import MLP, Encoder, SimSiam
import time

In [2]:

# Per-channel normalisation applied after ToTensor(); the values match the
# widely used ImageNet channel statistics.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Ordered list of transforms; it is wrapped in transforms.Compose where the
# dataset is built.
augmentation = [
    # Random crop covering 20%-100% of the image area, resized to 50x50.
    transforms.RandomResizedCrop(size=50, scale=(0.2, 1.0)),
    # Colour jitter is applied to 80% of the samples (not strengthened).
    transforms.RandomApply(
        [transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)],
        p=0.8,
    ),
    transforms.RandomGrayscale(p=0.2),
    # Project-local blur, applied to half of the samples.
    # NOTE(review): [.1, 2.] is presumably the sigma range -- confirm in core.transforms.
    transforms.RandomApply([GaussianBlur([0.1, 2.0])], p=0.5),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
]

In [3]:
# Images are read from a class-per-subfolder directory tree. Every sample is
# passed through TwoCropsTransform, which receives the composed augmentation
# pipeline; the training loop indexes the result as images[0] / images[1].
# NOTE(review): presumably TwoCropsTransform returns two independently
# augmented views of the same image -- confirm in core.transforms.
train_dataset = datasets.ImageFolder(
    root='dataset/MiniWorld-FourRooms-v0/agent/D300',
    transform=TwoCropsTransform(transforms.Compose(augmentation)),
)

In [4]:
# Shuffled mini-batches of 16 samples; all other DataLoader options keep
# their defaults, so loading happens in the main process.
# NOTE(review): the training log in this notebook reports a Data time of
# about 0.020 s out of about 0.023 s per batch, i.e. input loading dominates
# each step -- consider num_workers > 0 if that matters.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)

In [5]:
# Prefer the GPU, but fall back to the CPU so this cell does not raise on a
# machine without CUDA. With a GPU present this is equivalent to `.cuda()`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimSiam().to(device)


In [6]:
# Cosine similarity along the feature dimension. The module owns no
# parameters or buffers, so it needs no device placement.
criterion = nn.CosineSimilarity(dim=1)
fix_pred_lr = False  # when True, the predictor gets its own parameter group
init_lr = 0.05       # single source of truth for the optimizer learning rate

if fix_pred_lr:
    # `model` is only wrapped (and therefore only exposes `.module`) when it
    # has been put inside DataParallel / DistributedDataParallel; fall back to
    # the model itself when it is a plain module.
    base_model = getattr(model, 'module', model)
    # NOTE(review): 'fix_lr' is just a tag stored on the param group; no code
    # visible in this notebook reads it, so by itself it does not freeze the
    # predictor's learning rate.
    optim_params = [{'params': base_model.encoder.parameters(), 'fix_lr': False},
                    {'params': base_model.predictor.parameters(), 'fix_lr': True}]
else:
    optim_params = model.parameters()

optimizer = optim.Adam(optim_params, lr=init_lr)

In [7]:
class AverageMeter(object):
    """Tracks the most recent value of a metric together with its running mean.

    ``name`` is the label used when printing and ``fmt`` is a format-spec
    suffix (including the leading colon, e.g. ``':.4f'``) applied to both the
    latest value and the average.
    """
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Zero the latest value, the mean, the weighted total and the weight."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the newest observation, weighted by ``n`` samples."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        # First expand the format spec into a template such as
        # '{name} {val:.4f} ({avg:.4f})', then fill it from the attributes.
        template = '{{name}} {{val{spec}}} ({{avg{spec}}})'.format(spec=self.fmt)
        return template.format(**self.__dict__)


class ProgressMeter(object):
    """Prints one tab-separated progress line: prefix, batch counter, meters.

    ``num_batches`` fixes the width and total shown in the ``[i/total]``
    counter, ``meters`` is a sequence of objects rendered with ``str()``, and
    ``prefix`` is prepended verbatim to the counter.
    """
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the progress line for batch index ``batch``."""
        header = self.prefix + self.batch_fmtstr.format(batch)
        columns = [header, *map(str, self.meters)]
        print('\t'.join(columns))

    def _get_batch_fmtstr(self, num_batches):
        """Return a template like ``'[{:4d}/1667]'`` for the batch counter."""
        # The counter is padded to the number of digits in the total.
        width = len(str(num_batches // 1))
        slot = '{:%dd}' % width
        return '[{}/{}]'.format(slot, slot.format(num_batches))

In [8]:
def train(train_loader, model, criterion, optimizer, epoch, print_freq=100, device=None):
    """Run one pass over ``train_loader`` and update ``model`` after every batch.

    The loss is the negated mean of ``criterion(p1, z2)`` and
    ``criterion(p2, z1)``, averaged over the two directions.

    Args:
        train_loader: sized iterable yielding ``(images, _)`` pairs, where
            ``images[0]`` and ``images[1]`` are the two input tensors of the
            batch; the second element of the pair is ignored.
        model: module called as ``model(x1=..., x2=...)`` and returning
            ``(p1, p2, z1, z2)``.
        criterion: callable applied to ``(p, z)``; its result is averaged
            with ``.mean()``.
        optimizer: optimizer stepped once per batch.
        epoch: used only to label the printed progress lines.
        print_freq: print a progress line every ``print_freq`` batches
            (default 100, the previous fixed interval); a falsy value
            disables printing.
        device: device the input batches are moved to. Defaults to the device
            of the model's first parameter, so inputs always follow the
            model; if the model has no parameters, CUDA is used as before.

    Returns:
        None. Progress is reported through printed lines only.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses],
        prefix="Epoch: [{}]".format(epoch))

    if device is None:
        # Follow the model instead of hard-coding CUDA, so the same function
        # works for a CPU model and for a model on a non-default GPU.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cuda')

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, _) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        x1 = images[0].to(device, non_blocking=True)
        x2 = images[1].to(device, non_blocking=True)

        # compute output and loss
        # NOTE(review): this treats z1/z2 as fixed targets; it assumes the
        # model already returns them detached (stop-gradient) -- confirm in
        # agents.net. The log in this notebook shows the loss at about -1
        # within 100 iterations, which may indicate representation collapse.
        p1, p2, z1, z2 = model(x1=x1, x2=x2)
        loss = -(criterion(p1, z2).mean() + criterion(p2, z1).mean()) * 0.5

        # weight the running loss average by the batch size
        losses.update(loss.item(), x1.size(0))

        # compute gradient and take one optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if print_freq and i % print_freq == 0:
            progress.display(i)
# Runs a single pass over train_loader; `epoch` is only used to label the
# printed progress lines.
train(
    train_loader=train_loader,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    epoch=10,
)

Epoch: [10][   0/1667]	Time  0.207 ( 0.207)	Data  0.027 ( 0.027)	Loss 0.0566 (0.0566)
Epoch: [10][ 100/1667]	Time  0.023 ( 0.025)	Data  0.020 ( 0.020)	Loss -0.9998 (-0.9496)
Epoch: [10][ 200/1667]	Time  0.023 ( 0.024)	Data  0.019 ( 0.020)	Loss -0.9997 (-0.9745)
Epoch: [10][ 300/1667]	Time  0.024 ( 0.024)	Data  0.020 ( 0.020)	Loss -0.9997 (-0.9829)
Epoch: [10][ 400/1667]	Time  0.026 ( 0.023)	Data  0.023 ( 0.020)	Loss -0.9998 (-0.9871)
Epoch: [10][ 500/1667]	Time  0.023 ( 0.023)	Data  0.020 ( 0.020)	Loss -0.9999 (-0.9896)
Epoch: [10][ 600/1667]	Time  0.028 ( 0.023)	Data  0.025 ( 0.020)	Loss -0.9997 (-0.9913)
Epoch: [10][ 700/1667]	Time  0.023 ( 0.023)	Data  0.020 ( 0.020)	Loss -0.9999 (-0.9925)
Epoch: [10][ 800/1667]	Time  0.023 ( 0.023)	Data  0.020 ( 0.020)	Loss -0.9998 (-0.9934)
Epoch: [10][ 900/1667]	Time  0.022 ( 0.023)	Data  0.018 ( 0.020)	Loss -0.9998 (-0.9941)
Epoch: [10][1000/1667]	Time  0.023 ( 0.023)	Data  0.019 ( 0.020)	Loss -0.9998 (-0.9947)
Epoch: [10][1100/1667]	Time  0.025