In [1]:
# %load_ext autoreload
# %autoreload 2
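# One-off dataset generation, kept disabled.
# NOTE(review): the call below passes 'top', while the pre-training cell loads
# 'dataset/MiniWorld-OneRoom-v0/agent/D300' - confirm the intended view before re-enabling.
# from core.utils import create_image_dataset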
# num_demos = 300
# seed = 42
# create_image_dataset('MiniWorld-OneRoom-v0', 'top', num_demos=num_demos, seed=seed)

In [2]:
# %matplotlib notebook
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import torch
import random
from torch import nn
from torch.nn import functional as F
import os
from pathlib import Path
import numpy as np
import torch.optim as optim
from torchvision.transforms.transforms import RandomApply
from core.transforms import GaussianBlur, TwoCropsTransform
from core.custom_dataset import DatasetFolderSorted, ImageFolderSorted
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Dataset
from agents.net import MLP, Encoder, SimSiam
import time

In [3]:

# Per-channel normalisation applied as the last step of the pipeline
# (these are the mean/std values commonly used for ImageNet-trained models).
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Augmentation pipeline; it is wrapped in transforms.Compose by the dataset cells.
augmentation = [
    # Random crop covering 20%-100% of the image, resized to 50x50.
    transforms.RandomResizedCrop(size=50, scale=(0.2, 1.0)),
    # Colour jitter on 80% of the samples (not strengthened).
    transforms.RandomApply(
        [transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)],
        p=0.8,
    ),
    transforms.RandomGrayscale(p=0.2),
    # Project-local blur applied to half of the samples.
    transforms.RandomApply([GaussianBlur([0.1, 2.0])], p=0.5),
    transforms.ToTensor(),
    normalize,
]

In [4]:

# Image-folder dataset used for SimSiam pre-training. The transform wraps the
# augmentation pipeline in TwoCropsTransform; the training loop reads the
# resulting sample as a pair (images[0], images[1]).
train_dataset = datasets.ImageFolder(
    root='dataset/MiniWorld-OneRoom-v0/agent/D300',
    transform=TwoCropsTransform(transforms.Compose(augmentation)),
)

In [5]:
# Shuffled mini-batches of 128 samples for the pre-training loop.
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)

In [6]:
# Build the SimSiam network, then move it to the default CUDA device.
model = SimSiam()
model = model.cuda()


In [7]:
# Similarity measure used by the pre-training loss (negated inside train()).
criterion = nn.CosineSimilarity(dim=1).cuda()
fix_pred_lr = False
# Default learning rate for any parameter group that does not set its own 'lr'.
init_lr = 0.001

if fix_pred_lr:
    # Groups carry an extra 'fix_lr' flag and no explicit 'lr', so both fall
    # back to the optimizer default below.
    optim_params = [{'params': model.encoder.parameters(), 'fix_lr': False},
                    {'params': model.predictor.parameters(), 'fix_lr': True}]
else:
    # Separate, explicit learning rates for encoder and predictor; these
    # override the optimizer default.
    optim_params = [{'params': model.encoder.parameters(), 'lr': 1e-4},
                    {'params': model.predictor.parameters(), 'lr': 1e-3}]

# Use init_lr as the default instead of a hard-coded 0.1: init_lr was defined
# but never used, and 0.1 would have been applied to both groups whenever
# fix_pred_lr is True. The else-branch groups are unaffected.
optimizer = optim.Adam(optim_params, lr=init_lr)

In [8]:
class AverageMeter(object):
    """Tracks the most recent value and the weighted running average of a metric.

    Args:
        name: Label printed in front of the values.
        fmt: Format-spec suffix (including the leading ':') applied to both
            the current value and the average when the meter is printed.
    """
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear the current value, running sum, sample count and average."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, weighted by ``n`` samples.

        Args:
            val: The new measurement.
            n: Weight of the measurement (e.g. the number of samples it covers).
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # A zero total weight (e.g. update(..., n=0) on a fresh meter) used to
        # raise ZeroDivisionError; report 0, the same value reset() uses.
        self.avg = self.sum / self.count if self.count else 0

    def __str__(self):
        """Return '<name> <val> (<avg>)' with both numbers rendered using ``fmt``."""
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    """Prints one tab-separated progress line: prefix, batch counter, then each meter.

    Args:
        num_batches: Total number of batches; sets the counter's width and denominator.
        meters: Objects whose ``str()`` is appended to each progress line.
        prefix: Text placed directly in front of the batch counter.
    """

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the progress line for batch index ``batch``."""
        header = self.prefix + self.batch_fmtstr.format(batch)
        print('\t'.join([header, *map(str, self.meters)]))

    def _get_batch_fmtstr(self, num_batches):
        """Build a '[{:Nd}/total]' template, N being the digit count of the total."""
        width = len(str(num_batches // 1))
        slot = f'{{:{width}d}}'
        return f'[{slot}/{slot.format(num_batches)}]'

In [9]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one pre-training epoch over ``train_loader``.

    Each batch provides a pair of views; both are moved to the GPU and passed
    to ``model`` as ``x1``/``x2``. The loss is the negated average of
    ``criterion(p1, z2)`` and ``criterion(p2, z1)`` (each reduced with
    ``.mean()``). Timing and loss meters are printed roughly ten times per epoch.

    Args:
        train_loader: Iterable yielding ``(images, _)`` where ``images[0]`` and
            ``images[1]`` are the two views of the batch.
        model: Callable as ``model(x1=..., x2=...)`` returning ``(p1, p2, z1, z2)``.
        criterion: Similarity function applied to the (p, z) pairs.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress prefix.
    """
    timer_batch = AverageMeter('Time', ':6.3f')
    timer_data = AverageMeter('Data', ':6.3f')
    loss_meter = AverageMeter('Loss', ':.4f')
    num_batches = len(train_loader)
    progress = ProgressMeter(
        num_batches,
        [timer_batch, timer_data, loss_meter],
        prefix="Epoch: [{}]".format(epoch))

    model.train()

    # Print about ten progress lines per epoch (at least every batch for tiny loaders).
    log_every = max(num_batches // 10, 1)

    tick = time.time()
    for step, (views, _) in enumerate(train_loader):
        # Time spent waiting for the data loader.
        timer_data.update(time.time() - tick)

        view_a = views[0].cuda()
        view_b = views[1].cuda()
        p1, p2, z1, z2 = model(x1=view_a, x2=view_b)

        # Symmetric loss: each prediction is compared with the other view's z.
        sim_12 = criterion(p1, z2).mean()
        sim_21 = criterion(p2, z1).mean()
        loss = -(sim_12 + sim_21) * 0.5

        loss_meter.update(loss.item(), views[0].size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Full iteration time, data loading included.
        timer_batch.update(time.time() - tick)
        tick = time.time()

        if step % log_every == 0:
            progress.display(step)
# Ten pre-training epochs, numbered from 1.
for epoch in range(1, 11):
    train(
        train_loader=train_loader,
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        epoch=epoch,
    )

Epoch: [1][ 0/78]	Time  0.425 ( 0.425)	Data  0.163 ( 0.163)	Loss -0.0095 (-0.0095)
Epoch: [1][ 7/78]	Time  0.203 ( 0.232)	Data  0.167 ( 0.164)	Loss -0.1689 (-0.0938)
Epoch: [1][14/78]	Time  0.223 ( 0.219)	Data  0.182 ( 0.163)	Loss -0.2685 (-0.1582)
Epoch: [1][21/78]	Time  0.207 ( 0.214)	Data  0.166 ( 0.162)	Loss -0.3099 (-0.2041)
Epoch: [1][28/78]	Time  0.199 ( 0.211)	Data  0.157 ( 0.162)	Loss -0.3573 (-0.2369)
Epoch: [1][35/78]	Time  0.196 ( 0.208)	Data  0.159 ( 0.161)	Loss -0.3766 (-0.2613)
Epoch: [1][42/78]	Time  0.203 ( 0.206)	Data  0.160 ( 0.160)	Loss -0.3782 (-0.2806)
Epoch: [1][49/78]	Time  0.216 ( 0.206)	Data  0.172 ( 0.160)	Loss -0.3894 (-0.2957)
Epoch: [1][56/78]	Time  0.199 ( 0.205)	Data  0.154 ( 0.160)	Loss -0.4000 (-0.3076)
Epoch: [1][63/78]	Time  0.274 ( 0.207)	Data  0.223 ( 0.161)	Loss -0.3859 (-0.3171)
Epoch: [1][70/78]	Time  0.224 ( 0.207)	Data  0.177 ( 0.161)	Loss -0.4053 (-0.3258)
Epoch: [1][77/78]	Time  0.110 ( 0.206)	Data  0.081 ( 0.161)	Loss -0.4198 (-0.3329)
Epoc

In [10]:
from core.hist_dataset import DemoDataPreviousAction

# Demonstration dataset for the classifier stage; cls_train reads the keys
# 'obs', 'prev_a' and 'a' from each batch. The same augmentation pipeline as
# in pre-training is applied to the observations.
cls_train_dataset = DemoDataPreviousAction(
    demo_folder='demos/MiniWorld-OneRoom-v0/agent',
    nb_demos=300,
    transform=transforms.Compose(augmentation),
)
cls_train_dataloader = DataLoader(
    dataset=cls_train_dataset,
    batch_size=128,
    shuffle=True,
)

In [11]:
# Classifier head fed with the encoder output concatenated with the previous
# action (see cls_train). Presumably 2304 is the encoder feature width and 9
# the previous-action width - TODO confirm against Encoder / the dataset.
mlp_classifier = MLP(dim=2304 + 9).cuda()

# Disabled alternative: class-weighted cross-entropy.
# weights = [1., 1., 0.1, 0.01, 0.01, 0.01, 0.01, 0.01]
# class_weights = torch.FloatTensor(weights)
# sup_criterion = nn.CrossEntropyLoss(weight=class_weights).cuda()
sup_criterion = nn.CrossEntropyLoss().cuda()

# Only the classifier's parameters are handed to this optimizer.
sup_opt = optim.Adam(params=mlp_classifier.parameters(), lr=1e-3)
def grad_model(model, requires_grad: bool):
    """Set ``requires_grad`` on every parameter yielded by ``model.parameters()``.

    Args:
        model: Object exposing a ``parameters()`` iterable.
        requires_grad: Value assigned to each parameter's ``requires_grad``
            attribute (``False`` freezes the model, ``True`` unfreezes it).
    """
    for weight in model.parameters():
        weight.requires_grad = requires_grad

# freeze representation, use encoder to get features, then train the FCN
def cls_train(train_loader, model, cls_mlp, criterion, optimizer, epoch):
    """Train the classifier head for one epoch on top of the frozen encoder.

    For each batch the encoder output for ``batch['obs']`` is concatenated
    with ``batch['prev_a']`` along the last dimension, passed through
    ``cls_mlp``, and scored against the flattened ``batch['a']`` targets.

    Side effect: ``grad_model`` sets ``requires_grad = False`` on every
    parameter of ``model.encoder`` in place, and this function does not
    restore it afterwards.

    Args:
        train_loader: Iterable of dict batches with keys 'obs', 'prev_a', 'a'.
        model: Object whose ``encoder`` attribute produces the features.
        cls_mlp: Classifier head being trained.
        criterion: Loss called as ``criterion(logits, target)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress prefix.
    """
    enc_model = model.encoder
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses],
        prefix="Epoch: [{}]".format(epoch))

    # NOTE(review): the encoder stays in train mode although its parameters are
    # frozen. If it contains BatchNorm/Dropout layers, their behaviour (and any
    # BatchNorm running statistics) still changes during this loop - confirm
    # whether enc_model.eval() is what is actually intended.
    enc_model.train()
    grad_model(enc_model, False)
    cls_mlp.train()

    # Print about ten progress lines per epoch (at least every batch for tiny loaders).
    display_every = max(len(train_loader) // 10, 1)

    end = time.time()
    for i, batch in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = batch['obs'].cuda()
        prev_action = batch['prev_a'].cuda()
        target = batch['a'].cuda().flatten()

        # The encoder is frozen, so no autograd graph is needed for its forward pass.
        with torch.no_grad():
            z = enc_model(images)
        z = torch.cat((z, prev_action), dim=-1)
        logits = cls_mlp(z)
        loss = criterion(logits, target)

        # `images` is the batched tensor itself here (not a pair of views as in
        # train()), so the batch size is images.size(0). The previous
        # images[0].size(0) measured a dimension of a single sample and
        # mis-weighted the running average.
        losses.update(loss.item(), images.size(0))

        # compute gradient and take an optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % display_every == 0:
            progress.display(i)
# Twenty classifier-training epochs, numbered from 1.
for epoch in range(1, 21):
    cls_train(
        train_loader=cls_train_dataloader,
        model=model,
        cls_mlp=mlp_classifier,
        criterion=sup_criterion,
        optimizer=sup_opt,
        epoch=epoch,
    )

Epoch: [1][ 0/78]	Time  0.084 ( 0.084)	Data  0.065 ( 0.065)	Loss 2.0715 (2.0715)
Epoch: [1][ 7/78]	Time  0.083 ( 0.083)	Data  0.062 ( 0.064)	Loss 1.0289 (1.4329)
Epoch: [1][14/78]	Time  0.084 ( 0.083)	Data  0.063 ( 0.063)	Loss 0.9060 (1.2427)
Epoch: [1][21/78]	Time  0.088 ( 0.084)	Data  0.067 ( 0.064)	Loss 0.8038 (1.1093)
Epoch: [1][28/78]	Time  0.088 ( 0.085)	Data  0.068 ( 0.064)	Loss 0.7727 (1.0317)
Epoch: [1][35/78]	Time  0.086 ( 0.085)	Data  0.066 ( 0.064)	Loss 0.7756 (0.9675)
Epoch: [1][42/78]	Time  0.081 ( 0.084)	Data  0.062 ( 0.064)	Loss 0.5486 (0.9156)
Epoch: [1][49/78]	Time  0.081 ( 0.084)	Data  0.061 ( 0.064)	Loss 0.8673 (0.8866)
Epoch: [1][56/78]	Time  0.084 ( 0.084)	Data  0.064 ( 0.064)	Loss 0.7093 (0.8592)
Epoch: [1][63/78]	Time  0.082 ( 0.084)	Data  0.063 ( 0.064)	Loss 0.7399 (0.8374)
Epoch: [1][70/78]	Time  0.079 ( 0.084)	Data  0.062 ( 0.064)	Loss 0.6345 (0.8213)
Epoch: [1][77/78]	Time  0.042 ( 0.083)	Data  0.031 ( 0.063)	Loss 0.7244 (0.8067)
Epoch: [2][ 0/78]	Time  0.08

In [14]:
# Write the encoder and classifier weights into a single checkpoint file.
torch.save(
    obj=dict(
        encoder_dict=model.encoder.state_dict(),
        mlp_dict=mlp_classifier.state_dict(),
    ),
    f='test_simsiam.pt',
)

In [13]:
# Run the external simsiam_eval.py script on the 'test_simsiam.pt' checkpoint file.
!python simsiam_eval.py --env-name MiniWorld-OneRoom-v0 --model_path test_simsiam.pt

r
