In [1]:
from torch.utils.data import Dataset
import torch
from torch import optim
from torch import nn as nn
import os
import glob
import cv2
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from torchvision.transforms import ToTensor
import torch.nn.functional as F
from PIL import Image
import torchvision.transforms as T
from utility import img_transform, EmbeddingHead
from datetime import date

In [2]:
DATA_PATH = "data/pblock-32965-idx_280x175"
NUM_TRAIN = 6593 # 20%
NUM_TEST = 26372 # 80%
NUM_TOTAL = 35912

In [3]:
df = pd.read_csv(os.path.join(DATA_PATH, "metadata.csv"))
df.head()

Unnamed: 0,pallet_id,path,camera,frame,target,frame_rel
0,1001000000000002948,/home/nils/Documents/ude/pallet/data/pblock-32...,1,1009,0,0
1,1001000000000002948,/home/nils/Documents/ude/pallet/data/pblock-32...,1,1012,0,1
2,1001000000000002948,/home/nils/Documents/ude/pallet/data/pblock-32...,2,1007,0,0
3,1001000000000002948,/home/nils/Documents/ude/pallet/data/pblock-32...,2,1010,0,1
4,1001000000000002949,/home/nils/Documents/ude/pallet/data/pblock-32...,1,1234,1,0


In [4]:
class PalletTupleDataset():
    def __init__(self, data, target, transform=None, target_transform=None):
        self.data = data
        self.target = target
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.data)-1
        
    def __getitem__(self, idx):
        if idx % 2 == 0:
            # genuine pair
            idx_one = idx
            idx_two = idx+1
            label = 0
        else:
            # impostor pair
            idx_one = idx
            idx_list = list(range(len(self.data)))
            idx_list.remove(idx)
            idx_list.remove(idx+1)
            idx_list.remove(idx-1)
            idx_two = np.random.choice(idx_list)
            label = 1

        img_one = Image.open(self.data[idx_one]).convert('RGB')
        img_two = Image.open(self.data[idx_two]).convert('RGB')
        
        if self.transform:
            img_one = self.transform(img_one)
            img_two = self.transform(img_two)
        if self.target_transform:
            label = self.target_transform(label)
        return img_one, img_two, label

In [5]:
class PalletDataset():
    def __init__(self, data, target, transform=None, target_transform=None):
        self.data = data
        self.target = target
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.data)
        
    def __getitem__(self, idx):
        img = Image.open(self.data[idx]).convert('RGB')
        label = self.target[idx]
        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            label = self.target_transform(label)
        return img, label

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [8]:
train_selector = (df["target"] < NUM_TRAIN)
trainset = PalletTupleDataset(df.loc[train_selector, "path"].values, df.loc[train_selector, "target"].values, transform=img_transform, target_transform=lambda x: torch.tensor(x, dtype=torch.long))
trainevalset = PalletDataset(df.loc[train_selector, "path"].values, df.loc[train_selector, "target"].values, transform=lambda x: img_transform(x, is_eval=True))

In [11]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=8)
trainevalloader = torch.utils.data.DataLoader(trainevalset, batch_size=128, shuffle=False, num_workers=8)

In [12]:
def calc_energy(emb1, emb2):
    return torch.sum(torch.abs(emb1 - emb2), dim=1)
    #return torch.sqrt(torch.sum(torch.pow(emb1-emb2, 2), dim=1))

def criterion(energy, labels):
  Q = np.sqrt(2048)
  #Q = 2
  result = (1 - labels) * (2/Q) * energy**2 + labels * 2 * Q * torch.exp(-2.77/Q * energy)
  return torch.mean(result)

In [13]:
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
net.fc = EmbeddingHead()
optimizer = optim.SGD(net.parameters(), lr=0.01)
lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[20, 40], gamma=0.3)

Using cache found in /home/nils/.cache/torch/hub/pytorch_vision_v0.10.0


In [11]:
EPOCHS = 80
SAVE_PATH = "model/"

net.to(device)
net.train()
for epoch in range(80):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        x1, x2, labels = data
        optimizer.zero_grad()
        out1 = net(x1.to(device))
        out2 = net(x2.to(device))
        energy = calc_energy(out1, out2)
        loss = criterion(energy, labels.to(device))
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 199:.5f}')
            running_loss = 0.0
    torch.save(net.state_dict(), os.path.join(SAVE_PATH, "model_siamese_256x128_" + str(date.today()) + ".pth"))
print('Finished Training')

[1,   200] loss: 8.96572
[1,   400] loss: 7.83560
[1,   600] loss: 7.43859
[1,   800] loss: 7.25818
[2,   200] loss: 7.09279
[2,   400] loss: 7.04176
[2,   600] loss: 6.82981
[2,   800] loss: 6.92160
[3,   200] loss: 6.80908
[3,   400] loss: 6.73568
[3,   600] loss: 6.69473
[3,   800] loss: 6.68413
[4,   200] loss: 6.73638
[4,   400] loss: 6.83117
[4,   600] loss: 6.50616
[4,   800] loss: 6.66442
[5,   200] loss: 6.60479
[5,   400] loss: 6.60484
[5,   600] loss: 6.64086
[5,   800] loss: 6.59295
[6,   200] loss: 6.39964
[6,   400] loss: 6.62460
[6,   600] loss: 6.57419
[6,   800] loss: 6.42573
[7,   200] loss: 6.34897
[7,   400] loss: 6.32949
[7,   600] loss: 6.43521
[7,   800] loss: 6.48019
[8,   200] loss: 6.60736
[8,   400] loss: 6.63190
[8,   600] loss: 6.31328
[8,   800] loss: 6.35557
[9,   200] loss: 6.55708
[9,   400] loss: 6.28086
[9,   600] loss: 6.29948
[9,   800] loss: 6.39912
[10,   200] loss: 6.38498
[10,   400] loss: 6.36781
[10,   600] loss: 6.30054
[10,   800] loss: 6.28