In [1]:
from torch.utils.data import Dataset
import torch
from torch import optim
from torch import nn as nn
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
from torchvision.transforms import ToTensor
import torch.nn.functional as F
import pandas as pd
import glob
from scipy.spatial import distance
from sklearn.neighbors import NearestNeighbors
import torchvision.transforms as T
from PIL import Image
from utility import img_transform, EmbeddingHead
from datetime import date

In [2]:
DATA_PATH = "data/pblock-32965-idx_280x175"
NUM_TRAIN = 6593 # 20%
NUM_TEST = 26372 # 80%
NUM_TOTAL = 35912

In [3]:
df = pd.read_csv(os.path.join(DATA_PATH, "metadata.csv"))
df.head()

Unnamed: 0,pallet_id,path,camera,frame,target,frame_rel
0,1001000000000002948,/home/nils/Documents/ude/pallet/data/pblock-32...,1,1009,0,0
1,1001000000000002948,/home/nils/Documents/ude/pallet/data/pblock-32...,1,1012,0,1
2,1001000000000002948,/home/nils/Documents/ude/pallet/data/pblock-32...,2,1007,0,0
3,1001000000000002948,/home/nils/Documents/ude/pallet/data/pblock-32...,2,1010,0,1
4,1001000000000002949,/home/nils/Documents/ude/pallet/data/pblock-32...,1,1234,1,0


In [4]:
class PalletDataset():
    def __init__(self, data, target, transform=None, target_transform=None):
        self.data = data
        self.target = target
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.data)
        
    def __getitem__(self, idx):
        img = Image.open(self.data[idx]).convert('RGB')
        label = self.target[idx]
        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            label = self.target_transform(label)
        return img, label

In [5]:
train_selector = (df["target"] < NUM_TRAIN)
trainset = PalletDataset(df.loc[train_selector, "path"].values, df.loc[train_selector, "target"].values, transform=img_transform, target_transform=lambda x: torch.tensor(x, dtype=torch.long))
trainevalset = PalletDataset(df.loc[train_selector, "path"].values, df.loc[train_selector, "target"].values, transform=lambda x: img_transform(x, is_eval=True), target_transform=lambda x: torch.tensor(x, dtype=torch.long))
print("Trainset: ", len(trainset))
print("Trainevalset: ", len(trainevalset))

Trainset:  26372
Trainevalset:  26372
Dataset:  131860


In [6]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=8)

In [7]:
class ClassifierHead(nn.Module):
    def __init__(self, in_features, n_classes):
        super().__init__()
        self.fc1 = nn.Linear(in_features, n_classes)

    def forward(self, x):
        return self.fc1(x)

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [9]:
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
net.fc = ClassifierHead(net.fc.in_features, NUM_TRAIN)
optimizer = optim.SGD(net.parameters(), lr=0.01)
lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[20, 40], gamma=0.3)
criterion = torch.nn.CrossEntropyLoss()

Using cache found in /home/nils/.cache/torch/hub/pytorch_vision_v0.10.0


In [10]:
EPOCHS = 80
SAVE_PATH = "model/"

net.to(device)
net.train()

for epoch in range(EPOCHS):
    running_loss = 0.0
    total = torch.tensor(0).to(device)
    correct = torch.tensor(0).to(device)
    for i, data in enumerate(trainloader, 0):
        x, y = data
        optimizer.zero_grad()
        out = net(x.to(device))
        loss = criterion(out, y.to(device))
        loss.backward()
        optimizer.step()

        _, pred = torch.max(out.data, 1)
        total += y.size(0)
        correct += (pred == y.to(device)).sum().item()

        running_loss += loss.item()
        ratio = (correct / total).to("cpu")
        if i % 200 == 199:
            print(f'[{epoch + 1}, {i + 1:3d}] loss: {running_loss / 199:.3f} acc: ({100*ratio:.2f})')
            running_loss = 0.0
    lr_scheduler.step()
    torch.save(net.state_dict(), os.path.join(SAVE_PATH, "model_classifier_256x128_e" + str(epoch) + "_" + str(date.today()) + ".pth"))
print('Finished Training')

[1, 200] loss: 8.887 acc: (0.02)
[1, 400] loss: 8.889 acc: (0.01)
[1, 600] loss: 8.869 acc: (0.02)
[1, 800] loss: 8.832 acc: (0.02)
[2, 200] loss: 8.624 acc: (0.22)
[2, 400] loss: 8.593 acc: (0.27)
[2, 600] loss: 8.533 acc: (0.27)
[2, 800] loss: 8.466 acc: (0.29)
[3, 200] loss: 8.187 acc: (0.73)
[3, 400] loss: 8.112 acc: (0.95)
[3, 600] loss: 8.041 acc: (1.00)
[3, 800] loss: 7.947 acc: (0.97)
[4, 200] loss: 7.605 acc: (2.12)
[4, 400] loss: 7.536 acc: (2.17)
[4, 600] loss: 7.478 acc: (2.23)
[4, 800] loss: 7.381 acc: (2.26)
[5, 200] loss: 7.006 acc: (4.84)
[5, 400] loss: 6.956 acc: (4.81)
[5, 600] loss: 6.864 acc: (4.82)
[5, 800] loss: 6.800 acc: (4.91)
[6, 200] loss: 6.407 acc: (8.41)
[6, 400] loss: 6.355 acc: (8.12)
[6, 600] loss: 6.304 acc: (8.16)
[6, 800] loss: 6.217 acc: (8.36)
[7, 200] loss: 5.820 acc: (14.42)
[7, 400] loss: 5.797 acc: (13.59)
[7, 600] loss: 5.765 acc: (13.45)
[7, 800] loss: 5.661 acc: (13.58)
[8, 200] loss: 5.289 acc: (19.70)
[8, 400] loss: 5.283 acc: (19.04)
[8, 

## Eval

In [11]:
trainevalloader = torch.utils.data.DataLoader(trainevalset, batch_size=256, shuffle=False, num_workers=8)

In [12]:
net.eval()
net.to(device)
with torch.no_grad():
    total = torch.tensor(0).to(device)
    correct = torch.tensor(0).to(device)
    for i, data in enumerate(trainevalloader, 0):
        x, y = data
        out = net(x.to(device))
        _, pred = torch.max(out.data, 1)
        total += y.size(0)
        correct += (pred == y.to(device)).sum().item()
acc = (correct / total * 100).to("cpu")
print("Trainset acc: %.2f" %(acc))

Trainset acc: 98.94
