In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets
from torch import optim
from PIL import Image
import numpy as np
import random
import os
import math
import pycm

In [4]:
# configurations
path = "/content/wacv2016-master/dataset"
path_dest = "/content/samples"
split = 0.8
batch_size = 16
seed = 999
n_samples = [228, 412, 412]

In [28]:
# MobileNetV1
# in_channels = 1, n_classes = 3, input_shape = b * 1 * 100 * 100
class BasicConv2d(nn.Module):
    def __init__(self, ksize, inCH, outCH, padding=0, stride=1):
        super(BasicConv2d, self).__init__()
        self.conv2d = nn.Conv2d(kernel_size=ksize, in_channels=inCH, 
                        out_channels=outCH, padding=padding, stride=stride)
        self.bn = nn.BatchNorm2d(outCH)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv2d(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class DepthwiseConv2d(nn.Module):
    def __init__(self, ksize, inCH, outCH, padding=0, stride=1):
        super(DepthwiseConv2d, self).__init__()
        self.dwConv2d = nn.Conv2d(kernel_size=ksize, in_channels=inCH, 
                            out_channels=inCH, stride=stride, padding=padding, groups=inCH)
        self.bn = nn.BatchNorm2d(inCH)
        self.relu = nn.ReLU(inplace=True)
        self.pointwiseConv2d = BasicConv2d(ksize=1, inCH=inCH, outCH=outCH)

    def forward(self, x):
        x = self.dwConv2d(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.pointwiseConv2d(x)
        return x


class MobileNet(nn.Module):
    def __init__(self, classes=10):
        super(MobileNet, self).__init__()
        self.pre_layer = BasicConv2d(ksize=3, inCH=1, outCH=32)
        self.Depthwise = nn.Sequential(
            DepthwiseConv2d(ksize=3, inCH=32, outCH=64, padding=1),
            DepthwiseConv2d(ksize=3, inCH=64, outCH=128, stride=2, padding=1),
            DepthwiseConv2d(ksize=3, inCH=128, outCH=128, padding=1),
            DepthwiseConv2d(ksize=3, inCH=128, outCH=256, padding=1),
            DepthwiseConv2d(ksize=3, inCH=256, outCH=256, padding=1),
            DepthwiseConv2d(ksize=3, inCH=256, outCH=512, stride=2, padding=1),
            DepthwiseConv2d(ksize=3, inCH=512, outCH=512, padding=1),
            DepthwiseConv2d(ksize=3, inCH=512, outCH=512, padding=1),
            DepthwiseConv2d(ksize=3, inCH=512, outCH=512, padding=1),
            DepthwiseConv2d(ksize=3, inCH=512, outCH=512, padding=1),
            DepthwiseConv2d(ksize=3, inCH=512, outCH=512, padding=1),
            DepthwiseConv2d(ksize=3, inCH=512, outCH=1024, stride=2, padding=1),
            DepthwiseConv2d(ksize=3, inCH=1024, outCH=1024, padding=1)
        )
        self.avgpool = nn.AvgPool2d((4, 4))
        self.linear = nn.Linear(1024*3*3, classes)

    def forward(self, x):
        x = self.pre_layer(x)
        x = self.Depthwise(x)
        x = self.avgpool(x)
        x = x.view(-1, 3*3*1024)
        x = self.linear(x)
        return x

In [None]:
# generate samples
# use provided samples.zip or run this cell to generate samples from wacv2016-master
os.mkdir(path_dest)
file_num = []
for i in range(1, 4):
    j = 0
    for file in os.listdir(os.path.join(path, str(i))):
        img = Image.open(os.path.join(path, str(i), file))
        if j < 412:
            if img.size[0] == 100 and img.size[1] == 100 and len(img.size) == 2:
                j += 1
                img.save(os.path.join(path_dest, "{}_{}.jpg".format(i-1, j)))
        else:
            break
    file_num.append(j)
print(file_num)

In [29]:
# dataset split
random.seed(seed)
file_list = []
for i, n in enumerate(n_samples):
  for j in range(n):
    file_list.append("{}_{}.jpg".format(i, j+1))
random.shuffle(file_list)
train_list = file_list[:int(split * len(file_list))]
val_list = file_list[int(split * len(file_list)):]

In [30]:
# dataset class
class WACV2016(Dataset):
    def __init__(self, path, file_list):
        super().__init__()
        self.trans = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
        ])
        self.path = path
        self.file_names = file_list

    def __len__(self):
        return len(self.file_names)

    def __getitem__(self, index):
        img = Image.open(os.path.join(self.path, self.file_names[index]))
        label = int(self.file_names[index].split(".")[0].split("_")[0])
        return self.trans(img), label

In [31]:
# ...
train_set = WACV2016(path_dest, train_list)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_set = WACV2016(path_dest, val_list)
val_loader = DataLoader(val_set, batch_size=1, shuffle=False)

model = MobileNet(classes=3)
model = model.cuda()
citeration = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

In [32]:
# training
model.train()
for epoch in range(10):
    train_loss = 0
    train_acc = 0
    for i, (image, label) in enumerate(train_loader):
        image, label = image.cuda(), label.cuda()
        optimizer.zero_grad()
        # forward
        output = model(image)
        loss = citeration(output, label.long())
        pred = torch.torch.argmax(output, 1)
        acc = (pred == label).sum() / batch_size
        train_loss += loss.item()
        train_acc += acc.cpu().numpy()
        # backward
        loss.backward()
        optimizer.step()
    print("Epoch: {}  Loss: {}  Acc: {}".format(
        epoch+1, train_loss/len(train_loader), train_acc/len(train_loader)))

Epoch: 1  Loss: 1.064754716630252  Acc: 0.4033018867924528
Epoch: 2  Loss: 1.0321553327002615  Acc: 0.44339622641509435
Epoch: 3  Loss: 1.0092262198340218  Acc: 0.4858490566037736
Epoch: 4  Loss: 0.9971757013842745  Acc: 0.5082547169811321
Epoch: 5  Loss: 0.9811567079346135  Acc: 0.5129716981132075
Epoch: 6  Loss: 0.9661633496014577  Acc: 0.5247641509433962
Epoch: 7  Loss: 0.9649738928057113  Acc: 0.5011792452830188
Epoch: 8  Loss: 0.9545257338937724  Acc: 0.5153301886792453
Epoch: 9  Loss: 0.9407656642625917  Acc: 0.5306603773584906
Epoch: 10  Loss: 0.9365216358652655  Acc: 0.535377358490566


In [33]:
# validation
model.eval()
y_pred = []
y_true = []
for i, (image, label) in enumerate(val_loader):
        image, label = image.cuda(), label.cuda()
        # forward
        output = model(image)
        pred = torch.argmax(output, 1).unsqueeze(0)
        y_pred.append(int(pred.cpu().numpy()))
        y_true.append(int(label.unsqueeze(0).cpu().numpy()))
print(y_pred)
print(y_true)

[2, 1, 1, 0, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 0, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 0, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1, 0, 2, 2, 1, 2, 2, 2, 2, 0, 1, 2, 1, 1, 1, 2, 2, 1, 2, 0, 1, 2, 1, 0, 1, 2, 1, 0, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 0, 2, 2, 1, 1, 1, 2, 2, 1, 1, 0, 0, 1, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 0, 1, 2, 2, 0, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 0, 2, 2, 2, 2, 2, 1, 2, 2, 0, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 0]
[1, 2, 2, 0, 2, 2, 1, 2, 0, 0, 1, 2, 2, 1, 1, 1, 1, 2, 0, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 0, 2, 0, 1, 0, 1, 2, 1, 2, 2, 1, 1, 0, 2, 2, 1, 1, 0, 1, 1, 2, 1, 2, 2, 1, 2, 2, 0, 0, 2, 1, 2, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 1, 2, 2, 0, 2, 2, 2, 1, 2, 0, 2, 2, 2, 1, 2, 0, 1, 2, 2, 1, 2, 1, 1, 0, 2,

In [34]:
cm = pycm.ConfusionMatrix(y_true, y_pred, digit=5)

In [35]:
cm.ACC

{0: 0.8199052132701422, 1: 0.5781990521327014, 2: 0.5781990521327014}

In [36]:
cm.GI

{0: 0.23679912901469802, 1: 0.08791832104367558, 2: 0.1941176470588235}

In [37]:
cm.AUC

{0: 0.618399564507349, 1: 0.5439591605218378, 2: 0.5970588235294118}

In [38]:
cm.AGF

{0: 0.5149201703743067, 1: 0.5117645387620399, 2: 0.6459840247571285}