In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets
from torch import optim
from PIL import Image
import numpy as np
import random
import os
import math
import pycm

In [30]:
# configurations
path = "/content/wacv2016-master/dataset"
path_dest = "/content/wacv2016-master/samples"
split = 0.8
batch_size = 16

In [31]:
# DenseNet-121
# in_channels = 1, n_classes = 3, input_shape = b * 1 * 100 * 100
class BasicConv2d(nn.Module):
    def __init__(self, ksize, inCH, outCH, stride=1, padding=0):
        super(BasicConv2d, self).__init__()
        self.bn = nn.BatchNorm2d(inCH)
        self.conv2d = nn.Conv2d(kernel_size=ksize, in_channels=inCH,
                         out_channels=outCH, stride=stride, padding=padding)

    def forward(self, x):
        x = self.bn(x)
        x = F.relu(x)
        x = self.conv2d(x)
        return x


class BottleNeck(nn.Module):
    def __init__(self, inCH, k=32):
        super(BottleNeck, self).__init__()
        self.conv2d_1x1 = BasicConv2d(ksize=1, inCH=inCH, outCH=4*k)
        self.conv2d_3x3 = BasicConv2d(ksize=3, inCH=4*k, outCH=k, padding=1)

    def forward(self, x):
        left = self.conv2d_1x1(x)
        left = self.conv2d_3x3(left)
        out = torch.cat([x, left], dim=1)
        return out

class DenseBlock(nn.Module):
    def __init__(self, inCH, layernum=6, k=32):
        super(DenseBlock, self).__init__()
        self.layernum = layernum
        self.k = k
        self.inCH = inCH
        self.outCH = inCH + k * layernum
        self.block = self.make_layer(layernum)

    def forward(self, x):
        out = self.block(x)
        return out

    def make_layer(self, layernum):
        layers = []
        inchannels = self.inCH
        for i in range(layernum):
            layers.append(BottleNeck(inCH=inchannels, k=self.k))
            inchannels += self.k
        return nn.Sequential(*layers)


class Transition(nn.Module):
    def __init__(self, inCH, theta=0.5):
        super(Transition, self).__init__()
        self.outCH = int(math.floor(theta*inCH))
        self.bn = nn.BatchNorm2d(inCH)
        self.conv2d_1x1 = nn.Conv2d(kernel_size=1, in_channels=inCH, out_channels=self.outCH)
        self.avgpool = nn.AvgPool2d((2, 2), stride=2)

    def forward(self, x):
        x = self.bn(x)
        x = F.relu(x)
        x = self.conv2d_1x1(x)
        x = self.avgpool(x)
        return x


class DenseNet121(nn.Module):
    def __init__(self, k=32, theta=0.5, classes=10):
        super(DenseNet121 ,self).__init__()
        self.k=k
        self.theta = theta
        self.pre_layer = BasicConv2d(ksize=3, inCH=1, outCH=2*self.k, padding=1)
        self.DenseBlock_1 = DenseBlock(inCH=2*self.k, layernum=6, k=self.k)
        self.Transition_1 = Transition(inCH=self.DenseBlock_1.outCH, theta=self.theta)
        self.DenseBlock_2 = DenseBlock(inCH=self.Transition_1.outCH, layernum=12, k=self.k)
        self.Transition_2 = Transition(inCH=self.DenseBlock_2.outCH, theta=self.theta)
        self.DenseBlock_3 = DenseBlock(inCH=self.Transition_2.outCH, layernum=24, k=self.k)
        self.Transition_3 = Transition(inCH=self.DenseBlock_3.outCH, theta=self.theta)
        self.DenseBlock_4 = DenseBlock(inCH=self.Transition_3.outCH, layernum=16, k=self.k)
        self.bn = nn.BatchNorm2d(self.DenseBlock_4.outCH)
        self.avgpool = nn.AvgPool2d((4, 4))
        self.linear = nn.Linear(self.DenseBlock_4.outCH*3*3, classes)

    def forward(self, x):
        x = self.pre_layer(x)
        x = self.DenseBlock_1(x)
        x = self.Transition_1(x)
        x = self.DenseBlock_2(x)
        x = self.Transition_2(x)
        x = self.DenseBlock_3(x)
        x = self.Transition_3(x)
        x = self.DenseBlock_4(x)
        x = self.bn(x)
        x = F.relu(x)
        x = self.avgpool(x)
        x = x.view(-1, self.DenseBlock_4.outCH*3*3)
        x = self.linear(x)
        return x

In [6]:
# generate samples
os.mkdir(path_dest)
file_num = []
for i in range(1, 4):
    j = 0
    for file in os.listdir(os.path.join(path, str(i))):
        img = Image.open(os.path.join(path, str(i), file))
        if j < 412:
            if img.size[0] == 100 and img.size[1] == 100 and len(img.size) == 2:
                j += 1
                img.save(os.path.join(path_dest, "{}_{}.jpg".format(i-1, j)))
        else:
            break
    file_num.append(j)
print(file_num)

[228, 412, 412]


In [32]:
# dataset split
file_list = os.listdir(path_dest)
train_list = file_list[:int(split * len(file_list))]
val_list = file_list[int(split * len(file_list)):]

In [33]:
# dataset class
class WACV2016(Dataset):
    def __init__(self, path, file_list):
        super().__init__()
        self.trans = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
        ])
        self.path = path
        self.file_names = file_list

    def __len__(self):
        return len(self.file_names)

    def __getitem__(self, index):
        img = Image.open(os.path.join(self.path, self.file_names[index]))
        label = int(self.file_names[index].split(".")[0].split("_")[0])
        return self.trans(img), label

In [34]:
# ...
train_set = WACV2016(path_dest, train_list)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_set = WACV2016(path_dest, val_list)
val_loader = DataLoader(val_set, batch_size=1, shuffle=False)

model = DenseNet121(classes=3)
model = model.cuda()
citeration = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

In [35]:
# training
model.train()
for epoch in range(10):
    train_loss = 0
    train_acc = 0
    for i, (image, label) in enumerate(train_loader):
        image, label = image.cuda(), label.cuda()
        optimizer.zero_grad()
        # forward
        output = model(image)
        loss = citeration(output, label.long())
        pred = torch.torch.argmax(output, 1)
        acc = (pred == label).sum() / batch_size
        train_loss += loss.item()
        train_acc += acc.cpu().numpy()
        # backward
        loss.backward()
        optimizer.step()
    print("Epoch: {}  Loss: {}  Acc: {}".format(
        epoch+1, train_loss/len(train_loader), train_acc/len(train_loader)))

Epoch: 1  Loss: 1.06207205327052  Acc: 0.4068396226415094
Epoch: 2  Loss: 1.016757437643015  Acc: 0.45754716981132076
Epoch: 3  Loss: 0.9808529874063888  Acc: 0.4858490566037736
Epoch: 4  Loss: 0.965985248673637  Acc: 0.5530660377358491
Epoch: 5  Loss: 0.9396732517008511  Acc: 0.5554245283018868
Epoch: 6  Loss: 0.9346964966576055  Acc: 0.5554245283018868
Epoch: 7  Loss: 0.9122457976611156  Acc: 0.5872641509433962
Epoch: 8  Loss: 0.9023533677155117  Acc: 0.5801886792452831
Epoch: 9  Loss: 0.8916796198431051  Acc: 0.6002358490566038
Epoch: 10  Loss: 0.8666376910119686  Acc: 0.6202830188679245


In [36]:
# validation
model.eval()
y_pred = []
y_true = []
for i, (image, label) in enumerate(val_loader):
        image, label = image.cuda(), label.cuda()
        # forward
        output = model(image)
        pred = torch.argmax(output, 1).unsqueeze(0)
        y_pred.append(int(pred.cpu().numpy()))
        y_true.append(int(label.unsqueeze(0).cpu().numpy()))
print(y_pred)
print(y_true)

[0, 1, 1, 0, 2, 2, 0, 2, 1, 2, 2, 2, 2, 2, 0, 0, 2, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 2, 1, 2, 0, 1, 1, 0, 1, 1, 2, 1, 2, 1, 0, 2, 2, 0, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 2, 0, 1, 1, 2, 2, 2, 0, 1, 0, 1, 0, 2, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 0, 1, 2, 1, 2, 0, 1, 1, 1, 2, 2, 1, 2, 0, 1, 2, 2, 2, 2, 0, 0, 1, 0, 1, 1, 2, 2, 0, 1, 0, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2, 0, 2, 1, 1, 2, 0, 2, 0, 1, 2, 0, 2, 0, 2, 1, 1, 2, 2, 2, 0, 2, 1, 1, 2, 0, 2, 2, 1, 1, 0, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 2, 1, 1, 1, 2, 1]
[0, 1, 2, 0, 1, 2, 0, 2, 2, 2, 1, 1, 1, 2, 0, 0, 1, 1, 2, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 1, 2, 2, 0, 2, 1, 2, 2, 2, 0, 1, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 0, 2, 1, 1, 2, 1, 1, 2, 2, 0, 0, 1, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2, 2, 1, 0, 2, 1, 0, 0, 1, 0, 1, 2, 0, 0, 1, 2, 0, 1, 1, 2, 2, 2, 2, 2, 1, 2, 0, 1, 0, 1, 1, 1, 0, 2, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 0, 1, 1, 1,

In [37]:
cm = pycm.ConfusionMatrix(y_true, y_pred, digit=5)

In [38]:
cm.ACC

{0: 0.8293838862559242, 1: 0.6587677725118484, 2: 0.6966824644549763}

In [39]:
cm.GI

{0: 0.4496461622210126, 1: 0.2933953488372092, 2: 0.36809116809116826}

In [40]:
cm.AGF

{0: 0.7020571120189314, 1: 0.6433921730733854, 2: 0.6870438018026194}