In [1]:
import torch.utils.data as data
import scipy.io as sio
from PIL import Image
import os
import os.path
import torchvision.transforms as transforms
import torch
import numpy as np
import re
import pandas as pd
import torch.nn.functional as F
import torch.nn as nn
import torch.backends.cudnn as cudnn
import time
import datetime
import shutil

In [2]:
class gazeData(data.Dataset):
    def __init__(self, dataset, imSize=(224,224)):

        self.dataset = dataset
        self.imSize = imSize
        
        self.transformImg = transforms.Compose([transforms.Resize(self.imSize)
                                                ,transforms.ToTensor(),
                                                 transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])])
        self.classLabel = [0]*16
#         self.transformPoint = transforms.Compose([
#             transforms.ToTensor()
#         ])
    

    def __getitem__(self, index):
#         index = self.indices[index]

        filePath = self.dataset['file'][index]
        image = Image.open(filePath).convert('RGB')
        image = self.transformImg(image)   

        labels = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        labels[self.dataset['location'][index]] = 1
        labels = torch.FloatTensor(labels)
        sample = {'image': image, 'labels': labels}
        
        
        return sample
    
        
    def __len__(self):
        return len(self.dataset)


In [3]:
import torch.nn as nn
import math

__all__ = ['ResNet', 'resnet50']

def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=16, include_top=True):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.include_top = include_top
        
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        
        if not self.include_top:
            return x
        
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

def resnet50(**kwargs):
    """Constructs a ResNet-50 model.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    return model



In [4]:

def vggface(pretrained=False, **kwargs):
    """VGGFace model.
    Args:
        pretrained (bool): If True, returns pre-trained model 
    """
    model = VggFace(**kwargs)
    if pretrained:
        state = torch.utils.model_zoo.load_url(MODEL_URL)
        model.load_state_dict(state)
    return model


class VggFace(torch.nn.Module):
    def __init__(self, classes=16):
        """VGGFace model.
        Face recognition network.  It takes as input a Bx3x224x224
        batch of face images and gives as output a BxC score vector
        (C is the number of identities).
        Input images need to be scaled in the 0-1 range and then 
        normalized with respect to the mean RGB used during training.
        Args:
            classes (int): number of identities recognized by the
            network
        """
        super().__init__()
        self.conv1 = _ConvBlock(3, 64, 64)
        self.conv2 = _ConvBlock(64, 128, 128)
        self.conv3 = _ConvBlock(128, 256, 256, 256)
        self.conv4 = _ConvBlock(256, 512, 512, 512)
        self.conv5 = _ConvBlock(512, 512, 512, 512)
        self.dropout = torch.nn.Dropout(0.5)
        self.fc1 = torch.nn.Linear(7 * 7 * 512, 4096)
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.fc3 = torch.nn.Linear(4096, classes)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return x

class _ConvBlock(torch.nn.Module):
    """A Convolutional block."""

    def __init__(self, *units):
        """Create a block with len(units) - 1 convolutions.
        convolution number i transforms the number of channels from 
        units[i - 1] to units[i] channels.
        """
        super().__init__()
        self.convs = torch.nn.ModuleList([
            torch.nn.Conv2d(in_, out, 3, 1, 1)
            for in_, out in zip(units[:-1], units[1:])
        ])
        
    def forward(self, x):
        # Each convolution is followed by a ReLU, then the block is
        # concluded by a max pooling.
        for c in self.convs:
            x = F.relu(c(x))
        return F.max_pool2d(x, 2, 2, 0, ceil_mode=True)
    


In [5]:
class SEModule(nn.Module):

    def __init__(self, planes, compress_rate):
        super(SEModule, self).__init__()
        self.conv1 = nn.Conv2d(planes, planes // compress_rate, kernel_size=1, stride=1, bias=True)
        self.conv2 = nn.Conv2d(planes // compress_rate, planes, kernel_size=1, stride=1, bias=True)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        module_input = x
        x = F.avg_pool2d(module_input, kernel_size=module_input.size(2))
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.sigmoid(x)
        return module_input * x


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

        # SENet
        compress_rate = 16
        # self.se_block = SEModule(planes * 4, compress_rate)  # this is not used.
        self.conv4 = nn.Conv2d(planes * 4, planes * 4 // compress_rate, kernel_size=1, stride=1, bias=True)
        self.conv5 = nn.Conv2d(planes * 4 // compress_rate, planes * 4, kernel_size=1, stride=1, bias=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)


        ## senet
        out2 = F.avg_pool2d(out, kernel_size=out.size(2))
        out2 = self.conv4(out2)
        out2 = self.relu(out2)
        out2 = self.conv5(out2)
        out2 = self.sigmoid(out2)
        # out2 = self.se_block.forward(out)  # not used

        if self.downsample is not None:
            residual = self.downsample(x)

        out = out2 * out + residual
        # out = out2 + residual  # not used
        out = self.relu(out)
        return out


class SENet(nn.Module):

    def __init__(self, block, layers, num_classes=16, include_top=True):
        self.inplanes = 64
        super(SENet, self).__init__()
        self.include_top = include_top
        
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        
        if not self.include_top:
            return x
        
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


def senet50(**kwargs):
    """Constructs a SENet-50 model.
    """
    model = SENet(Bottleneck, [3, 4, 6, 3], **kwargs)
    return model

In [6]:
def save_checkpoint(state, is_best, filename='checkpointClass.pth.tar'):
    CHECKPOINTS_PATH = './gazeClassCheckpoint'
    if not os.path.isdir(CHECKPOINTS_PATH):
        os.makedirs(CHECKPOINTS_PATH, 0o777)
    bestFilename = os.path.join(CHECKPOINTS_PATH, 'best_' + filename)
    filename = os.path.join(CHECKPOINTS_PATH, filename)
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, bestFilename)
        
def load_checkpoint(filename='./gazeClassCheckpoint/checkpointClass.pth.tar'):
    print(filename)
    if not os.path.isfile(filename):
        return None
    state = torch.load(filename)
    return state

In [7]:
def adjust_learning_rate(optimizer, epoch):
    lr = 0.0001 * (0.1 ** (epoch // 30))
    for param_group in optimizer.state_dict()['param_groups']:
        param_group['lr'] = lr

In [9]:
workers = 16
epochs = 25
batch_size = 64
weight_decay = 1e-4
best_loss = 1000
lr = 0.0001

In [10]:
df_gaze = pd.read_csv('gazeClassData.csv')
df_gaze =df_gaze.drop(['index'],axis=1)

df_train=df_gaze.sample(frac=0.9,random_state=100) 
df_tmp= df_gaze.drop(df_train.index)

df_train.reset_index(inplace=True)
df_tmp.reset_index(inplace=True)

df_val = df_tmp.sample(frac=0.5, random_state = 100)
df_test = df_tmp.drop(df_val.index)

df_val.reset_index(inplace=True)
df_test.reset_index(inplace=True)

In [11]:
df_train.head(5)

Unnamed: 0,index,location,file
0,438573,2,./data/01849/frames/00743.jpg
1,703392,12,./data/02945/frames/00833.jpg
2,158947,11,./data/00831/frames/00307.jpg
3,779797,15,./data/03312/frames/00838.jpg
4,587944,15,./data/02416/frames/00086.jpg


In [12]:
print('train',len(df_train),'val',len(df_val),'test',len(df_test))

train 737485 val 40972 test 40971


In [13]:
dataTrain = gazeData(dataset=df_train)
dataVal = gazeData(dataset=df_val)
dataTest = gazeData(dataset=df_test)

In [15]:
train_loader = torch.utils.data.DataLoader(
        dataTrain,
        batch_size=batch_size, shuffle=True,
        num_workers=workers, pin_memory=True)
val_loader = torch.utils.data.DataLoader(
        dataVal,
        batch_size=batch_size, shuffle=True,
        num_workers=workers, pin_memory=True)
test_loader = torch.utils.data.DataLoader(
        dataTest,
        batch_size=1, shuffle=True,
        num_workers=workers, pin_memory=True)

In [16]:
model = resnet50()
# model = senet50()
# model =vggface()
model = torch.nn.DataParallel(model)
model.cuda()
cudnn.benchmark = True   
criterion = nn.MSELoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr)

In [17]:
epoch =0
saved = load_checkpoint()
if saved:
    print('Loading checkpoint for epoch %05d with loss %.5f (which is the mean squared error not the actual linear error)...' % (saved['epoch'], saved['best_prec1']))
    state = saved['state_dict']
    try:
        model.module.load_state_dict(state)
    except:
        model.load_state_dict(state)
    epoch = saved['epoch']
    best_prec1 = saved['best_prec1']
else:
    print('Warning: Could not read checkpoint!')

./gazeClassCheckpoint/checkpointClass.pth.tar
Loading checkpoint for epoch 00002 with loss 0.00019 (which is the mean squared error not the actual linear error)...


In [18]:
def train(train_loader, model, criterion,optimizer, epoch):
    model.train()
    end = time.time()
    running_loss = 0
    for i,sample in enumerate(train_loader):
        frame, locationClass= sample['image'],sample['labels']

        locationClass = locationClass.cuda()
        frame = frame.cuda()
        locationClass = torch.autograd.Variable(locationClass, requires_grad = True)
        frame = torch.autograd.Variable(frame, requires_grad = True)
        
        optimizer.zero_grad()

        output = model(frame)
        
        loss = criterion(output, locationClass)
        
        loss.backward()
        optimizer.step()

        # 통계를 출력합니다.
        running_loss += loss.item()
        if i % 200 == 0:    # print every 2000 mini-batches
            print('Train [%d, %d / %d] loss: %.3f' %
                  (epoch + 1, i + 1,len(train_loader), running_loss / 200))
            running_loss = 0.0
            print(str(datetime.datetime.now().time()))

def validate(val_loader, model, criterion,optimizer, epoch) :

    model.eval()
    end = time.time()
    val_loss = 0
    for i,sample in enumerate(val_loader):
        frame, locationClass= sample['image'],sample['labels']

        locationClass = locationClass.cuda()
        frame = frame.cuda()
        locationClass = torch.autograd.Variable(locationClass, requires_grad = True)
        frame = torch.autograd.Variable(frame, requires_grad = True)
        
        optimizer.zero_grad()

        with torch.no_grad():
            output = model(frame)
        
        loss = criterion(output, locationClass)
        
        loss.backward()
        optimizer.step()

        val_loss += loss.item()
        if i % 200 == 0:   
            print('Validate [%d, %5d / %5d ] loss: %.3f' %
                  (epoch + 1, i + 1,len(val_loader) , val_loss / (i+1)))
            print(str(datetime.datetime.now().time()))
        return val_loss/len(val_loader)
    
def TestData(test_loader, model) :

    model.eval()
    correct = 0
    total = 0
    for i,sample in enumerate(test_loader):
        frame, locationClass= sample['image'],sample['labels']
        locationClass = locationClass.cuda()
        frame = frame.cuda()
        locationClass = torch.autograd.Variable(locationClass, requires_grad = True)
        frame = torch.autograd.Variable(frame, requires_grad = True)
        
        output = model(frame)
        _, predicted = torch.max(output.data, 1)
        total += locationClass.size(0)
        correct += (predicted == locationClass).sum().item()
        
    print('Test set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)) )

In [None]:
print('EPOCH >',epoch)
for epoch in range(epoch, epochs):
    adjust_learning_rate(optimizer, epoch)

    train(train_loader, model, criterion, optimizer, epoch)

    val_loss = validate(val_loader, model, criterion,optimizer, epoch)
    
    TestData(test_loader,model)
    # remember best prec@1 and save checkpoint
    is_best = val_loss < best_loss
    best_loss = min(val_loss, best_loss)
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_prec1': best_loss,
    }, is_best)

In [None]:
test_loader = torch.utils.data.DataLoader(
        data,
        batch_size=1, shuffle=True,
        num_workers=workers, pin_memory=True)
TestData(test_loader,model)