In [1]:
import torch.utils.data as data
import scipy.io as sio
from PIL import Image
import os
import os.path
import torchvision.transforms as transforms
import torch
import numpy as np
import re
import pandas as pd
import torch.nn.functional as F
import torch.nn as nn
import torch.backends.cudnn as cudnn
import time
import datetime
import shutil

In [2]:
class gazeData(data.Dataset):
    """Map-style dataset that pairs an image with a 2-value target.

    ``dataset`` is accessed as ``dataset[column][index]`` and must expose the
    columns ``'file'`` (a path handed to ``PIL.Image.open``), ``'Xdot'`` and
    ``'Ydot'`` (the two target values, presumably gaze coordinates).

    Each item is a tuple ``(row, frame, point)``:
      * ``row``   - ``LongTensor`` holding the requested index,
      * ``frame`` - the RGB image after resize + ``ToTensor``,
      * ``point`` - ``FloatTensor`` ``[Xdot, Ydot]``.
    """

    def __init__(self, dataset, imSize=(224,224)):
        self.dataset = dataset
        self.imSize = imSize

        # Image pipeline: resize to ``imSize`` and convert to a tensor.
        image_steps = [transforms.Resize(self.imSize), transforms.ToTensor()]
        self.transformImg = transforms.Compose(image_steps)

        # Kept as an attribute; __getitem__ below does not call it.
        self.transformPoint = transforms.Compose([transforms.ToTensor()])

    def __getitem__(self, index):
        table = self.dataset

        # Load the image and normalise it to three channels before transforming.
        image = Image.open(table['file'][index]).convert('RGB')
        frame = self.transformImg(image)

        # Build the regression target as a float32 tensor [x, y].
        target = np.array([table['Xdot'][index], table['Ydot'][index]], np.float32)
        point = torch.FloatTensor(target)

        row = torch.LongTensor([int(index)])
        return row, frame, point

    def __len__(self):
        """Number of entries, as reported by ``len`` of the wrapped dataset."""
        return len(self.dataset)


In [3]:
class VGG_16(nn.Module):
    """VGG-16 style network ending in a 2-value regression head.

    Five blocks of 3x3 convolutions (2, 2, 3, 3, 3 layers; 64 to 512 channels),
    each followed by a 2x2 max-pool, feed a stack of five fully connected
    layers (25088 -> 4096 -> 2622 -> 512 -> 64 -> 2). ``fc6`` expects a
    512x7x7 feature map, i.e. a 224x224 input after five poolings.
    """

    def __init__(self):
        super().__init__()
        # Number of convolutions in each of the five blocks.
        self.block_size = [2, 2, 3, 3, 3]
        block_channels = [64, 128, 256, 512, 512]

        # Register conv_<block>_<layer> in the same order as a hand-written
        # listing so parameter names and ordering stay stable for checkpoints.
        in_channels = 3
        for block, (depth, out_channels) in enumerate(
                zip(self.block_size, block_channels), start=1):
            for layer in range(1, depth + 1):
                conv = nn.Conv2d(in_channels, out_channels, 3, stride=1, padding=1)
                setattr(self, 'conv_{}_{}'.format(block, layer), conv)
                in_channels = out_channels

        self.fc6 = nn.Linear(512 * 7 * 7, 4096)
        self.fc7 = nn.Linear(4096, 2622)
        self.fc8 = nn.Linear(2622,512)
        self.fc9 = nn.Linear(512,64)
        self.fc10 = nn.Linear(64,2)

    def forward(self, x):
        """Return a ``(batch, 2)`` prediction for a batch of images ``x``."""
        # Convolutional trunk: ReLU after every conv, max-pool after every block.
        for block, depth in enumerate(self.block_size, start=1):
            for layer in range(1, depth + 1):
                conv = getattr(self, 'conv_{}_{}'.format(block, layer))
                x = F.relu(conv(x))
            x = F.max_pool2d(x, 2, 2)

        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)

        # fc6/fc7: linear -> ReLU -> dropout(0.5).
        for fc in (self.fc6, self.fc7):
            x = F.dropout(F.relu(fc(x)), 0.5, self.training)

        # fc8/fc9: linear -> dropout(0.2), with no activation in between.
        # NOTE(review): without a non-linearity fc8..fc10 compose to a single
        # linear map at eval time - confirm this is intended.
        for fc in (self.fc8, self.fc9):
            x = F.dropout(fc(x), 0.2, self.training)

        return self.fc10(x)

In [4]:
class AverageMeter(object):
    """Keeps the most recent value and the running weighted mean of a metric.

    Attributes:
        val:   last value passed to ``update``.
        sum:   weighted sum of all values seen since the last ``reset``.
        count: total weight (sum of ``n``) seen since the last ``reset``.
        avg:   ``sum / count``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` (e.g. the batch size)."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


In [5]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar', checkpoint_dir='.'):
    """Serialize ``state`` with ``torch.save`` and optionally keep a "best" copy.

    Args:
        state: object to serialize (the training loop passes a dict with
            ``'epoch'``, ``'state_dict'`` and ``'best_prec1'``).
        is_best: when true, the file just written is also copied to
            ``best_<filename>`` in the same directory.
        filename: base name of the checkpoint file.
        checkpoint_dir: directory receiving the files; created if missing.
            Defaults to the current directory, matching the previous
            hard-coded behaviour.
    """
    # Create the *target* directory if needed. The previous check tested the
    # literal '.' (which always exists), so a non-default directory would
    # never have been created.
    os.makedirs(checkpoint_dir, mode=0o777, exist_ok=True)

    checkpoint_path = os.path.join(checkpoint_dir, filename)
    best_path = os.path.join(checkpoint_dir, 'best_' + filename)

    torch.save(state, checkpoint_path)
    if is_best:
        shutil.copyfile(checkpoint_path, best_path)

In [6]:
def load_checkpoint(filename='./checkpoint.pth.tar'):
    """Load a checkpoint written by ``torch.save``.

    Prints the path being tried, then returns the deserialized object, or
    ``None`` when ``filename`` is not an existing file.
    """
    print(filename)
    # Only pass checkpoints you trust to torch.load.
    return torch.load(filename) if os.path.isfile(filename) else None

In [7]:
def adjust_learning_rate(optimizer, epoch, base_lr=0.0001, decay=0.1, step=30):
    """Apply a step-decay schedule to every parameter group of ``optimizer``.

    The learning rate is ``base_lr * decay ** (epoch // step)``; with the
    defaults it starts at 1e-4 and is divided by 10 every 30 epochs.

    Args:
        optimizer: a ``torch.optim`` optimizer whose groups are updated in place.
        epoch: zero-based epoch index.
        base_lr: learning rate used during the first ``step`` epochs.
        decay: multiplicative factor applied every ``step`` epochs.
        step: number of epochs between two decays.

    Returns:
        The learning rate that was applied.
    """
    lr = base_lr * (decay ** (epoch // step))
    # Write to the live ``param_groups``. ``optimizer.state_dict()`` returns
    # packed copies of the groups, so assigning to those never reaches the
    # optimizer and the schedule would silently be a no-op.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

In [8]:
def train(train_loader, model, criterion,optimizer, epoch):
    """Run one training epoch over ``train_loader``.

    Args:
        train_loader: iterable yielding ``(row, frame, gaze)`` batches.
        model: network in which ``model(frame)`` produces the prediction.
        criterion: loss called as ``criterion(output, gaze)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch index, used only in the log line.

    Side effects:
        Increments the module-level ``count`` once per batch and prints a
        progress line (plus a wall-clock timestamp) every 100 increments.
        Batches are moved to the GPU with ``.cuda()``.
    """
    global count
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    model.train()

    end = time.time()

    for i,(row,frame,gaze) in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)
        gaze = gaze.cuda()
        frame = frame.cuda()

        # Inputs and targets are plain tensors: wrapping them in the
        # deprecated autograd.Variable with requires_grad=True only made
        # autograd compute (and discard) gradients w.r.t. the data.
        output = model(frame)

        loss = criterion(output, gaze)

        # Weight the running average by the batch size.
        losses.update(loss.item(), frame.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        count=count+1
        if(count%100==0):
            print('Epoch (train): [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses))
            print(str(datetime.datetime.now().time()))
                


In [9]:
def validate(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Args:
        val_loader: iterable yielding ``(row, frame, gaze)`` batches.
        model: network in which ``model(frame)`` produces the prediction.
        criterion: loss called as ``criterion(output, gaze)``.
        epoch: epoch index, used only in the log line.

    Returns:
        The batch-size-weighted mean Euclidean distance between prediction
        and target over the whole loader (the "Error L2" column of the log).

    Side effects:
        Increments the module-level ``count_test`` once per batch and prints
        a progress line every 100 increments. Batches are moved to the GPU
        with ``.cuda()``.
    """
    # Validation uses its own counter so it no longer advances the training
    # counter ``count`` and shifts the cadence of the training log.
    global count_test
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    lossesLin = AverageMeter()

    model.eval()
    end = time.time()

    for i,(row,frame,gaze) in enumerate(val_loader):

        # measure data loading time
        data_time.update(time.time() - end)
        gaze = gaze.cuda()
        frame = frame.cuda()

        # Everything below is evaluation only: keep the forward pass and both
        # metrics inside no_grad so no autograd graph is built.
        with torch.no_grad():
            output = model(frame)
            loss = criterion(output, gaze)

            # Per-sample Euclidean distance, averaged over the batch.
            diff = output - gaze
            lossLin = torch.mean(torch.sqrt(torch.sum(torch.mul(diff, diff), 1)))

        losses.update(loss.item(), frame.size(0))
        lossesLin.update(lossLin.item(), frame.size(0))

        batch_time.update(time.time() - end)
        end = time.time()
        count_test=count_test+1
        if(count_test%100==0):
            print('Epoch (val): [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Error L2 {lossLin.val:.4f} ({lossLin.avg:.4f})\t'.format(
                    epoch, i, len(val_loader), batch_time=batch_time,
                   loss=losses,lossLin=lossesLin))

    return lossesLin.avg

In [10]:
# --- Optimisation schedule ---------------------------------------------
epochs, batch_size = 25, 64
lr, weight_decay = 0.0001, 1e-4

# --- Data loading / logging --------------------------------------------
workers = 16        # DataLoader worker processes
print_freq = 1

# --- Best-model tracking (a lower validation error is better) ----------
prec1, best_prec1 = 0, 1e20

# --- Global batch counters used to throttle progress logging -----------
count = count_test = 0

In [11]:
# Load the sample table; the dataset class reads the columns
# 'file', 'Xdot' and 'Ydot' from it.
df_gaze = pd.read_csv('gazeData.csv')

# 90% of the rows go to training; the remaining 10% are held out.
df_train = df_gaze.sample(frac=0.9, random_state=100)
df_tmp = df_gaze.drop(df_train.index)

# Renumber rows from 0 so the frames can be indexed by position; the old
# labels are kept as an extra column.
df_train = df_train.reset_index()
df_tmp = df_tmp.reset_index()

# Split the held-out rows in half: validation / test.
df_val = df_tmp.sample(frac=0.5, random_state = 100)
df_test = df_tmp.drop(df_val.index).reset_index()
df_val = df_val.reset_index()

In [12]:
# Report how many rows ended up in each split.
print('train {} val {} test {}'.format(len(df_train), len(df_val), len(df_test)))

train 737485 val 40972 test 40971


In [13]:
# Wrap the train / validation frames as torch datasets (default image size).
dataTrain = gazeData(df_train)
dataVal = gazeData(df_val)

In [14]:
# Batched, shuffled, multi-worker loaders with pinned host memory.
train_loader = torch.utils.data.DataLoader(
    dataset=dataTrain,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
    pin_memory=True,
)
# NOTE(review): the validation loader is shuffled as well; the averaged
# metrics do not depend on batch order, so this only affects log order.
val_loader = torch.utils.data.DataLoader(
    dataset=dataVal,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
    pin_memory=True,
)

In [15]:
# Let cuDNN benchmark and pick convolution algorithms.
cudnn.benchmark = True
imSize = (224, 224)

# Build the network, wrap it for multi-GPU data parallelism, move it to GPU.
model = torch.nn.DataParallel(VGG_16())
model.cuda()

# Mean-squared-error regression loss, optimised with plain SGD at rate `lr`.
criterion = nn.MSELoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [16]:
# First epoch to run. Stays 0 for a fresh run; the (currently disabled)
# resume code below would overwrite it with the epoch stored in a checkpoint.
epoch = 0
# Disabled checkpoint-resume logic, kept for reference. It restores the model
# weights, the starting epoch and the best validation score seen so far.
# NOTE(review): if re-enabled, replace the bare `except:` with a specific
# exception so unrelated errors are not swallowed.
# saved = load_checkpoint()
# if saved:
#     print('Loading checkpoint for epoch %05d with loss %.5f (which is the mean squared error not the actual linear error)...' % (saved['epoch'], saved['best_prec1']))
#     state = saved['state_dict']
#     try:
#         model.module.load_state_dict(state)
#     except:
#         model.load_state_dict(state)
#     epoch = saved['epoch']
#     best_prec1 = saved['best_prec1']
# else:
#     print('Warning: Could not read checkpoint!')

In [17]:
 # Log the wall-clock time at which training starts.
 print(datetime.datetime.now().time())

16:51:12.802245


In [18]:
# ``epoch`` holds the first epoch still to be run (0 for a fresh run, or the
# value restored from a checkpoint). Capture it before the loop reuses the name.
#
# The earlier "replay" loop (`for epoch in range(0, epoch)`) has been removed:
# it reused ``epoch`` as its loop variable, leaving it at ``start - 1`` so a
# resumed run repeated an already finished epoch. It was also redundant, since
# the learning rate depends only on the current epoch index and is set at the
# top of every iteration below.
start_epoch = epoch
for epoch in range(start_epoch, epochs):
    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set (mean Euclidean error, lower is better)
    prec1 = validate(val_loader, model, criterion, epoch)

    # remember the best (lowest) validation error and save a checkpoint
    is_best = prec1 < best_prec1
    best_prec1 = min(prec1, best_prec1)
    save_checkpoint({
        'epoch': epoch + 1,  # epoch to resume from
        'state_dict': model.state_dict(),
        'best_prec1': best_prec1,
    }, is_best)

Epoch (train): [0][99/11524]	Time 0.284 (0.320)	Data 0.000 (0.019)	Loss 24731.5000 (26130.4508)	
16:51:44.806344
Epoch (train): [0][199/11524]	Time 0.276 (0.299)	Data 0.000 (0.009)	Loss 25046.5449 (26417.8746)	
16:52:12.511124
Epoch (train): [0][299/11524]	Time 0.276 (0.291)	Data 0.000 (0.006)	Loss 27560.9258 (26300.2534)	
16:52:40.190277
Epoch (train): [0][399/11524]	Time 0.277 (0.288)	Data 0.000 (0.005)	Loss 29904.3770 (26397.2224)	
16:53:07.858947
Epoch (train): [0][499/11524]	Time 0.276 (0.285)	Data 0.000 (0.004)	Loss 27503.4629 (26344.8580)	
16:53:35.541150
Epoch (train): [0][599/11524]	Time 0.276 (0.284)	Data 0.000 (0.003)	Loss 29753.2715 (26320.6930)	
16:54:03.202079
Epoch (train): [0][699/11524]	Time 0.281 (0.283)	Data 0.000 (0.003)	Loss 22087.3516 (26310.5666)	
16:54:30.878863
Epoch (train): [0][799/11524]	Time 0.276 (0.282)	Data 0.000 (0.002)	Loss 27247.9160 (26276.4154)	
16:54:58.536740
Epoch (train): [0][899/11524]	Time 0.276 (0.282)	Data 0.000 (0.002)	Loss 25312.0371 (2631