In [5]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torch.utils.data import Dataset, DataLoader, default_convert
from torchvision import transforms, utils
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import cv2
import time
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from datetime import datetime

In [43]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [65]:
class GazeEstimationDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.eyes_frame = pd.read_csv(csv_file)

    def __len__(self):
        return len(self.eyes_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_name = self.eyes_frame.iloc[idx, 0]
        image = io.imread(img_name)
        image = image/255
        rgb_img = np.repeat(image[..., np.newaxis], 3, -1)
        #print(rgb_img.shape, "image shape")
        face_img_coor = np.fromstring(self.eyes_frame.iloc[idx, 3][1:int(len(self.eyes_frame.iloc[idx, 3]) - 1)],
                                      sep=',',
                                      dtype=int)
        roi_eyes_coor = np.fromstring(self.eyes_frame.iloc[idx, 4][1:int(len(self.eyes_frame.iloc[idx, 4]) - 1)],
                                      sep=',', dtype=int)
        face_img = rgb_img[face_img_coor[0]: face_img_coor[0] + face_img_coor[2],
                   face_img_coor[1]: face_img_coor[1] + face_img_coor[3], :]

        opt_flow_name = self.eyes_frame.iloc[idx, 5]
        opt_flow = pd.read_csv(opt_flow_name)
        sample = {'name': img_name, 'face': face_img, 'face_coor': face_img_coor, 'eyes_coor': roi_eyes_coor,
                  'x': self.eyes_frame.iloc[idx, 1], 'y': self.eyes_frame.iloc[idx, 2]}
        '''opt_flow': opt_flow'''
        if self.transform:
            sample = self.transform(sample)
        return sample


class Rescale(object):
    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        image, eyes = sample['face'], sample['eyes_coor']

        h, w = image.shape[:2]
        # if isinstance(self.output_size, int):
        #    if h > w:
        #        new_h, new_w = self.output_size * h / w, self.output_size
        #    else:
        #        new_h, new_w = self.output_size, self.output_size * w / h
        # else:
        new_h, new_w = self.output_size, self.output_size
        new_h, new_w = int(new_h), int(new_w)

        img = transform.resize(image, (new_h, new_w))

        for i in range(2):
            eyes[i] = eyes[i] * new_w / w
        for i in range(2, 4, 1):
            eyes[i] = eyes[i] * new_h / h
        eyes[4] = eyes[4] * new_w / w
        eyes[5] = eyes[5] * new_h / h

        x, y = sample['x'], sample['y']
        # opt_flow = sample['opt_flow']

        return {'face': img, 'eyes_coor': eyes, 'x': x, 'y': y}

    '''opt_flow': opt_flow'''

class ToTensor(object):
    def __call__(self, sample):
        image, eyes = sample['face'], sample['eyes_coor']
        x, y = (sample['x'] + 800) / 1600, (sample['y'] + 800) / 1600
        # opt_flow = sample['opt_flow']
        image = image.transpose((2, 0, 1))
        return {'face': torch.from_numpy(image).type(torch.DoubleTensor),
                'eyes_coor': torch.from_numpy(eyes).type(torch.DoubleTensor),
                'gt_coor': torch.tensor([x, y]).type(torch.DoubleTensor)}

    '''opt_flow': torch.from_numpy(opt_flow.values)'''

"""
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 15, 3)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(15, 5, 8)
        self.pool2 = nn.MaxPool2d(3,3)
        self.conv3 = nn.Conv2d(5, 3, 16)
        self.fc1 = nn.Linear(59536, 120)
        self.d1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(120, 84)
        self.d2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool1(F.relu(self.conv2(x)))
        x = self.pool2(F.relu(self.conv3(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
"""

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(59536, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [45]:
dataset = GazeEstimationDataset(csv_file="full_face/total.csv", root_dir="")
transformed_dataset = GazeEstimationDataset(csv_file="full_face/total.csv", root_dir="",
                                            transform=transforms.Compose([Rescale(256), ToTensor()]))
start = datetime.now()
print("start splitting at: ", start)
train, test = train_test_split(transformed_dataset)
end = datetime.now()
print("splitting done after: ", end - start)

start splitting at:  2022-07-01 10:53:05.868896
splitting done after:  0:08:08.607975


AttributeError: 'list' object has no attribute 'to'

In [75]:
train_size = int(0.8 * len(transformed_dataset))
test_size = len(transformed_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(transformed_dataset, [train_size, test_size])

train_size = int(0.95 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

In [76]:
batch_size = 20
trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
valloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

net = Net().to(device)
net = net.double()
criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [80]:
n = 7
min_valid_loos = np.inf
for epoch in range(n):
    with tqdm(trainloader, unit="batch") as tepoch:
        running_loss = 0.0
        for data in tepoch:
            tepoch.set_description(f"Epoch {epoch}")
            inputs = data['face'].to(device)
            labels = data['gt_coor'].to(device)

            optimizer.zero_grad()

            output = net(inputs)
            train_loss = criterion(output, labels)
            train_loss.backward()
            optimizer.step()

            correct = (abs(output - labels)).sum().item()
            accuracy = 1 - (correct / batch_size)



        for data in tepoch:
            inputs = data['face'].to(device)
            labels = data['gt_coor'].to(device)
            output = net(inputs)
            loss = criterion(output, labels)
            val_loss = loss.item()*inputs.siez(0)
            dec = min_valid_loos > val_loss
            if dec:
                min_valid_loos = val_loss

        tepoch.set_postfix(train_loss=train_loss.item(), accuracy=100*accuracy, val_loss=val_loss, decreased=dec)
        time.sleep(0.1)

print('done')
path = './trained_ts.pth'
torch.save(net.state_dict(), path)

Epoch 0:   2%|▏         | 3/159 [00:17<14:45,  5.68s/batch]


KeyboardInterrupt: 

In [69]:
total = 0
correct = 0
path = './trained_ts.pth'
#net = Net()
net.load_state_dict(torch.load(path))
with torch.no_grad():
    for data in test:
        image = data['face']
        labels = data['gt_coor']
        outputs = net(image)
        _, predicted = torch.max(outputs.data, 1)
        total += 1
        correct += abs(predicted-labels)
print('overall ', correct/total)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x3721 and 59536x120)