In [442]:
from __future__ import print_function, division
import math
import torch
import pandas as pd
from skimage import transform
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt

In [443]:
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Input encoding, read by the dataset classes and used to pick the matching network:
#   0 -> grayscale image read with 3 channels, all three used as input
#   1 -> grayscale image read with 1 channel, used as the only input
#   2 -> grayscale image in channel 0, optical-flow dx and dy in channels 1 and 2
mode = 2

In [444]:
def get_opt_flow(p):
    """Load an optical-flow CSV and return its contents as a NumPy array.

    The ' 1st best' and ' 2nd best' columns are discarded and the remaining
    '# x', ' y', ' dx', ' dy' headers are renamed to 'x', 'y', 'dx', 'dy'
    before the frame is converted with ``DataFrame.to_numpy``.

    Args:
        p: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        2-D ``numpy.ndarray`` with one row per CSV record.

    Raises:
        KeyError: If one of the two columns to drop is missing.
    """
    unused_columns = [' 1st best', ' 2nd best']
    clean_names = {'# x': 'x', ' y': 'y', ' dx': 'dx', ' dy': 'dy'}
    return (
        pd.read_csv(p)
        .drop(columns=unused_columns)
        .rename(columns=clean_names)
        .to_numpy()
    )

In [445]:
class GazeEstimationDataset(Dataset):
    """Face-crop dataset for gaze regression, driven by an index CSV.

    Columns of the CSV are read by position: 0 = image path, 1 / 2 = gaze
    target x / y, 3 = face ROI as a bracketed ``x,y,w,h`` string,
    5 = optical-flow CSV path (only read when the global ``mode`` is 2).

    The global ``mode`` selects the input encoding:
        0 -> image read with 3 channels,
        1 -> image read as a single gray channel,
        2 -> channel 0 from the image, channels 1 and 2 filled with the
             optical-flow dx and dy.

    Args:
        csv_file: Index CSV passed to ``pandas.read_csv``.
        root_dir: Stored on the instance; not used when building paths.
        trans: Optional callable applied to the sample dict.
        opt_flow_root: Optional prefix for the optical-flow paths in column 5;
            defaults to the class attribute ``opt_flow_root``.
    """

    # Prefix prepended to the optical-flow path stored in column 5.
    opt_flow_root = "/Users/PBL/PycharmProjects/sem_10/semester_thesis/"

    def __init__(self, csv_file, root_dir, trans=None, opt_flow_root=None):
        self.root_dir = root_dir
        self.trans = trans
        self.frame = pd.read_csv(csv_file)
        if opt_flow_root is not None:
            self.opt_flow_root = opt_flow_root

    def __len__(self):
        return len(self.frame)

    @staticmethod
    def _embed_optical_flow(img, flow):
        """Write flow dx / dy into channels 1 / 2 of ``img`` in place.

        Each row of ``flow`` is read as (x, y, dx, dy). Rows whose dx and dy
        are both zero are skipped, as in the original per-row loop.
        """
        flow = np.asarray(flow)
        if flow.size == 0:
            return img
        # Coordinates may arrive as floats (mixed-dtype CSV); indices must be ints.
        xs = flow[:, 0].astype(int)
        ys = flow[:, 1].astype(int)
        dxs, dys = flow[:, 2], flow[:, 3]
        moving = (dxs != 0) | (dys != 0)
        img[ys[moving], xs[moving], 1] = dxs[moving]
        img[ys[moving], xs[moving], 2] = dys[moving]
        return img

    @staticmethod
    def _read_image(img_name, *flags):
        """Read an image with OpenCV and fail loudly when it cannot be loaded."""
        img = cv2.imread(img_name, *flags)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("could not read image: {}".format(img_name))
        return img

    def __getitem__(self, idx):
        """Return ``{'face', 'x', 'y'}`` for row ``idx`` (after ``trans``, if any).

        Raises:
            ValueError: If the global ``mode`` is not 0, 1 or 2.
            FileNotFoundError: If the image cannot be read.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        if mode not in (0, 1, 2):
            raise ValueError("wrong mode: {!r} (expected 0, 1 or 2)".format(mode))

        img_name = self.frame.iloc[idx, 0]
        if mode == 1:
            img_gray = self._read_image(img_name, 0)  # flag 0: single-channel read
            img_norm = cv2.normalize(img_gray, None, alpha=0, beta=1,
                                     norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
            # Add an explicit channel axis so the crop below can slice 3 dims.
            img_norm = img_norm.reshape((img_gray.shape[0], img_gray.shape[1], 1))
        else:
            img = self._read_image(img_name)
            img_norm = cv2.normalize(img, None, alpha=0, beta=1,
                                     norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
            if mode == 2:
                # Keep channel 0 and replace the other two with flow dx / dy.
                img_norm[:, :, 1:3] = 0
                opt_path = self.opt_flow_root + self.frame.iloc[idx, 5]
                self._embed_optical_flow(img_norm, get_opt_flow(opt_path))

        # ROI is stored as a bracketed string; strip the brackets and parse.
        roi_text = self.frame.iloc[idx, 3]
        face_img_coor = np.fromstring(roi_text[1:-1], sep=',', dtype=int)
        fx, fy, fw, fh = face_img_coor[0], face_img_coor[1], face_img_coor[2], face_img_coor[3]
        face_img = img_norm[fy:fy + fh, fx:fx + fw, :]

        # Shift by 800 and divide by 1600 to rescale the targets.
        x_gt = (self.frame.iloc[idx, 1] + 800) / 1600
        y_gt = (self.frame.iloc[idx, 2] + 800) / 1600

        sample = {'face': face_img, 'x': x_gt, 'y': y_gt}
        if self.trans:
            sample = self.trans(sample)
        return sample


class Rescale(object):
    """Resize the ``'face'`` image of a sample to a fixed size.

    Args:
        output_size: Either an int (square ``output_size x output_size``
            output) or a ``(height, width)`` tuple.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def _target_shape(self):
        """Return the requested output size as an ``(int, int)`` pair."""
        if isinstance(self.output_size, tuple):
            # The constructor accepts tuples, so honour them as (height, width).
            new_h, new_w = self.output_size
        else:
            new_h = new_w = self.output_size
        return int(new_h), int(new_w)

    def __call__(self, sample):
        """Return a new sample dict with the resized image and unchanged targets."""
        img = transform.resize(sample['face'], self._target_shape())
        return {'face': img, 'x': sample['x'], 'y': sample['y']}

class ToTensor(object):
    """Turn a ``{'face', 'x', 'y'}`` sample into double-precision tensors."""

    def __call__(self, sample):
        """Return ``{'face': CxHxW tensor, 'gt_coor': tensor([x, y])}``."""
        # Reorder the axes from H x W x C to C x H x W.
        channels_first = sample['face'].transpose(2, 0, 1)
        face_tensor = torch.from_numpy(channels_first).double()
        target = torch.tensor([sample['x'], sample['y']]).double()
        return {'face': face_tensor, 'gt_coor': target}


class NetFaceGray(nn.Module):
    """Small CNN mapping a 1-channel face image to a 2-value output.

    ``fc1`` takes 59536 (= 16 * 61 * 61) features, which is what the two
    5x5-conv + 2x2-pool stages produce for a 256 x 256 input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=59536, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=2)

    def forward(self, x):
        # Two conv -> ReLU -> max-pool stages.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten everything except the batch dimension.
        features = x.flatten(start_dim=1)
        for hidden in (self.fc1, self.fc2):
            features = F.relu(hidden(features))
        return self.fc3(features)


class NetFace3Chanel(nn.Module):
    """Small CNN mapping a 3-channel face image to a 2-value output.

    ``fc1`` takes 59536 (= 16 * 61 * 61) features, which is what the two
    5x5-conv + 2x2-pool stages produce for a 256 x 256 input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=59536, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=2)

    def forward(self, x):
        # Two conv -> ReLU -> max-pool stages.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten everything except the batch dimension.
        features = x.flatten(start_dim=1)
        for hidden in (self.fc1, self.fc2):
            features = F.relu(hidden(features))
        return self.fc3(features)

In [446]:
class GazeEstimationDatasetEyes(Dataset):
    """Eye-region dataset for gaze regression, driven by an index CSV.

    Columns of the CSV are read by position: 0 = image path, 1 / 2 = gaze
    target x / y, 4 = eye ROI as a bracketed ``x,y,w,h`` string,
    5 = optical-flow CSV path (only read when the global ``mode`` is 2).

    The global ``mode`` selects the input encoding:
        0 -> image read with 3 channels,
        1 -> image read as a single gray channel,
        2 -> channel 0 from the image, channels 1 and 2 filled with the
             optical-flow dx and dy.

    Args:
        csv_file: Index CSV passed to ``pandas.read_csv``.
        root_dir: Stored on the instance; not used when building paths.
        trans: Optional callable applied to the sample dict.
        opt_flow_root: Optional prefix for the optical-flow paths in column 5;
            defaults to the class attribute ``opt_flow_root``.
    """

    # Prefix prepended to the optical-flow path stored in column 5.
    opt_flow_root = "/Users/PBL/PycharmProjects/sem_10/semester_thesis/"

    def __init__(self, csv_file, root_dir, trans=None, opt_flow_root=None):
        self.frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.trans = trans
        if opt_flow_root is not None:
            self.opt_flow_root = opt_flow_root

    def __len__(self):
        return int(len(self.frame))

    @staticmethod
    def _embed_optical_flow(img, flow):
        """Write flow dx / dy into channels 1 / 2 of ``img`` in place.

        Each row of ``flow`` is read as (x, y, dx, dy). Rows whose dx and dy
        are both zero are skipped, as in the original per-row loop.
        """
        flow = np.asarray(flow)
        if flow.size == 0:
            return img
        # Coordinates may arrive as floats (mixed-dtype CSV); indices must be ints.
        xs = flow[:, 0].astype(int)
        ys = flow[:, 1].astype(int)
        dxs, dys = flow[:, 2], flow[:, 3]
        moving = (dxs != 0) | (dys != 0)
        img[ys[moving], xs[moving], 1] = dxs[moving]
        img[ys[moving], xs[moving], 2] = dys[moving]
        return img

    @staticmethod
    def _read_image(img_name, *flags):
        """Read an image with OpenCV and fail loudly when it cannot be loaded."""
        img = cv2.imread(img_name, *flags)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("could not read image: {}".format(img_name))
        return img

    def __getitem__(self, idx):
        """Return ``{'eyes_img', 'x', 'y'}`` for row ``idx`` (after ``trans``, if any).

        Raises:
            ValueError: If the global ``mode`` is not 0, 1 or 2.
            FileNotFoundError: If the image cannot be read.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        if mode not in (0, 1, 2):
            raise ValueError("wrong mode: {!r} (expected 0, 1 or 2)".format(mode))

        img_name = self.frame.iloc[idx, 0]
        if mode == 1:
            img_gray = self._read_image(img_name, 0)  # flag 0: single-channel read
            img_norm_eyes = cv2.normalize(img_gray, None, alpha=0, beta=1,
                                          norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
            # Add an explicit channel axis so the crop below can slice 3 dims.
            img_norm_eyes = img_norm_eyes.reshape(
                (int(img_gray.shape[0]), int(img_gray.shape[1]), 1))
        else:
            img = self._read_image(img_name)
            img_norm_eyes = cv2.normalize(img, None, alpha=0, beta=1,
                                          norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
            if mode == 2:
                # Keep channel 0 and replace the other two with flow dx / dy.
                img_norm_eyes[:, :, 1:3] = 0
                opt_path = self.opt_flow_root + self.frame.iloc[idx, 5]
                self._embed_optical_flow(img_norm_eyes, get_opt_flow(opt_path))

        # ROI is stored as a bracketed string; strip the brackets and parse.
        roi_text = self.frame.iloc[idx, 4]
        eyes_roi = np.fromstring(roi_text[1:-1], sep=',', dtype=int)
        x_eye, y_eye, w, h = eyes_roi[0], eyes_roi[1], eyes_roi[2], eyes_roi[3]
        eyes_img = img_norm_eyes[y_eye: y_eye + h, x_eye: x_eye + w, :]

        # Shift by 800 and divide by 1600 to rescale the targets.
        x_gt = (self.frame.iloc[idx, 1] + 800) / 1600
        y_gt = (self.frame.iloc[idx, 2] + 800) / 1600

        sample = {'eyes_img': eyes_img, 'x': x_gt, 'y': y_gt}
        if self.trans:
            sample = self.trans(sample)
        return sample

class RescaleEyes(object):
    """Resize the ``'eyes_img'`` image of a sample to a fixed size.

    Args:
        output_size: Either an int (square ``output_size x output_size``
            output) or a ``(height, width)`` tuple.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def _target_shape(self):
        """Return the requested output size as an ``(int, int)`` pair."""
        if isinstance(self.output_size, tuple):
            # The constructor accepts tuples, so honour them as (height, width).
            new_h, new_w = self.output_size
        else:
            new_h = new_w = self.output_size
        return int(new_h), int(new_w)

    def __call__(self, sample):
        """Return a new sample dict with the resized image and unchanged targets."""
        img = transform.resize(sample['eyes_img'], self._target_shape())
        return {'eyes_img': img, 'x': sample['x'], 'y': sample['y']}

class ToTensorEyes(object):
    """Turn a ``{'eyes_img', 'x', 'y'}`` sample into double-precision tensors."""

    def __call__(self, sample):
        """Return ``{'eyes_img': CxHxW tensor, 'gt_coor': tensor([x, y])}``."""
        # Reorder the axes from H x W x C to C x H x W.
        channels_first = sample['eyes_img'].transpose(2, 0, 1)
        eyes_tensor = torch.from_numpy(channels_first).double()
        target = torch.tensor([sample['x'], sample['y']]).double()
        return {'eyes_img': eyes_tensor, 'gt_coor': target}

class NetEyesGray(nn.Module):
    """Small CNN mapping a 1-channel eye-region image to a 2-value output.

    ``fc1`` takes 13456 (= 16 * 29 * 29) features, which is what the two
    5x5-conv + 2x2-pool stages produce for a 128 x 128 input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=13456, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=2)

    def forward(self, x):
        # Two conv -> ReLU -> max-pool stages.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten everything except the batch dimension.
        features = x.flatten(start_dim=1)
        for hidden in (self.fc1, self.fc2):
            features = F.relu(hidden(features))
        return self.fc3(features)

class NetEyesRGB(nn.Module):
    """Small CNN mapping a 3-channel eye-region image to a 2-value output.

    ``fc1`` takes 13456 (= 16 * 29 * 29) features, which is what the two
    5x5-conv + 2x2-pool stages produce for a 128 x 128 input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=13456, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=2)

    def forward(self, x):
        # Two conv -> ReLU -> max-pool stages.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten everything except the batch dimension.
        features = x.flatten(start_dim=1)
        for hidden in (self.fc1, self.fc2):
            features = F.relu(hidden(features))
        return self.fc3(features)

In [447]:
# Face crops: resized to 256 x 256, then converted to double tensors.
transformed_dataset = GazeEstimationDataset(
    "full_face/total.csv",
    "",
    trans=transforms.Compose([
        Rescale(256),
        ToTensor(),
    ]),
)

# Eye-region crops from the same index file: resized to 128 x 128, then converted.
transformed_dataset_eyes = GazeEstimationDatasetEyes(
    "full_face/total.csv",
    "",
    trans=transforms.Compose([
        RescaleEyes(128),
        ToTensorEyes(),
    ]),
)

Datasets containing full-face pictures

In [448]:
# Fixed-seed generator so the train / val / test partition is identical on
# every run. Without it, evaluating a checkpoint loaded from disk after a
# kernel restart could test on samples the checkpoint was trained on.
split_generator = torch.Generator().manual_seed(0)

# Step 1: hold out 20 % of all samples for testing.
train_size = int(0.8 * len(transformed_dataset))
test_size = len(transformed_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    transformed_dataset, [train_size, test_size], generator=split_generator)

# Step 2: carve 5 % of the remaining training samples off for validation.
train_size = int(0.95 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    train_dataset, [train_size, val_size], generator=split_generator)

In [449]:
# Mini-batch size shared by the three face loaders.
batch_size = 15

# Training and validation batches are reshuffled on every pass; test order is fixed.
trainloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
valloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
testloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

# 1-channel face network (used when mode == 1), in double precision to match
# the DoubleTensor samples produced by ToTensor.
model_gray = NetFaceGray().to(device).double()
criterion_gray = nn.MSELoss().to(device)
optimizer_gray = optim.Adam(params=model_gray.parameters(), lr=0.001)

# 3-channel face network (used when mode is 0 or 2), also in double precision.
model_rgb = NetFace3Chanel().to(device).double()
criterion_rgb = nn.MSELoss().to(device)
optimizer_rgb = optim.Adam(params=model_rgb.parameters(), lr=0.001)

In [None]:
# Train the face network selected by `mode` for `n` epochs, validate after
# every epoch, and save the final weights. Per-batch losses and mean gaze
# distances (x100) are collected for the plotting cell.
n = 30
train_loss_arr = []
val_loss_arr = []
train_dist_arr = []
val_dist_arr = []

# Resolve network / loss / optimizer / checkpoint path once instead of
# branching on `mode` inside every batch.
if mode == 1:
    face_model, face_criterion, face_optimizer = model_gray, criterion_gray, optimizer_gray
    path = './trained_face_gray.pth'
elif mode == 0:
    face_model, face_criterion, face_optimizer = model_rgb, criterion_rgb, optimizer_rgb
    path = './trained_face_3chanel.pth'
elif mode == 2:
    face_model, face_criterion, face_optimizer = model_rgb, criterion_rgb, optimizer_rgb
    path = './trained_face_opt.pth'
else:
    raise ValueError("wrong mode: {!r} (expected 0, 1 or 2)".format(mode))


def face_batch_distance(output, labels):
    """Mean Euclidean distance between predictions and targets of one batch.

    Both arguments are (batch, 2) tensors; the result is a Python float.
    """
    # detach(): the metric must not take part in autograd.
    return (output.detach() - labels).pow(2).sum(dim=1).sqrt().mean().item()


for epoch in range(n):
    # Validation below switches to eval mode, so re-enable training mode here.
    face_model.train()
    with tqdm(trainloader, unit="batch") as tepoch:
        tepoch.set_description(f"Training {epoch}")
        for data in tepoch:
            # The model lives on `device`; the batch has to be moved there too.
            faces = data['face'].to(device)
            labels = data['gt_coor'].to(device)

            face_optimizer.zero_grad()
            output = face_model(faces)
            train_loss = face_criterion(output, labels)
            train_loss.backward()
            face_optimizer.step()

            distance = face_batch_distance(output, labels)
            train_loss_value = train_loss.item()
            train_dist_arr.append(distance * 100)
            train_loss_arr.append(train_loss_value)
            tepoch.set_postfix(train_loss=train_loss_value, distance=100 * distance)

    face_model.eval()
    with tqdm(valloader, unit="batch") as tepoch, torch.no_grad():
        tepoch.set_description(f"Validation {epoch}")
        for data in tepoch:
            faces = data['face'].to(device)
            labels = data['gt_coor'].to(device)
            output = face_model(faces)
            # Per-batch mean loss, on the same scale as the training loss.
            # It was previously multiplied by the batch size, which made the
            # two curves incomparable and shrank the last, smaller batch.
            val_loss = face_criterion(output, labels).item()
            val_dist = face_batch_distance(output, labels)
            val_dist_arr.append(val_dist * 100)
            val_loss_arr.append(val_loss)
            tepoch.set_postfix(val_loss=val_loss, val_dist=100 * val_dist)

print('done')
torch.save(face_model.state_dict(), path)

Training 0:  15%|█▌        | 32/212 [01:35<08:34,  2.86s/batch, distance=35.9, train_loss=0.0707]

Visualizing the distance and loss from the previous training on face images

In [None]:
# X axes: one index per recorded batch.
x_train_dist = list(range(len(train_dist_arr)))
x_train_loss = list(range(len(train_loss_arr)))

x_val_dist = list(range(len(val_dist_arr)))
x_val_loss = list(range(len(val_loss_arr)))

fig, axs = plt.subplots(2, 2, figsize=(16, 9))

# Top row: training curves; bottom row: validation curves.
face_panels = (
    (axs[0, 0], x_train_dist, train_dist_arr, 'Training distances'),
    (axs[0, 1], x_train_loss, train_loss_arr, 'Training losses'),
    (axs[1, 0], x_val_dist, val_dist_arr, 'Validation distances'),
    (axs[1, 1], x_val_loss, val_loss_arr, 'Validation losses'),
)
for panel_ax, panel_x, panel_y, panel_title in face_panels:
    panel_ax.plot(panel_x, panel_y)
    panel_ax.set_title(panel_title)

# The output file name depends on the input mode; unknown modes save nothing.
face_figure_name = {
    1: "face_gray.jpg",
    0: "face_3channel.jpg",
    2: "face_opt.jpg",
}.get(mode)
if face_figure_name is not None:
    plt.savefig(face_figure_name)

In [None]:
def evaluate_face_model(model, weights_path):
    """Load saved weights into ``model`` and evaluate it on ``testloader``.

    Args:
        model: Face network whose ``state_dict`` layout matches the checkpoint.
        weights_path: Path of the ``state_dict`` saved by the training cell.

    Returns:
        ``(distances, targets, guesses)``: the mean Euclidean distance of each
        batch as a float, and the per-batch target / prediction tensors (on CPU).
    """
    # map_location lets a checkpoint saved on another device load on this one.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.eval()
    distances, targets, guesses = [], [], []
    with torch.no_grad():
        with tqdm(testloader, unit="batch") as tepoch:
            for data in tepoch:
                # The model lives on `device`; the batch has to be moved there too.
                image = data['face'].to(device)
                labels = data['gt_coor'].to(device)
                output = model(image)
                distance = (output - labels).pow(2).sum(dim=1).sqrt().mean().item()
                distances.append(distance)
                targets.append(labels.cpu())
                guesses.append(output.cpu())
                tepoch.set_postfix(distance=100 * distance)
    print("done")
    return distances, targets, guesses


# NOTE(review): these lists store the raw distance, while the eye-region test
# cell stores distance * 100 - confirm which scale downstream analysis expects.
# still TODO gt, distances etc
if mode == 1:
    path = './trained_face_gray.pth'
    distances_face, gt_face, guess_face = evaluate_face_model(model_gray, path)
elif mode == 0:
    path = './trained_face_3chanel.pth'
    distances_face_3channel = evaluate_face_model(model_rgb, path)[0]
elif mode == 2:
    path = './trained_face_opt.pth'
    distances_face_opt = evaluate_face_model(model_rgb, path)[0]

Visualizing the testing performance

Dataset containing only the eye portion (40 FPS)

In [None]:
# Fixed-seed generator so the train / val / test partition is identical on
# every run. Without it, evaluating a checkpoint loaded from disk after a
# kernel restart could test on samples the checkpoint was trained on.
split_generator_eyes = torch.Generator().manual_seed(0)

# Step 1: hold out 20 % of all samples for testing.
train_size = int(0.8 * len(transformed_dataset_eyes))
test_size = len(transformed_dataset_eyes) - train_size
train_dataset_eyes, test_dataset_eyes = torch.utils.data.random_split(
    transformed_dataset_eyes, [train_size, test_size], generator=split_generator_eyes)

# Step 2: carve 5 % of the remaining training samples off for validation.
train_size = int(0.95 * len(train_dataset_eyes))
val_size = len(train_dataset_eyes) - train_size
train_dataset_eyes, val_dataset_eyes = torch.utils.data.random_split(
    train_dataset_eyes, [train_size, val_size], generator=split_generator_eyes)

In [None]:
# Mini-batch size shared by the three eye-region loaders.
batch_size = 15

# Training and validation batches are reshuffled on every pass; test order is fixed.
trainloader_eyes = DataLoader(
    train_dataset_eyes,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
valloader_eyes = DataLoader(
    val_dataset_eyes,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
testloader_eyes = DataLoader(
    test_dataset_eyes,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

# 1-channel eye network (used when mode == 1), in double precision to match
# the DoubleTensor samples produced by ToTensorEyes.
model_eyes_gray = NetEyesGray().to(device).double()
criterion_eyes_gray = nn.MSELoss().to(device)
optimizer_eyes_gray = optim.Adam(params=model_eyes_gray.parameters(), lr=0.001)

# 3-channel eye network (used when mode is 0 or 2), also in double precision.
model_eyes_3chanel = NetEyesRGB().to(device).double()
criterion_eyes_3chanel = nn.MSELoss().to(device)
optimizer_eyes_3chanel = optim.Adam(params=model_eyes_3chanel.parameters(), lr=0.001)

In [None]:
# Train the eye-region network selected by `mode` for `n` epochs, validate
# after every epoch, and save the final weights. Per-batch losses and mean gaze
# distances (x100) are collected for the plotting cell.
n = 30
min_valid_loos = np.inf  # kept for compatibility; not read anywhere in this cell
train_eyes_loss_arr = []
val_eyes_loss_arr = []
train_eyes_dist_arr = []
val_eyes_dist_arr = []

# Resolve network / loss / optimizer / checkpoint path once instead of
# branching on `mode` inside every batch.
if mode == 1:
    eyes_model = model_eyes_gray
    eyes_criterion = criterion_eyes_gray
    eyes_optimizer = optimizer_eyes_gray
    path = './trained_eyes_gray.pth'
elif mode == 0:
    eyes_model = model_eyes_3chanel
    eyes_criterion = criterion_eyes_3chanel
    eyes_optimizer = optimizer_eyes_3chanel
    path = './trained_eyes_3chanel.pth'
elif mode == 2:
    eyes_model = model_eyes_3chanel
    eyes_criterion = criterion_eyes_3chanel
    eyes_optimizer = optimizer_eyes_3chanel
    path = './trained_eyes_opt.pth'
else:
    raise ValueError("wrong mode: {!r} (expected 0, 1 or 2)".format(mode))


def eyes_batch_distance(output, labels):
    """Mean Euclidean distance between predictions and targets of one batch.

    Both arguments are (batch, 2) tensors; the result is a Python float.
    """
    # detach(): the metric must not take part in autograd.
    return (output.detach() - labels).pow(2).sum(dim=1).sqrt().mean().item()


for epoch in range(n):
    # Validation below switches to eval mode, so re-enable training mode here.
    eyes_model.train()
    with tqdm(trainloader_eyes, unit="batch") as tepoch:
        tepoch.set_description(f"Training {epoch}")
        for data in tepoch:
            # The samples carry only 'eyes_img' and 'gt_coor'; the former
            # data['opt_flow'] lookup raised KeyError. In mode 2 the flow is
            # already embedded in channels 1 and 2 of the image.
            inputs = data['eyes_img'].to(device)
            labels = data['gt_coor'].to(device)

            eyes_optimizer.zero_grad()
            output = eyes_model(inputs)
            train_loss = eyes_criterion(output, labels)
            train_loss.backward()
            eyes_optimizer.step()

            distance = eyes_batch_distance(output, labels)
            train_loss_value = train_loss.item()
            train_eyes_dist_arr.append(distance * 100)
            train_eyes_loss_arr.append(train_loss_value)
            tepoch.set_postfix(train_loss=train_loss_value, distance=100 * distance)

    eyes_model.eval()
    with tqdm(valloader_eyes, unit="batch") as tepoch, torch.no_grad():
        tepoch.set_description(f"Validation {epoch}")
        for data in tepoch:
            inputs = data['eyes_img'].to(device)
            labels = data['gt_coor'].to(device)
            output = eyes_model(inputs)
            # Per-batch mean loss, on the same scale as the training loss.
            # It was previously multiplied by the batch size, which made the
            # two curves incomparable and shrank the last, smaller batch.
            val_loss = eyes_criterion(output, labels).item()
            val_dist = eyes_batch_distance(output, labels)
            val_eyes_dist_arr.append(val_dist * 100)
            val_eyes_loss_arr.append(val_loss)
            # The value shown is a distance, not an accuracy, hence `val_dist`.
            tepoch.set_postfix(val_loss=val_loss, val_dist=100 * val_dist)

print('done')
torch.save(eyes_model.state_dict(), path)

Visualizing performance for the eye region only

In [None]:
# X axes: one index per recorded batch.
x_train_dist = list(range(len(train_eyes_dist_arr)))
x_train_loss = list(range(len(train_eyes_loss_arr)))

x_val_dist = list(range(len(val_eyes_dist_arr)))
x_val_loss = list(range(len(val_eyes_loss_arr)))

fig, axs = plt.subplots(2, 2, figsize=(16, 9))

# Top row: training curves; bottom row: validation curves.
eyes_panels = (
    (axs[0, 0], x_train_dist, train_eyes_dist_arr, 'Training distances'),
    (axs[0, 1], x_train_loss, train_eyes_loss_arr, 'Training losses'),
    (axs[1, 0], x_val_dist, val_eyes_dist_arr, 'Validation distances'),
    (axs[1, 1], x_val_loss, val_eyes_loss_arr, 'Validation losses'),
)
for panel_ax, panel_x, panel_y, panel_title in eyes_panels:
    panel_ax.plot(panel_x, panel_y)
    panel_ax.set_title(panel_title)

# The output file name depends on the input mode; unknown modes save nothing.
eyes_figure_name = {
    1: "eye_gray.jpg",
    0: "eye_3channel.jpg",
    2: "eye_opt.jpg",
}.get(mode)
if eyes_figure_name is not None:
    plt.savefig(eyes_figure_name)

In [None]:
def evaluate_eyes_model(model, weights_path):
    """Load saved weights into ``model`` and evaluate it on ``testloader_eyes``.

    Args:
        model: Eye network whose ``state_dict`` layout matches the checkpoint.
        weights_path: Path of the ``state_dict`` saved by the training cell.

    Returns:
        ``(distances, targets, guesses)``: the mean Euclidean distance of each
        batch multiplied by 100, and the per-batch target / prediction tensors
        (on CPU).
    """
    # map_location lets a checkpoint saved on another device load on this one.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.eval()
    distances, targets, guesses = [], [], []
    with torch.no_grad():
        with tqdm(testloader_eyes, unit="batch") as tepoch:
            for data in tepoch:
                # The model lives on `device`; the batch has to be moved there too.
                image = data['eyes_img'].to(device)
                labels = data['gt_coor'].to(device)
                output = model(image)
                distance = (output - labels).pow(2).sum(dim=1).sqrt().mean().item()
                distances.append(distance * 100)
                targets.append(labels.cpu())
                guesses.append(output.cpu())
                tepoch.set_postfix(distance=100 * distance)
    print("done")
    return distances, targets, guesses


# still TODO gt, distances etc
if mode == 1:
    path = './trained_eyes_gray.pth'
    distances_eyes, gt_eyes, guess_eyes = evaluate_eyes_model(model_eyes_gray, path)
elif mode == 0:
    path = './trained_eyes_3chanel.pth'
    distances_eyes_3channel = evaluate_eyes_model(model_eyes_3chanel, path)[0]
elif mode == 2:
    path = './trained_eyes_opt.pth'
    distances_eyes_opt = evaluate_eyes_model(model_eyes_3chanel, path)[0]