In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import h5py
import math
import cv2
import pickle

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet50, ResNet50_Weights
from torchvision import transforms as T

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
device = torch.device('cuda:0')
torch.manual_seed(17)

import math
import copy
import os, sys

  from .autonotebook import tqdm as notebook_tqdm


In [28]:
class DGazeDataset(Dataset):
    def __init__(self, driver_data, drivers, sequences, transform = False):
        self.driver_data = driver_data
        self.drivers = drivers
        self.sequences = sequences
        self.left_eye =  np.empty((0, 36, 60, 3))
        self.facial_features = np.empty((0, 14))
        self.gaze_point = np.empty((0, 2))
#         print(sequences)
        
        for ix, driver in enumerate(drivers):
            print("==>", driver)
            data = driver_data[driver]
            for seq in tqdm(sequences):
                if 'seq' + str(seq) in data.keys():
                    data_seq = data['seq' + str(seq)]
                    self.left_eye = np.concatenate((self.left_eye, data_seq['left_eye']), axis=0)
                    seq_facial_features = np.concatenate((data_seq['headpose_pupil'][:,1:], data_seq['face_location']), axis=-1)
                    self.facial_features = np.concatenate((self.facial_features, seq_facial_features), axis=0)
                    self.gaze_point = np.concatenate((self.gaze_point, data_seq['gaze_point'][:,:2]), axis=0)

        print("Data loaded!")
        self.normalize_eye_image()
        self.normalize_facial_features() 
        self.fix_gaze_point()
        self.gaze_point[:,0][self.gaze_point[:,0]<0] = 0
        self.gaze_point[:,1][self.gaze_point[:,1]<0] = 0
        self.index = np.arange(len(self.gaze_point))
        
        
    def fix_gaze_point(self):
        self.gaze_point[:,1][self.gaze_point[:,1]>=1080]=1080-1
        self.gaze_point[:,0][self.gaze_point[:,0]>=1920]=1920-1


    def normalize_eye_image(self):
        # Calculate mean and standard deviation per channel
        mean = self.left_eye.mean(axis=(0, 1, 2), keepdims=True)
        std = self.left_eye.std(axis=(0, 1, 2), keepdims=True)

#         print("\nMean before normalization:", self.left_eye.mean(axis=(0, 1, 2)))
#         print("Standard deviation before normalization:", self.left_eye.std(axis=(0, 1, 2)))
        
        # Normalize using mean and standard deviation
        self.left_eye = (self.left_eye - mean) / std

#         # Check the result
#         print("\nMean after normalization:", self.left_eye.mean(axis=(0, 1, 2)))
#         print("Standard deviation after normalization:", self.left_eye.std(axis=(0, 1, 2)))
        
    def normalize_facial_features(self):
        # Column-wise mean and standard deviation
        mean = self.facial_features.mean(axis=0)
        std = self.facial_features.std(axis=0)
        
#         # Verify that each column now has mean 0 and variance 1
#         print("\nMean of each column before normalization:", self.facial_features.mean(axis=0))
#         print("Variance of each column beforeafter normalization:", self.facial_features.var(axis=0))

        # Mean-variance normalization (standardization)
        self.facial_features = (self.facial_features - mean) / std

#         # Verify that each column now has mean 0 and variance 1
#         print("\nMean of each column after normalization:", self.facial_features.mean(axis=0))
#         print("Variance of each column after normalization:", self.facial_features.var(axis=0))
        

    def __len__(self):
        return len(self.index)

    def __getitem__(self, idx):
        index = self.index[idx]
        left_eye = self.left_eye[index]
        left_eye = np.transpose(left_eye, (2,0,1))
        left_eye = torch.tensor(left_eye, dtype=torch.float32, device=device)
        facial_features = torch.tensor(self.facial_features[index], dtype=torch.float32, device=device)
        gaze_point = torch.tensor(self.gaze_point[index], dtype=torch.float32, device=device)
        return  left_eye, facial_features, gaze_point


In [32]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init

# Define the Left Eye Convolutional Model
class LeftEyeModel(nn.Module):
    def __init__(self):
        super(LeftEyeModel, self).__init__()
        # First Conv layer
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=20, kernel_size=3)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(p=0.5)
        
        # Second Conv layer
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=3)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = self.dropout(x)
        
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        
        x = torch.flatten(x, 1)  # Flatten the tensor for the next layer
        return x

# Define the Face Feature Dense Model
class FaceFeaturesModel(nn.Module):
    def __init__(self):
        super(FaceFeaturesModel, self).__init__()
        self.fc1 = nn.Linear(14, 16)  # Equivalent to Dense(16, activation='relu', input_dim=14)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return x

# Define the Merged Model
class IDGAZE(nn.Module):
    def __init__(self):
        super(IDGAZE, self).__init__()
        self.left_eye = LeftEyeModel()
        self.face_features = FaceFeaturesModel()
        
        # Dense Layers after Concatenation
        self.fc2 = nn.Linear(4566, 512)  # Adjust 50*7*7 based on left_eye output dimensions
        self.fc3 = nn.Linear(512, 2)

    def forward(self, x1, x2):
        x1 = self.left_eye(x1)
        x2 = self.face_features(x2)
        
        # Concatenate outputs from the two models
        x = torch.cat((x1, x2), dim=1)
        
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Instantiate the model and print the summary
model = IDGAZE()
print(model)

IDGAZE(
  (left_eye): LeftEyeModel(
    (conv1): Conv2d(3, 20, kernel_size=(3, 3), stride=(1, 1))
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout): Dropout(p=0.5, inplace=False)
    (conv2): Conv2d(20, 50, kernel_size=(3, 3), stride=(1, 1))
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (face_features): FaceFeaturesModel(
    (fc1): Linear(in_features=14, out_features=16, bias=True)
  )
  (fc2): Linear(in_features=4566, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=2, bias=True)
)


In [4]:
# def test(model, testdataset, batch_size=64):
#     criterion = nn.L1Loss(reduction='sum')
#     loader = DataLoader(testdataset, batch_size=batch_size, shuffle=False, num_workers=0)
#     loss = 0
#     numsamples = 0
#     with torch.no_grad():
#         for ii, (leye, x, gt) in enumerate(loader):
#             out = model(leye.to(device), x.to(device))
#             loss = loss + criterion(out, gt.to(device)).item()
#             numsamples +=len(gt)

#             print(f'Done {numsamples} of {len(testdataset)}              ', end='\r')

#     print('loss is ', loss/numsamples)


# def train(model, traindataset, testdataset, batch_size=32):
#     criterion = nn.L1Loss(reduction='sum')
#     optimizer = optim.Adam(model.parameters(), lr=1e-7)
#     loader = DataLoader(traindataset, batch_size=batch_size, shuffle=False, num_workers=0)
#     for epoch in range(nepochs):
#         running_loss=0
#         numsamples=0
#         for ii, (leye, feature, gt) in enumerate(loader):
#             out = model(leye.to(device), feature.to(device))
#             numsamples += gt.shape[0]
#             optimizer.zero_grad()
#             loss  = criterion(out, gt.to(device))
#             loss.backward()
#             optimizer.step()
#             running_loss+=loss.item()
#             if (ii+1)%10 == 0:
#                 print(f'batch {ii} loss: {running_loss/numsamples:.4f}                    ', end = '\r', flush=True)
#                 running_loss=0
#                 numsamples = 0

#         print(f'Epoch {epoch} \n\ttrain error ', end=' ')
#         test(model, traindataset)
#         print('\n\ttest error: ', end= ' ')
#         test(model, testdataset)
#         torch.save(model.state_dict(), f'708_Model_epoch_{epoch:d}.pth')

In [34]:

# Function to log messages to both the console and a file
def log_message(message, logfile='training_log.txt'):
    print(message)
    with open(logfile, 'a') as f:
        f.write(message + '\n')

# Testing function
def test(model, testdataset, batch_size=64, logfile='training_log.txt'):
    criterion = nn.L1Loss(reduction='mean')
    loader = DataLoader(testdataset, batch_size=batch_size, shuffle=False, num_workers=0)
    loss = 0
    numsamples = 0
    with torch.no_grad():
        for ii, (leye, x, gt) in enumerate(loader):
            out = model(leye.to(device), x.to(device))
            batch_loss = criterion(out, gt.to(device)).item()
            loss += batch_loss
            numsamples += len(gt)
            
#             log_message(f'Testing: Processed {numsamples}/{len(testdataset)} samples', logfile)

#     avg_loss = loss / numsamples
    avg_loss = loss
    log_message(f'Testing completed. Average loss: {avg_loss:.4f}\n', logfile)
    return avg_loss

# Training function
def train(model, traindataset, testdataset, batch_size=32, nepochs=10, dump_path=None, logfile='training_log.txt'):
    criterion = nn.L1Loss(reduction='mean')
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    train_loader = DataLoader(traindataset, batch_size=batch_size, shuffle=True, num_workers=0)

    logfile = os.path.join(dump_path, logfile)
    log_message('Training started\n', logfile)

    for epoch in range(nepochs):
        running_loss = 0
        numsamples = 0
        log_message(f'Starting Epoch {epoch + 1}/{nepochs}', logfile)

        # Training loop
        for ii, (leye, feature, gt) in enumerate(train_loader):
            out = model(leye.to(device), feature.to(device))
            loss = criterion(out, gt.to(device))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            # Update tracking metrics
            batch_loss = loss.item()
            running_loss += batch_loss
            numsamples += gt.shape[0]

            if (ii + 1) % 10 == 0:
                avg_batch_loss = running_loss 
#                 avg_batch_loss = running_loss / numsamples
#                 log_message(f'Epoch [{epoch + 1}/{nepochs}], Batch [{ii + 1}/{len(train_loader)}], '
#                             f'Average Loss: {avg_batch_loss:.4f}', logfile)
                running_loss = 0
                numsamples = 0

        # Epoch summary
        log_message(f'Completed Epoch {epoch + 1}/{nepochs}', logfile)
        train_error = test(model, traindataset, batch_size, logfile)
        test_error = test(model, testdataset, batch_size, logfile)
        log_message(f'\tTrain Error: {train_error:.4f}\n\tTest Error: {test_error:.4f}\n', logfile)

        # Save model after each epoch
        torch.save(model.state_dict(), f'Model_epoch_{epoch + 1}.pth')
        log_message(f'Model saved after Epoch {epoch + 1}\n', logfile)

In [35]:
# if __name__ =='__main__':
# 	device = int(sys.argv[1])
# 	if device==-1:
# 		device = torch.device('cpu')
# 	else:
# 		device = torch.device(f'cuda:{device}')


## Path
DGAZE_extracted_data = '/pfs01/performance-tier/rd_algo/algo_bin/ishadua/codes/old_codes/DGAZE/DGAZE_extracted_data/DGAZE_extracted_data.pkl'
DGAZE_data_split = '/pfs01/performance-tier/rd_algo/algo_bin/ishadua/codes/old_codes/DGAZE/DGAZE_extracted_data/DGAZE_data_split.pkl'
dump_path = '/pfs01/performance-tier/rd_algo/algo_bin/ishadua/codes/old_codes/DGAZE/results/save_models/run1/'

## Training Params
batch_size = 32
learning_rate = 0.001
nepochs = 300

# Load dictionary
with open(DGAZE_extracted_data, 'rb') as file:
    driver_data = pickle.load(file)
    
# Load dictionary
with open(DGAZE_data_split, 'rb') as file:
    data_split = pickle.load(file)

In [30]:
train_dataset = DGazeDataset(driver_data, data_split['drivers_train'][:2], data_split['sequence_train'])
val_dataset = DGazeDataset(driver_data, data_split['drivers_val'][:2], data_split['sequence_val'])
test_dataset = DGazeDataset(driver_data, data_split['drivers_test'][:2], data_split['sequence_test'])

print(len(train_dataset), len(val_dataset), len(test_dataset))

==> driver22


100%|█████████████████████████████████████████████████████████████████████████████| 72/72 [00:03<00:00, 18.34it/s]


==> driver8


100%|█████████████████████████████████████████████████████████████████████████████| 72/72 [00:12<00:00,  5.81it/s]


Data loaded!
==> driver16


100%|█████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 69.39it/s]


==> driver5


100%|█████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 26.87it/s]


Data loaded!
==> driver14


100%|█████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 66.88it/s]


==> driver3


100%|█████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 22.56it/s]


Data loaded!
15770 3399 3719


In [None]:
model = IDGAZE().to(device)
train(model, train_dataset, val_dataset, dump_path=dump_path)

Training started

Starting Epoch 1/10
Completed Epoch 1/10
Testing completed. Average loss: 87780.5622

Testing completed. Average loss: 31048.7240

	Train Error: 87780.5622
	Test Error: 31048.7240

Model saved after Epoch 1

Starting Epoch 2/10
Completed Epoch 2/10


In [None]:
train_dataset.gaze_point[:,:].max()

In [None]:
1920, 1080