<a href="https://colab.research.google.com/github/axiezai/nl-processors/blob/main/nl-processors/gaze/colab_classify_gaze.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Load the GazeCom labeled data for training on `[x, y]` gaze coordinates

In [2]:
import requests, zipfile

# Download and unzip GazeCom training data in google colab:

fname = 'GazeCom.zip'
url = 'https://michaeldorr.de/smoothpursuit/GazeCom.zip'

# Only hit the network when there is no usable archive on disk yet.
# `zipfile.is_zipfile` returns False for a missing file and typically also for
# a truncated one, so an interrupted download is retried on the next run.
if not zipfile.is_zipfile(fname):
  # stream=True avoids holding the whole archive in memory; the timeout is
  # (connect, read) in seconds so a stalled server cannot hang the cell forever.
  with requests.get(url, allow_redirects=True, stream=True, timeout=(10, 120)) as r:
    # Fail loudly on 4xx/5xx instead of saving an error page as a ".zip".
    r.raise_for_status()
    with open(fname, 'wb') as fd:
      for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
        fd.write(chunk)

with zipfile.ZipFile(fname, 'r') as zip_ref:
  zip_ref.extractall('/content/GazeCom_data')

In [3]:
import os
import natsort
import glob
import time
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.io import arff

import torch
import torch.nn as nn

Helper functions:    

In [6]:
def get_files(pattern):
    """
    Return the files matching a glob pattern, in natural (alphanumerical) order.

    Parameters
    ----------
    pattern : str or list/tuple of str
        A glob pattern. A list or tuple is treated as path components and
        joined with ``os.path.join`` before globbing.

    Returns
    -------
    list of str
        Matching paths, sorted with ``natsort.natsorted``.

    Raises
    ------
    FileNotFoundError
        If nothing matches; the message includes the pattern that was tried.
    """
    # Accept tuples as well as lists of path components.
    if isinstance(pattern, (list, tuple)):
        pattern = os.path.join(*pattern)

    files = natsort.natsorted(glob.glob(pattern))
    if not files:
        # Name the pattern so a wrong path is obvious from the traceback.
        raise FileNotFoundError(
            f'Pattern could not detect file(s): {pattern!r}'
        )

    return files


def set_seed(seed=None, seed_torch=True):
    """
    Seed Python's ``random``, NumPy and (optionally) PyTorch with one value.

    Parameters
    ----------
    seed : int or None
        Seed to use. If None, a seed in ``[0, 2**32)`` is drawn from NumPy's
        global random state.
    seed_torch : bool
        If True, also seed PyTorch (CPU and all CUDA devices) and switch cuDNN
        to deterministic, non-benchmarking mode.

    Returns
    -------
    None
        The chosen seed is printed, not returned.
    """
    if seed is None:
        # Explicit 64-bit dtype: the bound 2**32 does not fit in a 32-bit
        # default integer on some platforms.
        seed = np.random.randint(0, 2 ** 32, dtype=np.int64)

    # `random.seed` rejects NumPy integer scalars on recent Python versions,
    # so hand every library a plain Python int.
    if isinstance(seed, np.integer):
        seed = int(seed)

    random.seed(seed)
    np.random.seed(seed)
    if seed_torch:
        torch.manual_seed(seed)
        # Covers every CUDA device, including the current one.
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    print(f'Random seed {seed} has been set.')


def seed_wworker(worker_id):
    """Seed NumPy and Python's ``random`` from torch's initial seed.

    The seed is ``torch.initial_seed()`` reduced modulo ``2 ** 32``.
    ``worker_id`` is accepted but not used. Presumably intended as a
    DataLoader ``worker_init_fn`` -- confirm against the caller.
    """
    derived = torch.initial_seed() % (2 ** 32)
    for seeder in (np.random.seed, random.seed):
        seeder(derived)


def set_device():
    """Pick the torch device name and report the choice.

    Returns
    -------
    str
        'cuda' when ``torch.cuda.is_available()`` is true, otherwise 'cpu'.
        A one-line status message is printed in either case.
    """
    if torch.cuda.is_available():
        print("GPU is enabled in this notebook.")
        return 'cuda'

    # No GPU: remind the user how to enable one in the runtime menu.
    print("WARNING: For this notebook to perform best, "
          "if possible, in the menu under `Runtime` -> "
          "`Change runtime type.`  select `GPU` ")
    return "cpu"

In [14]:
# Collect the unlabeled gaze recordings from the extracted archive.
# get_files raises FileNotFoundError if the pattern matches nothing.
raw_data = get_files('/content/GazeCom_data/gaze_arff/*/*.arff')
print(f'There are {len(raw_data)} raw eye gaze files')

# Collect the hand-labeled recordings the same way.
labeled_data = get_files('/content/GazeCom_data/ground_truth/*/*.arff')
print(f'There are {len(labeled_data)} labeled eye gaze files')

# Load the first raw file and preview its contents.
# Element 0 of loadarff's return value is what gets turned into a DataFrame.
raw_arff = arff.loadarff(raw_data[0])
raw_df = pd.DataFrame(raw_arff[0])
print('Raw file:')
# Last expression of the cell, so the preview is rendered as the cell output.
raw_df.head()

There are 844 raw eye gaze files
There are 844 labeled eye gaze files
Raw file:


Unnamed: 0,time,x,y,confidence
0,1000.0,590.9,5.2,1.0
1,5000.0,590.9,5.2,1.0
2,9000.0,590.6,5.0,1.0
3,13000.0,590.4,5.0,1.0
4,17000.0,589.8,5.2,1.0


In [16]:
# Load the first labeled file (from `labeled_data`, built in an earlier cell)
# and preview it for comparison with the raw file.
labeled_arff = arff.loadarff(labeled_data[0])
labeled_df = pd.DataFrame(labeled_arff[0])
print('Labeled file:')
# Last expression of the cell, so the preview is rendered as the cell output.
labeled_df.head()

Labeled file:


Unnamed: 0,time,x,y,confidence,handlabeller1,handlabeller2,handlabeller_final
0,1000.0,590.9,5.2,1.0,4.0,4.0,4.0
1,5000.0,590.9,5.2,1.0,4.0,4.0,4.0
2,9000.0,590.6,5.0,1.0,4.0,4.0,4.0
3,13000.0,590.4,5.0,1.0,4.0,4.0,4.0
4,17000.0,589.8,5.2,1.0,4.0,4.0,4.0


In [15]:
# Device name string ('cuda' or 'cpu') returned by set_device.
DEVICE = set_device()
# Fixed seed; set_seed applies it to random, NumPy and torch.
SEED = 42
set_seed(SEED)

Random seed 42 has been set.
