In [1]:
from sklearn.model_selection import train_test_split
from pathlib import Path
import matplotlib.image as img
import json
import numpy as np
from tqdm import tqdm
import h5py

from multiprocessing import Pool
import time
from concurrent.futures import ThreadPoolExecutor
import functools
import pandas as pd

In [2]:
# from tensorflow.python.client import device_lib
# device_lib.list_local_devices()

In [3]:
# Importing Data
# Notes:

# Orientation
# 1: portrait
# 2: portrait, upside down (iPad only)
# 3: landscape, with home button on the right
# 4: landscape, with home button on the left

In [4]:
# Helper Functions from EDA
def ejson(p, fn): # extract json
    """Parse and return the JSON document stored at ``p / fn``.

    Args:
        p: Directory containing the file (``pathlib.Path`` or ``str``).
        fn: File name inside ``p``, e.g. ``'info.json'``.

    Returns:
        The decoded JSON value (whatever ``json.load`` yields for the file).

    Raises:
        FileNotFoundError: if ``p / fn`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's locale
    # default, which differs between machines.
    with open((Path(p) / fn).as_posix(), encoding='utf-8') as f:
        return json.load(f)
## Simple Json Reading Functions
def info_data(p):
    """Return the parsed contents of ``info.json`` located in directory ``p``."""
    filename = 'info.json'
    return ejson(p, filename)
def dot_data(p):
    """Return the parsed contents of ``dotInfo.json`` located in directory ``p``.

    Elsewhere in this notebook the result is indexed with ``'XCam'`` and ``'YCam'``.
    """
    filename = 'dotInfo.json'
    return ejson(p, filename)
def frame_data(p):
    """Return the parsed contents of ``frames.json`` located in directory ``p``.

    This notebook iterates the result and uses each item as a frame image file name.
    """
    filename = 'frames.json'
    return ejson(p, filename)
def screen_data(p):
    """Return the parsed contents of ``screen.json`` located in directory ``p``."""
    filename = 'screen.json'
    return ejson(p, filename)
def get_frame(p, img_fn):
    """Read one frame image belonging to case directory ``p``.

    The image is not read from ``p`` itself but from the sibling
    ``gazecapture-224x224`` tree, i.e.
    ``<p>/../../gazecapture-224x224/<p.name>/frames/<img_fn>``.

    Args:
        p: ``pathlib.Path`` of the case directory.
        img_fn: File name of the frame image.

    Returns:
        Whatever ``matplotlib.image.imread`` returns for that file.
    """
    resized_root = p / '..' / '..' / 'gazecapture-224x224'
    frame_path = resized_root / p.name / 'frames' / img_fn
    return img.imread(frame_path)
## Larger Helper Functions
def coordinate_ydata(p):
    """Return the ``'YCam'`` entry of the ``dotInfo.json`` found in directory ``p``."""
    # The camera-relative coordinate is the one we want as a target.
    return dot_data(p)['YCam']
def coordinate_xdata(p):
    """Return the ``'XCam'`` entry of the ``dotInfo.json`` found in directory ``p``."""
    # The camera-relative coordinate is the one we want as a target.
    return dot_data(p)['XCam']

In [5]:
# Root directory of the dataset. Every entry directly inside it is treated as
# one case directory by the cells below (they call frame_data() on each entry).
# Alternative location kept for reference:
# dset_path = Path('ml/gazecapture/')
dset_path = Path('../gazecapture/')

In [14]:
# Extracting and storing X values
def extract_photo(f, case):
    """Load frame ``f`` of ``case``; ``get_frame`` with the arguments swapped.

    Taking the file name first lets ``functools.partial(extract_photo, case=...)``
    be mapped over a list of file names.
    """
    frame = get_frame(case, f)
    return frame

def extract(case, n_workers=6):
    """Load every frame listed for ``case`` using a pool of worker processes.

    Args:
        case: ``pathlib.Path`` of the case directory; its ``frames.json`` lists
            the frame image file names to load.
        n_workers: Number of worker processes in the pool. Defaults to 6, the
            value that used to be hard-coded.

    Returns:
        A list with one loaded image per file name, in the order given by
        ``frames.json``.

    Raises:
        Any exception raised by a worker while reading a frame (for example
        ``FileNotFoundError`` for a missing image) is re-raised here.
    """
    frame_names = frame_data(case)  # file names of the camera images of this case
    load_frame = functools.partial(extract_photo, case=case)
    with Pool(n_workers) as pool:  # multi-core usage
        return pool.map(load_frame, frame_names)

def extract_to_hdf5(cname, bs, extract_fcn, dtype=np.float32):
    """Run ``extract_fcn`` on every case and store the results in HDF5 files.

    Cases are the sub-directories of ``dset_path``, processed in sorted order so
    that the case-to-file assignment is the same on every run. Output files are
    written to the current working directory as ``'<cname>-<k>.hdf5'`` with
    ``k = 0, 1, ...``; each file holds at most ``bs`` datasets, one per case,
    named ``'dset' + <case directory name>``.

    Args:
        cname: Prefix of the output file names.
        bs: Maximum number of datasets (cases) per HDF5 file; must be >= 1.
        extract_fcn: Callable taking a case ``Path`` and returning array-like
            data of uniform shape (e.g. a list of images or a list of numbers).
        dtype: Element type used for storage. The default ``float32`` matches
            what h5py uses when a dataset is created from a shape alone.
            NOTE(review): if the frames are 8-bit images, passing ``np.uint8``
            for the image files would shrink them considerably - verify first.

    Raises:
        ValueError: if ``bs`` is smaller than 1.
    """
    if bs < 1:
        raise ValueError('bs must be a positive integer, got {!r}'.format(bs))

    # Skip stray files (e.g. OS metadata files) that are not case directories.
    cases = sorted(c for c in dset_path.iterdir() if c.is_dir())

    # max(..., 1) keeps the previous behaviour of always creating file 0,
    # even when there are no cases at all.
    for start in range(0, max(len(cases), 1), bs):
        out_name = '{}-{}.hdf5'.format(cname, start // bs)
        # The context manager guarantees every file is flushed and closed,
        # including the last one and when extraction raises midway.
        with h5py.File(out_name, 'w') as f:
            for case in cases[start:start + bs]:
                # Take the shape from the data itself so that both image
                # stacks and 1-D coordinate lists can be stored.
                edata = np.asarray(extract_fcn(case), dtype=dtype)
                f.create_dataset('dset' + case.name, data=edata)

In [8]:
# (output file prefix, cases per file, extraction function), run in this order.
extraction_jobs = [
    ('X', 500, extract),
    ('Y_Xpoint', 2000, coordinate_xdata),
    ('Y_YPoint', 2000, coordinate_ydata),
]
for prefix, per_file, extractor in extraction_jobs:
    extract_to_hdf5(prefix, per_file, extractor)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/brennangebotys/Documents/workspace/eyetracking/gazecapture-224x224/01128/frames/00144.jpg'

In [19]:
# Build a flat (case name, frame file name) index of the dataset.
# It is split into train / test sets below; the train set is meant to be
# split further into train / val sets afterwards.
cases, fnames = [], []
for case in dset_path.iterdir():
    frame_names = frame_data(case)
    cases.extend([case.name] * len(frame_names))
    fnames.extend(frame_names)
    # NOTE(review): this stops after the first case returned by iterdir(), so
    # the index (and every output recorded below) covers a single case only.
    break

In [25]:
# One row per frame: the case it belongs to and the frame's image file name.
frame_columns = {'case_name': cases, 'file_names': fnames}
df = pd.DataFrame(frame_columns)
df.head()

Unnamed: 0,case_name,file_names
0,1128,00000.jpg
1,1128,00001.jpg
2,1128,00002.jpg
3,1128,00003.jpg
4,1128,00004.jpg


In [26]:
test_size = 0.1  # fraction of the rows held out for testing
split_seed = 42  # fixed seed so that re-running the notebook yields the same split
# NOTE(review): rows are individual frames, so frames of one case can end up in
# both sets; confirm whether a split by case is wanted instead.
train, test = train_test_split(df, test_size=test_size, random_state=split_seed)

In [28]:
# (rows, columns) of the train split followed by the test split.
tuple(split.shape for split in (train, test))

((757, 2), (85, 2))

In [None]:
# Sanity check: print the shape of every dataset stored in the first X file.
# The context manager closes the file handle once the listing is done, so the
# file is not left open (and locked) for the rest of the kernel session.
with h5py.File('X-0.hdf5', 'r') as f:
    for k in f.keys():
        print(f[k].shape)