In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import torch
import glob
import os
# from alive_progress import alive_it
from tqdm import tqdm

from torch.utils.data import Dataset

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
df = pd.read_csv("data/v1.0.0.csv")
df

Unnamed: 0,image_path,subject_ID,image_number,gender,glasses,eye_state,reflections,image_quality,sensor_type,label
0,/Users/I748920/Desktop/secure-face-capture-pyt...,s0030,663,0,0,1,0,1,1,open
1,/Users/I748920/Desktop/secure-face-capture-pyt...,s0030,486,0,0,1,0,1,1,open
2,/Users/I748920/Desktop/secure-face-capture-pyt...,s0030,384,0,0,1,0,1,1,open
3,/Users/I748920/Desktop/secure-face-capture-pyt...,s0030,782,0,0,1,0,1,1,open
4,/Users/I748920/Desktop/secure-face-capture-pyt...,s0030,764,0,0,1,0,1,1,open
...,...,...,...,...,...,...,...,...,...,...
84893,/Users/I748920/Desktop/secure-face-capture-pyt...,s0016,534,1,0,0,1,1,1,close
84894,/Users/I748920/Desktop/secure-face-capture-pyt...,s0016,1793,1,0,1,0,1,1,open
84895,/Users/I748920/Desktop/secure-face-capture-pyt...,s0016,331,1,0,0,0,1,1,close
84896,/Users/I748920/Desktop/secure-face-capture-pyt...,s0016,709,1,0,0,0,1,1,close


In [3]:
df.columns

Index(['image_path', 'subject_ID', 'image_number', 'gender', 'glasses',
       'eye_state', 'reflections', 'image_quality', 'sensor_type', 'label'],
      dtype='object')

In [10]:
# train test split
from sklearn.model_selection import train_test_split

train,test = train_test_split(df,test_size=0.1,shuffle=True,random_state=1,stratify=df[['label','gender', 'glasses','reflections', 'image_quality']])
train,val = train_test_split(train,test_size=2/7,shuffle=True,random_state=1,stratify=train[['label','gender', 'glasses','reflections', 'image_quality']])

In [16]:
len(train),len(val),len(test)

train.label.value_counts()/len(train),val.label.value_counts()/len(val),test.label.value_counts()/len(test)

(label
 open     0.505927
 close    0.494073
 Name: count, dtype: float64,
 label
 open     0.505932
 close    0.494068
 Name: count, dtype: float64,
 label
 open     0.505889
 close    0.494111
 Name: count, dtype: float64)

In [71]:
# find what the min_dim for the dataset is, that should resize all images to a number close to that, so that small images are not upsampled to larger dim instead
min_dim = np.float64("inf")
for i,row in tqdm(df.sample(n=10000).iterrows()):
    dims = plt.imread(row['image_path']).shape
    dims = min(dims)
    if dims<min_dim:
        min_dim=dims

min_dim

10000it [00:23, 421.39it/s]


53

should use a try except function and read all the files to ensure that all images are actually readable and not corrupt, should also set a threshold on min_dims so that weird images that are too small arent used

In [85]:
from torchvision.transforms import Compose,Resize,ToTensor

def load_image(image_path):
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    return img
    

class EyeDataset(Dataset):
    def __init__(self,df,split_type):
        self.split_type = split_type

        self.image_paths = df.image_path.tolist()
        self.labels = df.label.tolist()
        self.mapping = {
            "close":0,
            "open":1,
        }
        
    def __len__(self):
        return len(self.image_paths)       
        
    def __getitem__(self,ind):
        image = load_image(self.image_paths[ind])
        label = self.mapping[self.labels[ind]]

        if self.split_type=='train':
            # apply train transforms
            pass
        else:
            # apply val and test transforms, resize, normalise, totensor
            val_transforms = Compose([
                ToTensor(), # converts to type torch tensor and normalise to [0,1]
                Resize((50,50))
            ])
            image = val_transforms(image)
       
        
        return image,label

In [89]:
sample_dataset = EyeDataset(
    df=train,
    split_type='val'
)

sample_dataset[0][0].shape

torch.Size([1, 50, 50])