# ECE 763 Project 01: Face Detection
## faceScrub dataset

In [22]:
import csv
import numpy as np
from PIL import Image
from tqdm import tqdm

### Methods

In [23]:
def buildDataset(actors_list):
  """Load the class-0 and class-1 image of every record into one flat list.

  Parameters
  ----------
  actors_list : sequence of dict
      Each record is read through the keys '0' and '1' (image file paths
      stored as class 0 and class 1 respectively) and 'name' (used only in
      the failure message).

  Returns
  -------
  list of dict
      One dictionary per successfully loaded image, with keys 'file'
      (the path), 'class' (0 or 1) and 'image' (the decoded PIL image).

  Notes
  -----
  Loading is best-effort: a failure is reported on stdout and the rest of
  that record is skipped. If the class-0 image fails nothing is added for
  the record; if only the class-1 image fails, the class-0 entry that was
  already appended is kept.
  """
  img_set = []
  for actor in tqdm(actors_list, total=len(actors_list), desc='Loading dataset'):
    try:
      for label in (0, 1):
        path = actor[str(label)]
        image = Image.open(path)
        # Image.open is lazy: decode the pixels now so that corrupt files
        # are reported here instead of in a later cell, and so that Pillow
        # can release the underlying file handle right away.
        image.load()
        img_set.append({'file': path, 'class': label, 'image': image})
    except Exception:
      # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
      # .get avoids a second error from the handler itself when the record
      # has no 'name' field.
      print(str(actor.get('name', '<unnamed>')) + " couldn't be found!")
  return img_set

Load datasets into memory

In [24]:
# Tab-separated index files describing the test and training images
test_file = './data/test/test.txt'
train_file = './data/training/training.txt'

# Parse the test index: one dictionary per row, keyed by the header line
with open(test_file, newline='') as actors:
    actors_reader = csv.DictReader(actors, delimiter='\t')
    test_actors = list(actors_reader)

# Parse the training index the same way
with open(train_file, newline='') as actors:
    actors_reader = csv.DictReader(actors, delimiter='\t')
    train_actors = list(actors_reader)

# Open the images referenced by each index
test_set = buildDataset(test_actors)
train_set = buildDataset(train_actors)

Loading dataset: 100%|██████████| 100/100 [00:00<00:00, 2796.82it/s]
Loading dataset: 100%|██████████| 1000/1000 [00:00<00:00, 4712.04it/s]


Concatenate RGB values and normalize data

In [25]:
# Convert every image to a flat float64 vector and divide by 255.
# - np.asfarray was removed in NumPy 2.0; np.asarray(..., dtype=np.float64)
#   is the equivalent call.
# - The isinstance guard makes this cell safe to re-run: entries that were
#   already converted to arrays are left alone instead of being divided
#   by 255 a second time.
for dataset in (test_set, train_set):
    for face in dataset:
        if not isinstance(face['image'], np.ndarray):
            face['image'] = np.asarray(face['image'], dtype=np.float64).flatten() / 255.0

# Inspect data
print('Sample from test set: ')
print(test_set[0]['image'])
print('shape: ' , test_set[0]['image'].shape)
print('Sample from training set: ')
print(train_set[0]['image'])
# Report the training sample's own shape (this line previously printed the test set's shape)
print('shape: ' , train_set[0]['image'].shape)

Sample from test set: 
[0.4        0.42745098 0.63137255 ... 0.24313725 0.28235294 0.41960784]
shape:  (1200,)
Sample from training set: 
[0.41568627 0.21960784 0.17647059 ... 0.4        0.25098039 0.2       ]
shape:  (1200,)


Create training class subsets and X matrix

In [26]:
# Create vectors of training data
X0_subset = [ face for face in train_set if face['class']==0 ]
X1_subset = [ face for face in train_set if face['class']==1 ]
x_len = train_set[0]['image'].shape[0]


def _stack_as_columns(faces, length):
    """Return a (length, len(faces)) array whose i-th column is faces[i]['image'].

    Every image vector must have shape (length,). Offending entries are
    printed as (index, shape, file) and a ValueError is raised, so the
    result never contains an uninitialised column.
    """
    mismatched = [
        (i, face['image'].shape, face['file'])
        for i, face in enumerate(faces)
        if face['image'].shape != (length,)
    ]
    if mismatched:
        for i, shape, file in mismatched:
            print(i, shape, file)
        raise ValueError(
            str(len(mismatched)) + ' image vector(s) do not have shape ' + str((length,))
        )

    X = np.empty( (length, len(faces)) )
    for i, face in enumerate(faces):
        X[:, i] = face['image']
    return X


# Both classes go through the same validated path
X0 = _stack_as_columns(X0_subset, x_len)
X1 = _stack_as_columns(X1_subset, x_len)

# Sanity check
print('X0 shape: ' , X0.shape)
print('X1 shape: ' , X1.shape)

X0 shape:  (1200, 1000)
X1 shape:  (1200, 1000)


Compute sample mean vector and sample covariance matrix

In [44]:
# Mean of each row of X0 / X1, taken across the columns (one column per image)
mu0 = X0.mean(axis=1)
mu1 = X1.mean(axis=1)

# Covariance: np.cov treats rows as variables and columns as observations;
# bias=True normalizes by N instead of N-1
sigma0 = np.cov(X0, bias=True)
sigma1 = np.cov(X1, bias=True)

# Cross-check sigma0 against the definition
#   (1/N) * sum_i (x_i - mu0)(x_i - mu0)^T
# The sum of per-column outer products is computed as one matrix product of
# the centered data with its transpose.
centered0 = X0 - mu0[:, np.newaxis]
s0 = (centered0 @ centered0.T) / X0.shape[1]
print('s0 aprox sigma0?' , np.allclose(s0,sigma0))



s0 aprox sigma0? True
