In [1]:
import numpy as np
import matplotlib.pyplot as plt
from PPlib import jpg_to_face_train, filter_faces_train, jpg_to_face_test, filter_faces_test, compute_mu_sig

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# define train constants
# Locations used by the training pipeline (all relative to the working directory).
# input_dir_all  : passed to jpg_to_face_train and filter_faces_train as the input directory
# face_data      : CSV written after detection and read back before filtering
# face_data_filt : CSV that receives the table returned by filter_faces_train
# output_dir_all : passed to filter_faces_train as the output directory
input_dir_all = 'data/raw_train_all/'
face_data = "valid_train.csv"
face_data_filt = "valid_train_filt.csv"
output_dir_all = 'data/clean_train_all/'

# i_start / num_img are handed to jpg_to_face_train together with the input directory
# (presumably first image index and number of images to process -- confirm in PPlib).
i_start = 0
num_img = 9540
# Filter thresholds. The exploratory cell applies them as strict inequalities:
#   valid_q  > q_mn,  ar_mn < valid_ar < ar_mx,  valid_px > px_mn
# NOTE(review): "q", "ar" and "px" look like detection quality, aspect ratio and
# pixel count, but their definitions live in PPlib -- confirm there.
q_mn = 0.95
ar_mx = 1
ar_mn = 0.5
px_mn = 4096
# npx is forwarded to filter_faces_train (presumably the output image size -- confirm).
npx = 224

In [None]:
# Run detection over the raw training images; per the original note, only images
# with exactly one face are returned as "valid".
(valid_id,
 valid_bbox_all,
 valid_q_all,
 valid_ar_all,
 valid_px_all,
 bad_id) = jpg_to_face_train(input_dir_all, i_start, num_img)

# Number of valid detections; every per-detection list is reshaped against it so a
# length mismatch raises immediately.
num_valid = len(valid_id)

# Scalar-per-detection lists become (num_valid, 1) column arrays in a single pass.
valid_q, valid_ar, valid_px, valid_id = (
    np.array(values).reshape((num_valid, 1))
    for values in (valid_q_all, valid_ar_all, valid_px_all, valid_id)
)
# Bounding boxes carry four numbers per detection.
valid_bbox = np.array(valid_bbox_all).reshape((num_valid, 4))

# Assemble one table (id, q, ar, px, bbox[4]) and persist it as CSV.
valid_train = np.hstack([valid_id, valid_q, valid_ar, valid_px, valid_bbox])
np.savetxt(face_data, valid_train, delimiter=",")

In [None]:
# Exploratory check of the filter thresholds against the detection arrays
# (valid_q, valid_ar, valid_px) and the constants q_mn, ar_mx, ar_mn, px_mn.
# Prints how many detections survive each filter and draws one histogram per
# filter with the threshold marked, so the cut-offs can be judged visually.

# apply filters (strict inequalities, one boolean mask per criterion)
q_filt = valid_q > q_mn
ar_filt_mx = valid_ar < ar_mx
ar_filt_mn = valid_ar > ar_mn
px_filt = valid_px > px_mn
# `&` is the element-wise logical AND for boolean arrays.
all_filt = q_filt & ar_filt_mx & ar_filt_mn & px_filt

# determine baseline
print("detections before filtering:", num_valid)

# determine filtered
# np.count_nonzero counts True entries directly and returns a plain integer,
# whatever the shape of the mask.
print("detections passing all filters:", np.count_nonzero(all_filt))

# determine additional filtering
# Each entry: (values to histogram, mask, axis label, rule text, threshold).
# The histograms use the same arrays the masks were computed from, flattened to 1-D.
filter_checks = [
    (np.ravel(valid_q), q_filt, "q", "q > q_mn", q_mn),
    (np.ravel(valid_ar), ar_filt_mx, "ar", "ar < ar_mx", ar_mx),
    (np.ravel(valid_ar), ar_filt_mn, "ar", "ar > ar_mn", ar_mn),
    (np.ravel(valid_px), px_filt, "px", "px > px_mn", px_mn),
]
for fig_num, (values, mask, axis_label, rule, threshold) in enumerate(filter_checks, start=1):
    plt.figure(fig_num)
    plt.hist(values)
    # Mark the cut-off so the two aspect-ratio figures are distinguishable.
    plt.axvline(threshold, color="red", linestyle="--")
    plt.title("{} (threshold = {})".format(rule, threshold))
    plt.xlabel(axis_label)
    plt.ylabel("count")
    print("detections passing {}:".format(rule), np.count_nonzero(mask))

# conclusions recorded from the histograms:
#   q  > 0.97   NOTE(review): the constants cell sets q_mn = 0.95, not 0.97 --
#               confirm which value is intended.
#   ar < 1
#   ar > 0.5
#   px > 4096

In [None]:
# Reload the detection table from CSV, split it into columns, and let
# filter_faces_train turn the raw images into cleaned training data.

# get data from spreadsheet
# ndmin=2 keeps the table two-dimensional even when the CSV holds a single row;
# without it np.loadtxt returns a 1-D array and the column slicing below raises
# an IndexError.
valid_train = np.loadtxt(face_data, delimiter=',', ndmin=2)

# Column layout matches the table saved after detection: id, q, ar, px, bbox[4].
# NOTE(review): np.loadtxt yields floats, so valid_id holds float ids here, while
# the test pipeline casts its first column with .astype(int) -- confirm that
# filter_faces_train accepts float ids.
valid_id = valid_train[:,0]
valid_q = valid_train[:,1]
valid_ar = valid_train[:,2]
valid_px = valid_train[:,3]
valid_bbox = valid_train[:,4:]

num_valid = len(valid_id)

# convert raw data into clean training data
valid_train_filt = filter_faces_train(input_dir_all, output_dir_all, valid_id, valid_bbox, valid_q, valid_ar, valid_px, num_valid, ar_mx, ar_mn, q_mn, px_mn, npx)

# save filtered data
np.savetxt(face_data_filt, valid_train_filt, delimiter=",")

In [None]:
# define test constants
# Locations used by the test pipeline (all relative to the working directory).
# input_dir_test  : passed to jpg_to_face_test and filter_faces_test as the input directory
# face_data_test  : CSV written after detection and read back before filtering
# output_dir_test : passed to filter_faces_test as the output directory
# face_filt_test  : CSV that receives the filtered dup_number column
input_dir_test = 'data/raw_test/'
face_data_test = "valid_test.csv"
output_dir_test = 'data/clean_test/'
face_filt_test = 'test_filt.csv'

# NOTE: i_start, num_img, q_mn and npx reuse the names of the training constants,
# so running this cell overwrites the training values (num_img changes from 9540
# to 4977). Re-run the training constants cell before re-running training cells.
i_start = 0
num_img = 4977
# q_mn is forwarded to filter_faces_test; npx likewise (presumably the minimum
# quality and the output image size -- confirm in PPlib).
q_mn = 0.95
npx = 224

In [None]:
# Run detection over the raw test images; per the original note this returns every
# face found, not only single-face images.
(dup_number,
 valid_bbox_all,
 valid_q_all,
 valid_ar_all,
 valid_px_all) = jpg_to_face_test(input_dir_test, i_start, num_img)

# One row per returned entry; each list is reshaped against this count so a
# length mismatch raises immediately.
num_pics = len(dup_number)

# Scalar-per-entry lists become (num_pics, 1) column arrays in a single pass.
valid_q, valid_ar, valid_px, dup_number = (
    np.array(values).reshape((num_pics, 1))
    for values in (valid_q_all, valid_ar_all, valid_px_all, dup_number)
)
# Bounding boxes carry four numbers per entry.
valid_bbox = np.array(valid_bbox_all).reshape((num_pics, 4))

# Assemble one table (dup_number, q, ar, px, bbox[4]) and persist it as CSV.
valid_test = np.hstack([dup_number, valid_q, valid_ar, valid_px, valid_bbox])
np.savetxt(face_data_test, valid_test, delimiter=",")

In [None]:
# Reload the test detection table from CSV, split it into columns, and let
# filter_faces_test turn the raw images into cleaned test data.

# get data from spreadsheet
# ndmin=2 keeps the table two-dimensional even when the CSV holds a single row;
# without it np.loadtxt returns a 1-D array and the column slicing below raises
# an IndexError.
valid_test = np.loadtxt(face_data_test, delimiter=',', ndmin=2)

# Column layout matches the table saved after detection: dup_number, q, ar, px, bbox[4].
# The first column is cast back to int because np.loadtxt reads everything as float.
dup_number = valid_test[:,0].astype(int)
valid_q = valid_test[:,1]
valid_ar = valid_test[:,2]
valid_px = valid_test[:,3]
valid_bbox = valid_test[:,4:]

# Row count of the table; valid_ar, valid_px and num_valid are not passed to
# filter_faces_test below.
num_valid = len(dup_number)

# convert raw data into clean test data
dup_number_filt, q_filt, bbox_filt = filter_faces_test(input_dir_test, output_dir_test, dup_number, valid_bbox, valid_q, q_mn, npx)

In [None]:
# Turn the outputs of filter_faces_test into column arrays and save the filtered
# dup_number column as CSV.

# Use a dedicated name for the filtered count. Reusing `num_img` here would
# overwrite the configuration constant from the test constants cell, so re-running
# the detection cell afterwards would silently process the wrong range of images.
num_faces_filt = len(dup_number_filt)

dup_number_filt = np.array(dup_number_filt).reshape((num_faces_filt, 1))
q_filt = np.array(q_filt).reshape((num_faces_filt, 1))
bbox_filt = np.array(bbox_filt).reshape((num_faces_filt, 4))

# save data
# Only the dup_number column is written; q_filt and bbox_filt stay available in
# the notebook but are not saved.
# NOTE(review): confirm that omitting them from the CSV is intentional.
test_filt = dup_number_filt
np.savetxt(face_filt_test, test_filt, delimiter=',')

In [2]:
# compute mean and standard deviation of images
# NOTE(review): num_img is not passed to compute_mu_sig below, and 69540 differs
# from the 9540 set in the training constants cell -- possibly a typo; confirm
# whether this assignment is needed at all.
num_img = 69540
# Same path strings as output_dir_all / face_data_filt in the training constants
# cell, repeated here so this cell can run right after the imports.
train_dir = 'data/clean_train_all/'
# Despite the "_dir" suffix this is the path of a CSV file, not a directory.
face_filt_dir = "valid_train_filt.csv"

# mu, sig: the two values returned by compute_mu_sig (per the comment above, the
# mean and standard deviation of the images; exact shape/units are defined in PPlib).
mu, sig = compute_mu_sig(train_dir, face_filt_dir)