In [1]:
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

In [14]:
# Root directory that holds every landmark dataset (note the trailing slash)
data_path = '/home/data/'
# Per-dataset directories, each built directly under the root
aflw_path = f'{data_path}aflw/aflw/data/'
wflw_path = f'{data_path}wflw/'
w300_path = f'{data_path}300w/'

# 300W Parsing

In [31]:
# Code for reading points
# header=2 consumes the first three lines of the file and names=['x', 'y'] labels the
# two space-separated columns. The last parsed row is dropped because it is not a
# landmark (presumably the closing '}' of the .pts format -- confirm against the file).
# That row is still parsed together with the data, so a non-numeric value in it leaves
# its column holding strings; converting after the slice yields float coordinates.
pd.read_csv(w300_path + 'afw/16413031_1.pts', sep=' ', header=2, names=['x', 'y']).iloc[:-1].astype(float)

Unnamed: 0,x,y
0,208.640420,172.473088
1,208.669696,194.779178
2,213.247178,218.584923
3,220.836104,237.941945
4,230.628442,261.062325
...,...,...
63,337.706819,267.582824
64,347.797064,268.724438
65,337.434185,268.650961
66,331.598989,269.118504


In [44]:
# Get image filepaths
import glob

# Sub-folders of the 300W root that are searched for training images
w300_train_folders = ['afw', 'helen', 'ibug', 'lfpw']

# Recursively collect images folder by folder: all .png matches first, then all .jpg
w300_train_img_paths = []
for folder in w300_train_folders:
    for ext in ('png', 'jpg'):
        pattern = w300_path + folder + '/**/*.' + ext
        w300_train_img_paths.extend(glob.glob(pattern, recursive=True))

# One row per image found
w300_train_df = pd.DataFrame({'img_path': w300_train_img_paths})

In [47]:
# Derive each annotation path by swapping the last three characters of the
# image path (its 'png'/'jpg' extension) for 'pts'
w300_train_df['pts_path'] = w300_train_df['img_path'].str[:-3] + 'pts'
print(w300_train_df)

                                          img_path  \
0              /home/data/300w/afw/156474078_2.jpg   
1               /home/data/300w/afw/18489332_2.jpg   
2             /home/data/300w/afw/2406586388_1.jpg   
3              /home/data/300w/afw/397921011_1.jpg   
4             /home/data/300w/afw/1648807314_3.jpg   
...                                            ...   
3832  /home/data/300w/lfpw/trainset/image_0409.png   
3833  /home/data/300w/lfpw/trainset/image_0012.png   
3834  /home/data/300w/lfpw/trainset/image_0616.png   
3835  /home/data/300w/lfpw/trainset/image_0338.png   
3836  /home/data/300w/lfpw/trainset/image_0481.png   

                                          pts_path  
0              /home/data/300w/afw/156474078_2.pts  
1               /home/data/300w/afw/18489332_2.pts  
2             /home/data/300w/afw/2406586388_1.pts  
3              /home/data/300w/afw/397921011_1.pts  
4             /home/data/300w/afw/1648807314_3.pts  
...                              

In [48]:
# Persist the image/annotation path table; the row index is written as the first CSV column
w300_train_df.to_csv(path_or_buf='300w_train_locs.csv', index=True)

In [49]:
# Sub-folders of the 300W root that are searched for test images
w300_test_folders = ['test_images']

# Recursively collect images folder by folder: all .png matches first, then all .jpg
w300_test_img_paths = []
for folder in w300_test_folders:
    for ext in ('png', 'jpg'):
        pattern = w300_path + folder + '/**/*.' + ext
        w300_test_img_paths.extend(glob.glob(pattern, recursive=True))

# One row per image; the annotation path is the image path with its last three
# characters (the extension) replaced by 'pts'
w300_test_df = pd.DataFrame({'img_path': w300_test_img_paths})
w300_test_df['pts_path'] = w300_test_df['img_path'].str[:-3] + 'pts'
print(w300_test_df)

                                              img_path  \
0    /home/data/300w/test_images/02_Outdoor/outdoor...   
1    /home/data/300w/test_images/02_Outdoor/outdoor...   
2    /home/data/300w/test_images/02_Outdoor/outdoor...   
3    /home/data/300w/test_images/02_Outdoor/outdoor...   
4    /home/data/300w/test_images/02_Outdoor/outdoor...   
..                                                 ...   
595  /home/data/300w/test_images/01_Indoor/indoor_1...   
596  /home/data/300w/test_images/01_Indoor/indoor_0...   
597  /home/data/300w/test_images/01_Indoor/indoor_2...   
598  /home/data/300w/test_images/01_Indoor/indoor_2...   
599  /home/data/300w/test_images/01_Indoor/indoor_1...   

                                              pts_path  
0    /home/data/300w/test_images/02_Outdoor/outdoor...  
1    /home/data/300w/test_images/02_Outdoor/outdoor...  
2    /home/data/300w/test_images/02_Outdoor/outdoor...  
3    /home/data/300w/test_images/02_Outdoor/outdoor...  
4    /home/data/30

In [50]:
# Persist the image/annotation path table; the row index is written as the first CSV column
w300_test_df.to_csv(path_or_buf='300w_test_locs.csv', index=True)

# WFLW Parsing

In [119]:
# Load the space-separated WFLW training annotation list. It is read without a
# header row, so the resulting columns are labelled 0, 1, 2, ...
wflw_train_df = pd.read_csv(
    f'{wflw_path}WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_train.txt',
    sep=' ',
    header=None,
)

In [120]:
# Inspect the parsed WFLW annotations as pandas' plain-text rendering of the frame
print(wflw_train_df)

             0           1           2           3           4           5    \
0     309.307007  538.369019  317.857345  560.120847  322.271739  583.014395   
1     579.002991  167.764008  579.682070  179.841323  580.396685  191.916578   
2     249.128006  175.462997  249.330412  188.417601  249.393073  201.373615   
3     812.989014  627.505005  813.713626  633.254430  814.450604  639.002281   
4     507.287994  280.026001  507.360081  288.558285  507.747252  297.081106   
...          ...         ...         ...         ...         ...         ...   
7495  707.510986  466.559998  707.241833  474.279968  706.911942  481.997532   
7496   95.217903  336.835999  100.954315  347.911683  106.551757  359.058076   
7497  394.656036  549.344543  398.777789  565.350391  398.190038  581.794235   
7498  625.458008  193.985992  625.306287  199.320526  625.186008  204.655847   
7499  385.703339  135.728165  385.993068  140.889577  384.926178  145.950075   

             6           7           8 

# AFLW Parsing

In [3]:
import sqlite3

# Open the AFLW annotation database. The connection is left open on purpose:
# the table-listing, FaceImages and FaceMetaData cells query it as well.
aflw_sql_con = sqlite3.connect(f'{aflw_path}aflw.sqlite')

# Pull the whole FeatureCoords table into a DataFrame
feature_coords = pd.read_sql(sql='SELECT * FROM FeatureCoords', con=aflw_sql_con)

In [71]:
# Saves FeatureCoords to csv
# Reshapes the long FeatureCoords table (one row per face/feature pair) into one row
# per face with '<feature_id>_x' / '<feature_id>_y' columns. Pivoting replaces a
# per-face loop that re-filtered the whole table and issued two boolean-mask
# assignments per feature, which scaled with (number of faces) x (table size).

# Only the first row of each (face_id, feature_id) pair contributes a coordinate.
first_coords = feature_coords.drop_duplicates(subset=['face_id', 'feature_id'], keep='first')

# Output rows follow the order in which face ids first appear in feature_coords.
face_order = feature_coords['face_id'].unique()

# Output column pairs follow the order in which feature ids are first met when
# walking the faces in that row order; the stable sort keeps every face's own
# rows in their original relative order.
face_rank = first_coords.groupby('face_id', sort=False).ngroup().to_numpy()
feature_order = first_coords['feature_id'].iloc[face_rank.argsort(kind='stable')].unique()

# One wide table per coordinate, with rows aligned to face_order.
x_wide = first_coords.pivot(index='face_id', columns='feature_id', values='x').reindex(face_order)
y_wide = first_coords.pivot(index='face_id', columns='feature_id', values='y').reindex(face_order)

# Interleave the x/y columns feature by feature; a face that lacks a feature
# gets NaN in both of that feature's columns.
feature_columns = {'face_id': face_order}
for feature_id in feature_order:
    feature_columns[f'{feature_id}_x'] = x_wide[feature_id].to_numpy()
    feature_columns[f'{feature_id}_y'] = y_wide[feature_id].to_numpy()

features = pd.DataFrame(feature_columns)
features.to_csv('aflw_featurecoords.csv', index=False)

[19  1  2  3  4  5  7  8  9 11 13 14 15 18 20 21  6 10 12 16 17]


In [13]:
# Prints list of tables in SQL database

# sqlite_master is SQLite's schema catalogue; each result row is a 1-tuple holding a table name
res = aflw_sql_con.execute("SELECT name FROM sqlite_master WHERE type='table';")
for (table_name,) in res:
    print(table_name)

Faces
sqlite_sequence
FacePose
FaceImages
Databases
FaceMetaData
sqlite_stat1
FaceRect
AnnotationType
FaceEllipse
NearDuplicates
FeatureCoords
FeatureCoordTypes
CamPose


In [10]:
# Obtains image paths
# Dump the complete FaceImages table to CSV, without the DataFrame index
aflw_filepaths = pd.read_sql(sql='SELECT * FROM FaceImages', con=aflw_sql_con)
aflw_filepaths.to_csv(path_or_buf='aflw_imagepaths.csv', index=False)

In [13]:
# Obtains image metadata
# Dump the complete FaceMetaData table to CSV, without the DataFrame index
aflw_metadata = pd.read_sql(sql='SELECT * FROM FaceMetaData', con=aflw_sql_con)
aflw_metadata.to_csv(path_or_buf='aflw_metadata.csv', index=False)