# Import Library

In [1]:
import pandas as pd
import torch
from os.path import exists
from facenet_pytorch import MTCNN, extract_face
from PIL import Image, ImageDraw

# Import CSV files and Prepross

In [2]:
labeled_users = './data/labeled_users.csv'
user_demo_profiles_path = './data/User demo profiles.json'

df_users = pd.read_csv(labeled_users)
df_profiles = pd.read_json(user_demo_profiles_path, orient='values')

In [3]:
df_users = df_users.drop(['is_female','year_born'],axis = 1)
df_users = df_users.rename(columns={"user_id":"id","race":"race"})
df_users.head()

Unnamed: 0,id,race
0,12488.0,4.0
1,719703.0,4.0
2,722153.0,3.0
3,749003.0,5.0
4,755531.0,4.0


In [4]:
df_profiles = df_profiles.drop(['name','screen_name','description','lang'],axis = 1)
df_profiles.head()

Unnamed: 0,id,img_path
0,12488,profile pics/60147.jpeg
1,719703,profile pics/60148.jpeg
2,722153,profile pics/60149.jpeg
3,749003,profile pics/60150.jpeg
4,1896301728,profile pics/60151.jpeg


In [6]:
data = df_users.join(df_profiles.set_index('id'), on='id')
data = data.dropna()
data['absolute_img_path'] = './data/' + data['img_path']
data.head()

Unnamed: 0,id,race,img_path,absolute_img_path
0,12488.0,4.0,profile pics/60147.jpeg,./data/profile pics/60147.jpeg
1,719703.0,4.0,profile pics/60148.jpeg,./data/profile pics/60148.jpeg
2,722153.0,3.0,profile pics/60149.jpeg,./data/profile pics/60149.jpeg
3,749003.0,5.0,profile pics/60150.jpeg,./data/profile pics/60150.jpeg
5,811618.0,3.0,profile pics/60152.jpeg,./data/profile pics/60152.jpeg


In [8]:
def set_cropped_path(input):
    return './data/cropped/' + input[13:]
data['cropped_path'] = data['img_path'].apply(set_cropped_path)
data.head()

Unnamed: 0,id,race,img_path,absolute_img_path,cropped_path
0,12488.0,4.0,profile pics/60147.jpeg,./data/profile pics/60147.jpeg,./data/cropped/60147.jpeg
1,719703.0,4.0,profile pics/60148.jpeg,./data/profile pics/60148.jpeg,./data/cropped/60148.jpeg
2,722153.0,3.0,profile pics/60149.jpeg,./data/profile pics/60149.jpeg,./data/cropped/60149.jpeg
3,749003.0,5.0,profile pics/60150.jpeg,./data/profile pics/60150.jpeg,./data/cropped/60150.jpeg
5,811618.0,3.0,profile pics/60152.jpeg,./data/profile pics/60152.jpeg,./data/cropped/60152.jpeg


In [9]:
print(data.shape)
to_drop = []
for i, row in enumerate(data.iterrows()):
    index, row = row
    path = row['absolute_img_path']
    if not exists(path):
        to_drop.append(index)
data = data.drop(to_drop)
print(data.shape)

(3572, 5)
(3487, 5)


# Import a face detection model

In [10]:
# little test
test_path = './data/profile pics/60152.jpeg'#data['img_path'][0]
img = Image.open(test_path)
img.show()

In [19]:
# face detector
mtcnn = MTCNN(keep_all=True,select_largest=True)
boxes, probs, points = mtcnn.detect(img, landmarks=True)

In [20]:
# Draw boxes and save faces
extract = extract_face(img, boxes[0], save_path='detected_face_{}.png'.format(i))

# Get All faces

In [23]:
def get_all_faces(input):
    img_path, cropped_path = input[0],input[1]
    img = Image.open(img_path)
    boxes, probs = mtcnn.detect(img)
    if boxes is not None:
        extract_face(img, boxes[0], save_path = cropped_path)

In [24]:
data[['absolute_img_path','cropped_path']].apply(get_all_faces,axis = 1)

0       None
1       None
2       None
3       None
5       None
        ... 
4120    None
4121    None
4122    None
4123    None
4124    None
Length: 3487, dtype: object

In [25]:
data.to_pickle('./data.pkl')