In [1]:
from facenet_pytorch import InceptionResnetV1, training, fixed_image_standardization
from torch.utils.tensorboard import SummaryWriter
import torch
import torchvision
import pandas as pd
import numpy as np
import os
from PIL import Image

In [2]:
# Run-wide settings. `workers` is handed to the DataLoader as `num_workers`:
# zero on Windows (os.name == 'nt'), eight everywhere else.
batch_size = 8
epochs = 16
workers = 8 if os.name != 'nt' else 0

# Use the first CUDA device when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


In [3]:
# Load the labelled image table and recode the `race` column in one pass:
# code 4 becomes 0.0, code 5 becomes NaN, every other value is kept as-is.
data = pd.read_pickle('./race_prediction_image/data.pkl')
data['race'] = np.where(
    data['race'] == 5,
    np.nan,
    np.where(data['race'] == 4, 0.0, data['race']),
)
# dropna() removes every row with a missing value in any column, which
# includes the rows just marked NaN above.
data = data.dropna()
print(data['race'].value_counts())
data.head()

0.0    2726
1.0     318
2.0     200
3.0     112
Name: race, dtype: int64


Unnamed: 0,id,race,img_path,absolute_img_path,cropped_path
0,12488.0,0.0,profile pics/60147.jpeg,./data/profile pics/60147.jpeg,./data/cropped/60147.jpeg
1,719703.0,0.0,profile pics/60148.jpeg,./data/profile pics/60148.jpeg,./data/cropped/60148.jpeg
2,722153.0,3.0,profile pics/60149.jpeg,./data/profile pics/60149.jpeg,./data/cropped/60149.jpeg
5,811618.0,3.0,profile pics/60152.jpeg,./data/profile pics/60152.jpeg,./data/cropped/60152.jpeg
6,822540.0,0.0,profile pics/60153.jpeg,./data/profile pics/60153.jpeg,./data/cropped/60153.jpeg


In [4]:
def load_images(input):
    """Load the image stored at ``input`` into memory.

    Parameters
    ----------
    input : str or path-like
        Location of the image file on disk.

    Returns
    -------
    PIL.Image.Image or pandas.NA
        An in-memory copy of the image, or ``pd.NA`` when nothing exists at
        ``input`` (callers can then discard such entries with ``dropna``).
    """
    if not os.path.exists(input):
        return pd.NA
    # Open inside a context manager so the file handle is released as soon as
    # the pixel data has been copied; the returned copy no longer needs the
    # file to stay open.
    with Image.open(input) as tmp:
        return tmp.copy()

In [5]:
# Attach the loaded image for every cropped-face path. Paths with no file on
# disk come back as pd.NA and are removed by the dropna() that follows.
data['face'] = data['cropped_path'].map(load_images)
data = data.dropna()
print(data.shape)
data.head()

(2385, 6)


Unnamed: 0,id,race,img_path,absolute_img_path,cropped_path,face
5,811618.0,3.0,profile pics/60152.jpeg,./data/profile pics/60152.jpeg,./data/cropped/60152.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...
7,865071.0,0.0,profile pics/60154.jpeg,./data/profile pics/60154.jpeg,./data/cropped/60154.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...
8,988211.0,0.0,profile pics/60155.jpeg,./data/profile pics/60155.jpeg,./data/cropped/60155.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...
9,1025311.0,0.0,profile pics/60156.jpeg,./data/profile pics/60156.jpeg,./data/cropped/60156.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...
10,1143891.0,3.0,profile pics/60157.jpeg,./data/profile pics/60157.jpeg,./data/cropped/60157.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...


In [6]:
class Face_Race_Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing face images with integer class labels.

    Parameters
    ----------
    x : sequence
        Images, indexable by integer position. Each item is passed through
        ``np.float32`` and ``ToTensor`` (presumably PIL images or HxWxC
        arrays; confirm against the caller).
    y : sequence
        Numeric class labels aligned position-by-position with ``x``.
    """

    def __init__(self, x,y):
        self.x = x
        self.y = y
        # Per-sample pipeline: cast to float32, convert to a tensor, then
        # apply facenet-pytorch's fixed_image_standardization.
        self.transform = torchvision.transforms.Compose(
                            [
                            np.float32,
                            torchvision.transforms.ToTensor(),
                            fixed_image_standardization])

    def __len__(self):
        """Return the number of samples."""
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for the sample at ``index``.

        The label is returned as a ``torch.long`` tensor.

        Raises
        ------
        ValueError
            If the label is a float NaN, which has no integer value.
        """
        label = self.y[index]
        # Labels taken from a float column arrive as floats such as 3.0.
        # Cast them explicitly so torch.tensor is never asked to build an
        # integer tensor from a float value.
        if isinstance(label, float):
            label = int(label)
        return self.transform(self.x[index]), torch.tensor(label, dtype=torch.long)

In [20]:
# Wrap every loaded face together with its race label in a single dataset;
# no train/test split is made in this cell.
totalset = Face_Race_Dataset(
    x=data['face'].tolist(),
    y=data['race'].tolist(),
)

In [21]:
# Build the classifier with four output classes, requesting the 'vggface2'
# pretrained weights, then move it to the selected device.
resnet = InceptionResnetV1(classify=True, pretrained='vggface2', num_classes=4)
resnet = resnet.to(device)

In [46]:
# Restore the saved weights. map_location lets a checkpoint that was written
# on a GPU load on whichever device this run selected (including CPU-only).
resnet.load_state_dict(
    torch.load('./race_prediction_image/dataset1_pic_race_model.pt', map_location=device)
)
# shuffle=False keeps predictions in the same order as the dataset rows.
dataloader = torch.utils.data.DataLoader(totalset,batch_size=32,num_workers=workers,shuffle=False)
resnet.eval()

total_pred = []
total_y = []
batch_frames = []
# Inference only: no_grad() stops autograd from recording a graph for each
# batch, which saves memory and time when no backward pass is run.
with torch.no_grad():
    for x, y in dataloader:
        x = x.to(device)
        preds = resnet(x)
        _, pred_y = torch.max(preds, 1)
        preds = preds.cpu().numpy()
        # The values are stored exactly as the model returns them; no softmax
        # is applied in this cell.
        # NOTE: columns 2 and 3 hold model outputs 3 and 2 respectively. This
        # looks intentional, since a later cell applies the same 2<->3 swap to
        # the label columns. total_pred keeps the model's own class ids.
        prob = pd.DataFrame({
            0: preds[:, 0],
            1: preds[:, 1],
            2: preds[:, 3],
            3: preds[:, 2],
        })
        batch_frames.append(prob)
        total_pred += pred_y.cpu().numpy().tolist()
        total_y += y.cpu().numpy().tolist()

# Concatenate once instead of growing a frame inside the loop. Each batch
# keeps its own 0..len-1 index, so the combined index repeats per batch.
total_prob = pd.concat(batch_frames) if batch_frames else pd.DataFrame()


  return self.transform(self.x[index]),torch.tensor(self.y[index],dtype=torch.long)


In [48]:
# Relabel the four score columns with the string names '0'..'3'.
total_prob.columns = [str(label) for label in range(4)]
total_prob.head()

Unnamed: 0,0,1,2,3
0,-0.204718,-1.788146,0.308982,1.711257
1,-0.180855,-0.374976,-0.254783,0.628443
2,2.111324,-1.151877,-0.386586,-0.23907
3,0.324256,-0.411449,-0.537727,0.646304
4,-2.663526,0.031439,1.212305,0.590142


In [50]:
# Attach the predicted class for each row. total_pred is a plain list, so it
# is matched to the rows of `data` by position; the DataLoader ran with
# shuffle=False over a dataset built from `data`, so the order agrees.
data['pred_race'] = total_pred
data.head()


Unnamed: 0,id,race,img_path,absolute_img_path,cropped_path,face,pred_race
5,811618.0,3.0,profile pics/60152.jpeg,./data/profile pics/60152.jpeg,./data/cropped/60152.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,2
7,865071.0,0.0,profile pics/60154.jpeg,./data/profile pics/60154.jpeg,./data/cropped/60154.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,2
8,988211.0,0.0,profile pics/60155.jpeg,./data/profile pics/60155.jpeg,./data/cropped/60155.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,0
9,1025311.0,0.0,profile pics/60156.jpeg,./data/profile pics/60156.jpeg,./data/cropped/60156.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,2
10,1143891.0,3.0,profile pics/60157.jpeg,./data/profile pics/60157.jpeg,./data/cropped/60157.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,3


In [51]:
from sklearn import metrics
from sklearn.metrics import classification_report
# Per-class precision / recall / F1 of the predictions against the recoded
# race labels, over every row that has a loaded face.
print(classification_report(y_true=data['race'], y_pred=data['pred_race']))

              precision    recall  f1-score   support

         0.0       0.97      0.56      0.71      1951
         1.0       0.40      0.94      0.56       224
         2.0       0.18      0.82      0.29       136
         3.0       0.31      0.42      0.36        74

    accuracy                           0.61      2385
   macro avg       0.46      0.68      0.48      2385
weighted avg       0.85      0.61      0.66      2385



In [58]:
# Show the first rows again; `data` now carries the pred_race column.
data.head()

Unnamed: 0,id,race,img_path,absolute_img_path,cropped_path,face,pred_race
5,811618.0,3.0,profile pics/60152.jpeg,./data/profile pics/60152.jpeg,./data/cropped/60152.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,2
7,865071.0,0.0,profile pics/60154.jpeg,./data/profile pics/60154.jpeg,./data/cropped/60154.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,2
8,988211.0,0.0,profile pics/60155.jpeg,./data/profile pics/60155.jpeg,./data/cropped/60155.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,0
9,1025311.0,0.0,profile pics/60156.jpeg,./data/profile pics/60156.jpeg,./data/cropped/60156.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,2
10,1143891.0,3.0,profile pics/60157.jpeg,./data/profile pics/60157.jpeg,./data/cropped/60157.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,3


In [59]:
def _swap_labels(values, first, second):
    """Return ``values`` with every ``first`` turned into ``second`` and vice versa.

    Both comparisons are made against the untouched input, so no temporary
    placeholder value is needed and all other labels pass through unchanged.
    """
    return np.where(
        values == first,
        second,
        np.where(values == second, first, values),
    )


# Work on a copy so `data` keeps its current label coding.
changeName = data.copy()
# Exchange class ids 2 and 3 in both the true and the predicted labels.
# The literal types mirror each column's values (floats for `race`, ints for
# `pred_race`).
changeName['race'] = _swap_labels(changeName['race'], 2.0, 3.0)
changeName['pred_race'] = _swap_labels(changeName['pred_race'], 2, 3)
changeName.head()

Unnamed: 0,id,race,img_path,absolute_img_path,cropped_path,face,pred_race
5,811618.0,2.0,profile pics/60152.jpeg,./data/profile pics/60152.jpeg,./data/cropped/60152.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,3
7,865071.0,0.0,profile pics/60154.jpeg,./data/profile pics/60154.jpeg,./data/cropped/60154.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,3
8,988211.0,0.0,profile pics/60155.jpeg,./data/profile pics/60155.jpeg,./data/cropped/60155.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,0
9,1025311.0,0.0,profile pics/60156.jpeg,./data/profile pics/60156.jpeg,./data/cropped/60156.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,3
10,1143891.0,2.0,profile pics/60157.jpeg,./data/profile pics/60157.jpeg,./data/cropped/60157.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,2


In [60]:
# Read the labelled-user table from its CSV file and preview it.
labeled_users = './data/labeled_users.csv'
df_users = pd.read_csv(filepath_or_buffer=labeled_users)
df_users.head()

Unnamed: 0,user_id,is_female,year_born,race
0,12488.0,0.0,1980.0,4.0
1,719703.0,0.0,1985.0,4.0
2,722153.0,1.0,1973.0,3.0
3,749003.0,0.0,1982.0,5.0
4,755531.0,0.0,1982.0,4.0


In [61]:
# Give both frames a fresh 0..n-1 index so they line up row by row, then
# place the score columns to the right of the label columns.
changeName, total_prob = (
    frame.reset_index(drop=True) for frame in (changeName, total_prob)
)
changeName = pd.concat([changeName, total_prob], axis='columns', join='inner')
changeName.head()

Unnamed: 0,id,race,img_path,absolute_img_path,cropped_path,face,pred_race,0,1,2,3
0,811618.0,2.0,profile pics/60152.jpeg,./data/profile pics/60152.jpeg,./data/cropped/60152.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,3,-0.204718,-1.788146,0.308982,1.711257
1,865071.0,0.0,profile pics/60154.jpeg,./data/profile pics/60154.jpeg,./data/cropped/60154.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,3,-0.180855,-0.374976,-0.254783,0.628443
2,988211.0,0.0,profile pics/60155.jpeg,./data/profile pics/60155.jpeg,./data/cropped/60155.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,0,2.111324,-1.151877,-0.386586,-0.23907
3,1025311.0,0.0,profile pics/60156.jpeg,./data/profile pics/60156.jpeg,./data/cropped/60156.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,3,0.324256,-0.411449,-0.537727,0.646304
4,1143891.0,2.0,profile pics/60157.jpeg,./data/profile pics/60157.jpeg,./data/cropped/60157.jpeg,<PIL.Image.Image image mode=RGB size=160x160 a...,2,-2.663526,0.031439,1.212305,0.590142


In [62]:
# A: image-table id plus the four score columns.
# B: user id plus the race code from the labelled-user table.
# Both are independent copies of the selected columns.
A = changeName.loc[:, ['id', '0', '1', '2', '3']].copy()
B = df_users.loc[:, ['user_id', 'race']].copy()
B.shape

(4132, 2)

In [63]:
# Left-join the scores onto every labelled user by id; users with no score
# row keep NaN in the four score columns.
# NOTE(review): B['race'] comes straight from the labelled-user CSV (its
# preview shows codes such as 4.0 and 5.0), while the score columns follow
# the recoded and swapped class order. Confirm consumers expect this mix.
output = B.join(other=A.set_index('id'), on='user_id', how='left')
output.head()

Unnamed: 0,user_id,race,0,1,2,3
0,12488.0,4.0,,,,
1,719703.0,4.0,,,,
2,722153.0,3.0,,,,
3,749003.0,5.0,,,,
4,755531.0,4.0,,,,


In [64]:
# Write the joined table to disk without the DataFrame index column.
output.to_csv(
    path_or_buf='./race_prediction_image/userid_race_predrace.csv',
    index=False,
)