In [198]:
from collections import defaultdict
from glob import glob
from random import choice, sample


from tensorflow.keras.layers import Input, Conv2D, Lambda, Dense, Flatten,MaxPooling2D,Activation, Dropout, BatchNormalization,  GlobalMaxPool2D, GlobalAvgPool2D, Concatenate, Subtract,Multiply
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from skimage.io import imshow
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import cv2
import os
from tqdm import tqdm
from tensorflow.keras.applications.resnet50 import preprocess_input
from random import sample, choice

In [121]:
# File paths
train_file_path = "data/train/"
train_relationships_path = "../csv_files/train_relationships.csv"
#the validation set will be family members F09...
validation_set = "F09"

#get all the images
all_images = glob(train_file_path + "*/*/*.jpg")

#seperate the train and validation sets
train_images = [img for img in all_images if validation_set not in img]
val_images = [img for img in all_images if validation_set in img]
    

In [122]:
print(len(all_images))

12379


In [123]:
#create a dictionary with key=family member and value=list of pictures of family member
def fam_mem_pics(all_images):
    #create dictionary
    fam_member_dict = {}
    #create list for member pictures
    fam_member_lst = []
    i = 0
    while i < len(all_images)-1:
        #get the family member as key
        split_path = all_images[i].split('/')
        key = split_path[2]+'/'+split_path[3]
        #check if picture is about the same fmaily member
        same_member = True

        while same_member:
            #check if same family member
            if key in all_images[i]:
                #append the image to the family member list
                fam_member_lst.append(all_images[i])
                
            else:
                #changed family member
                same_member = False
            
            #update the counter to not go out of bounds
            if (i+1) < len(all_images):  
                i += 1
            else:
                #break if out of bounds
                break
                
        #check if key is already in dictionary - just in case the images are out of order
        if key in fam_member_dict.keys():
            #combine list
            fam_member_dict[key] = fam_member_dict[key] + fam_member_lst
        else: 
            #create key=family member and value= list of family member's pictures
            fam_member_dict[key] = fam_member_lst
        
        fam_member_lst = []
        
    #return the member_dictionary 
    return fam_member_dict


In [124]:
#get a family member image dictionary
map_train_person_to_images = fam_mem_pics(train_images)
map_val_person_to_images = fam_mem_pics(val_images)

In [125]:
map_train_person_to_images

{'F0832/MID1': ['data/train/F0832/MID1/P08797_face2.jpg',
  'data/train/F0832/MID1/P08791_face1.jpg',
  'data/train/F0832/MID1/P08795_face1.jpg',
  'data/train/F0832/MID1/P08793_face1.jpg',
  'data/train/F0832/MID1/P08799_face2.jpg',
  'data/train/F0832/MID1/P08794_face4.jpg',
  'data/train/F0832/MID1/P08792_face1.jpg'],
 'F0832/MID2': ['data/train/F0832/MID2/P08791_face2.jpg',
  'data/train/F0832/MID2/P08793_face5.jpg',
  'data/train/F0832/MID2/P08795_face2.jpg',
  'data/train/F0832/MID2/P08798_face2.jpg',
  'data/train/F0832/MID2/P08800_face1.jpg',
  'data/train/F0832/MID2/P08794_face1.jpg',
  'data/train/F0832/MID2/P08792_face3.jpg'],
 'F0832/MID5': ['data/train/F0832/MID5/P08796_face3.jpg',
  'data/train/F0832/MID5/P08793_face6.jpg',
  'data/train/F0832/MID5/P08792_face4.jpg'],
 'F0832/MID4': ['data/train/F0832/MID4/P08796_face2.jpg',
  'data/train/F0832/MID4/P08798_face1.jpg',
  'data/train/F0832/MID4/P08793_face3.jpg',
  'data/train/F0832/MID4/P08800_face2.jpg',
  'data/train/F08

In [126]:
#get the raltionships dataframe
df = pd.read_csv("csv_files/train_relationships.csv")

#get isolate the the family/member portions to compare
all_family_members = []
for img in all_images:
    split_path = img.split('/')
    all_family_members.append(split_path[2]+'/'+split_path[3])

#some relationships are not present within the relationship csv
#remove them by only keeping the ones mentioned in the dataset
df = df[df['p1'].isin(all_family_members)]
df = df[df['p2'].isin(all_family_members)]

In [127]:
#seperate the training and validation labels
train = df[~df['p1'].str.contains(validation_set)]
val = df[df['p1'].str.contains(validation_set)]

#turn dataframes to tuples to get labels
train = list(zip(train['p1'].values, train['p2'].values))
val = list(zip(val['p1'].values, val['p2'].values))

print(len(train))
print(len(val))

3066
296


In [128]:
print(len(train))
train[:10] #labels

3066


[('F0002/MID1', 'F0002/MID3'),
 ('F0002/MID2', 'F0002/MID3'),
 ('F0005/MID1', 'F0005/MID2'),
 ('F0005/MID3', 'F0005/MID2'),
 ('F0009/MID1', 'F0009/MID4'),
 ('F0009/MID1', 'F0009/MID3'),
 ('F0009/MID1', 'F0009/MID2'),
 ('F0009/MID1', 'F0009/MID6'),
 ('F0009/MID2', 'F0009/MID4'),
 ('F0009/MID2', 'F0009/MID6')]

In [202]:
#create a generator
def gen(family_member_labels, family_member_map, batch_size=16):
    while True:
        #sample from half of true labels - don't repeat grab
        half_batch_size = int(batch_size/2)
        batch_family_members = sample(family_member_labels, half_batch_size)
        #all these should be of label 1
        label = np.ones(len(batch_family_members))

        #grab data that isn't a combination from the labels set
        # create a set of size of 'batch size'
        fam_mem_keys = list(family_member_map.keys())
        while len(batch_family_members) < batch_size:
            #get random numbers to choose
            rand_num = np.random.randint(0, len(fam_mem_keys), 2)
            #get random family member
            fam_mem_1 = fam_mem_keys[rand_num[0]]
            fam_mem_2 = fam_mem_keys[rand_num[1]]

            #check if the two random people are family members - do not want that
            condition_1 = (fam_mem_1, fam_mem_2)
            condition_2 = (fam_mem_2, fam_mem_1)
            if fam_mem_1 not in fam_mem_2 and condition_1 not in family_member_labels and condition_2 not in family_member_labels:
                #add onto combination as not being family
                batch_family_members.append(condition_1)
                #add label as 0 for not family
                label = np.concatenate([label,[0]])

    #     print(batch_family_members)
    #     print(label)
    #     print('------------------')

        person_1_imgs = []
        person_2_imgs = []
        #get photos for each person
        for fam_mem_tup in batch_family_members:
            #peson 1's set of images
            mem_1_all_imgs = family_member_map[fam_mem_tup[0]]
            #person 2's set of images
            mem_2_all_imgs = family_member_map[fam_mem_tup[1]]

            #select random image
            person_1_choice = choice(mem_1_all_imgs)
            person_2_choice = choice(mem_2_all_imgs)
            #turn the images into arrays
            for person, pic_path in enumerate([person_1_choice, person_2_choice]):
                #read the image
                img = cv2.imread(pic_path)
                #turn to array
                img = np.array(img).astype(np.float)
                img_arr = preprocess_input(img)

                #add to lst
                if person == 0: #person 1
                    person_1_imgs.append(img_arr)
                else: #person 2
                    person_2_imgs.append(img_arr)

        #turn to numpy arrays
        person_1_imgs = np.asarray(person_1_imgs)
        person_2_imgs = np.asarray(person_2_imgs)

    #     print(label)
    #     print('------------------')
    #     print(len(person_1_imgs))
    #     print(type(person_1_imgs))
    #     print('------------------')
    #     print(len(person_2_imgs))
    #     print(type(person_2_imgs))


        yield [person_1_imgs, person_2_imgs], label

In [203]:
gen(train, train_person_to_images_map, batch_size=16)

<generator object gen at 0x13db49480>

In [211]:
from tensorflow.keras import models

In [214]:
from keras.models import model_from_json
model = model_from_json(open("facenet_model.json", "r").read())
model.load_weights('facenet_weights.h5')
model.summary()

FileNotFoundError: [Errno 2] No such file or directory: 'facenet_model.json'

In [213]:
# img = read_img('../data/train/F0009/MID1/P10569_face2.jpg')
facenet_model = models.load_model("facenet/facenet_keras.h5")
facenet_model.load_weights("facenet/facenet_keras_weights.h5")

OSError: Unable to open file (file signature not found)

In [219]:
from tensorflow.keras.applications.vgg19 import VGG19

def baseline_model():
    input_1 = Input(shape=(224, 224, 3))
    input_2 = Input(shape=(224, 224, 3))

#     base_model = VGGFace(model='resnet50', include_top=False)
    
    base_model = VGG19(weights='imagenet', include_top=False)
    
    for x in base_model.layers[:-3]:
        x.trainable = True

    x1 = base_model(input_1)
    x2 = base_model(input_2)

    # x1_ = Reshape(target_shape=(7*7, 2048))(x1)
    # x2_ = Reshape(target_shape=(7*7, 2048))(x2)
    #
    # x_dot = Dot(axes=[2, 2], normalize=True)([x1_, x2_])
    # x_dot = Flatten()(x_dot)

    x1 = Concatenate(axis=-1)([GlobalMaxPool2D()(x1), GlobalAvgPool2D()(x1)])
    x2 = Concatenate(axis=-1)([GlobalMaxPool2D()(x2), GlobalAvgPool2D()(x2)])

    x3 = Subtract()([x1, x2])
    x3 = Multiply()([x3, x3])

    x = Multiply()([x1, x2])

    x = Concatenate(axis=-1)([x, x3])

    x = Dense(100, activation="relu")(x)
    x = Dropout(0.01)(x)
    out = Dense(1, activation="sigmoid")(x)

    model = Model([input_1, input_2], out)

    model.compile(loss="binary_crossentropy", metrics=['acc'], optimizer=Adam(0.00001))

#     model.summary()

    return model


In [220]:
model = baseline_model()
# model.load_weights(file_path)
model.fit_generator(gen(train, train_person_to_images_map, batch_size=16), use_multiprocessing=True,
                    validation_data=gen(val, val_person_to_images_map, batch_size=16), epochs=20, verbose=2,
                    workers=4, steps_per_epoch=200, validation_steps=100)

W0529 17:16:21.793153 4540057024 training_generator.py:409] Using a generator with `use_multiprocessing=True` and multiple workers may duplicate your data. Please consider using the `keras.utils.Sequence` class.


Epoch 1/20


Process ForkPoolWorker-12:
Process ForkPoolWorker-10:
Process ForkPoolWorker-9:
Process ForkPoolWorker-11:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/anaconda3/lib/python3.7/multiprocessing/pool.py", line 110, in worker
    task = get()
  File "/anaconda3/lib/python3.7/multiprocessing/queues.py", line 351, in get
    with self._rlock:
  File "/anaconda3/lib/python

Epoch 1/20


  File "/anaconda3/lib/python3.7/multiprocessing/pool.py", line 110, in worker
    task = get()
  File "/anaconda3/lib/python3.7/multiprocessing/pool.py", line 110, in worker
    task = get()
  File "/anaconda3/lib/python3.7/multiprocessing/pool.py", line 110, in worker
    task = get()
  File "/anaconda3/lib/python3.7/multiprocessing/queues.py", line 351, in get
    with self._rlock:
  File "/anaconda3/lib/python3.7/multiprocessing/queues.py", line 351, in get
    with self._rlock:
  File "/anaconda3/lib/python3.7/multiprocessing/queues.py", line 352, in get
    res = self._reader.recv_bytes()
  File "/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/anaconda3/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
Key

KeyboardInterrupt: 

In [206]:
# def read_img(path):
#     img = cv2.imread(path)
#     img = np.array(img).astype(np.float)
#     return preprocess_input(img)


# def gen(list_tuples, person_to_images_map, batch_size=16):
#     ppl = list(person_to_images_map.keys())
#     while True:
#         batch_tuples = sample(list_tuples, batch_size // 2)
#         labels = [1] * len(batch_tuples)
# #         print(batch_tuples)
# #         print(labels)
#         while len(batch_tuples) < batch_size:
#             p1 = choice(ppl)
#             p2 = choice(ppl)
# #             print('p1: ', p1)
# #             print('p2: ', p2)
#             if p1 != p2 and (p1, p2) not in list_tuples and (p2, p1) not in list_tuples:
#                 batch_tuples.append((p1, p2))
#                 labels.append(0)
# #         print(batch_tuples)
# #         print(labels)
#         for x in batch_tuples:
# #             print('x:, ', x)
#             if not len(person_to_images_map[x[0]]):
# #                 print('x[0]:, ', x)
#                 print(x[0])
# #         print('------------------------')
#         X1 = [choice(person_to_images_map[x[0]]) for x in batch_tuples]
# #         print(len(X1))
# #         print(X1)
# #         print('------------------------')
#         X1 = np.array([read_img(x) for x in X1])
# #         print(X1)
# #         print('------------------------')
#         X2 = [choice(person_to_images_map[x[1]]) for x in batch_tuples]
#         X2 = np.array([read_img(x) for x in X2])
        
# #         print(type(X1))
# #         print(type(X2))
# #         print([X1,X2])
#         yield [X1, X2], labels


# gen(train, train_person_to_images_map, batch_size=16)

<generator object gen at 0x13db494f8>

In [None]:
# gen(train, train_person_to_images_map, batch_size=16)

In [None]:
# def gen(list_tuples, person_to_images_map, batch_size=16):
#     ppl = list(person_to_images_map.keys())
#     while True:
#         batch_tuples = sample(list_tuples, batch_size // 2)
#         labels = [1] * len(batch_tuples)
#         while len(batch_tuples) < batch_size:
#             p1 = choice(ppl)
#             p2 = choice(ppl)

#             if p1 != p2 and (p1, p2) not in list_tuples and (p2, p1) not in list_tuples:
#                 batch_tuples.append((p1, p2))
#                 labels.append(0)

#         for x in batch_tuples:
#             if not len(person_to_images_map[x[0]]):
#                 print(x[0])

#         X1 = [choice(person_to_images_map[x[0]]) for x in batch_tuples]
#         X1 = np.array([read_img(x) for x in X1])

#         X2 = [choice(person_to_images_map[x[1]]) for x in batch_tuples]
#         X2 = np.array([read_img(x) for x in X2])
        
# #         print(X1)
        
#         yield [X1, X2], labels
        
# gen(train, train_person_to_images_map, batch_size=16)

In [None]:
# pip install opencv-python