Versions of tensorflow and keras that worked on my computer

In [None]:
# Pin the GPU build of TensorFlow to 2.4.0.
# NOTE(review): the next cell also installs tensorflow==2.4.0; installing both
# packages is probably redundant — confirm before removing either.
!pip install tensorflow-gpu==2.4.0

In [None]:
# Pin TensorFlow to 2.4.0.
!pip install tensorflow==2.4.0

In [None]:
# Pin standalone Keras to 2.4.0 (keras_vggface imports the standalone `keras` package).
!pip install keras==2.4.0

Mount Google Drive, imports

In [None]:
# Mount Google Drive so the data and saved models under /content/gdrive/MyDrive can be read.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import seaborn as sn
from matplotlib import pyplot as plt

from imageio import imread
from skimage.transform import resize

In [None]:
# Provides keras_vggface.utils.preprocess_input, used by read_img.
# NOTE(review): unlike the TensorFlow/Keras installs above, this version is not pinned — consider pinning it.
!pip install keras_vggface

In [None]:
# NOTE(review): presumably a dependency of keras_vggface; version is not pinned — consider pinning it.
!pip install keras_applications

In [None]:
from tensorflow import keras 
from collections import defaultdict
from glob import glob
from random import choice, sample
from tqdm import tqdm

from keras_vggface import utils

Using TensorFlow backend.


In [None]:
import tensorflow.compat.v1 as tf

# Switch TensorFlow to its TF1-style behaviour through the compat.v1 API.
tf.disable_v2_behavior()  #disable for tensorFlow V2
# Enable memory growth on every GPU TensorFlow lists, so GPU memory is
# allocated as needed instead of being reserved up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

Instructions for updating:
non-resource variables are not supported in the long term


Unzip the test images, set test_path, and load the test data from Google Drive

In [None]:
! unzip "/content/gdrive/MyDrive/CS271Project/test.zip"

In [None]:
# Directory holding the unzipped test images. Image file names are appended to
# it with plain string concatenation, so the trailing slash is required.
test_path = "/content/test/"

In [None]:
# Image pairs to evaluate; the p1 / p2 columns hold the image file names of each pair.
test_relationships = pd.read_csv("/content/gdrive/MyDrive/CS271Project/test_competition.csv")
# Ground-truth labels; only the 'labels' column is used.
test_labels = pd.read_csv("/content/gdrive/MyDrive/CS271Project/test-labels.csv")

**Make a dataframe of image pairs and their label**

In [None]:
# read_csv already returned a DataFrame, so this wrapper does not change the data.
test_relationships = pd.DataFrame(test_relationships)
# Column assignment aligns on the row index, so both CSV files are assumed to
# list the pairs in the same order and with the same length — TODO confirm.
test_relationships['labels'] = test_labels['labels']

Formatting the image pair strings to match the label spreadsheet

In [None]:
def _face_number(file_name):
  """Return the text between "face" and the first "." that follows it."""
  return file_name.split("face")[1].split(".")[0]


# One 'faceNNNNN.jpg-faceNNNNN.jpg' string per row, with both face numbers
# zero-padded to 5 digits.
img_pairs = [
  'face' + _face_number(first).zfill(5) + '.jpg-face' + _face_number(second).zfill(5) + '.jpg'
  for first, second in zip(test_relationships['p1'], test_relationships['p2'])
]

In [None]:
# Attach the formatted pair strings as a new column.
test_relationships['img_pairs'] = img_pairs

In [None]:
# Drop the columns that are no longer needed now that img_pairs has been built.
# NOTE: this cell is not re-runnable as-is — once the columns are gone, a
# second drop() of the same names raises KeyError.

test_relationships = test_relationships.drop(columns = ['ptype', 'Unnamed: 4', 'index', 'p1', 'p2'])

**Load all 6 models**

In [None]:
# Saved model files on Google Drive, in the order model1 ... model6.
model_paths = [
    '/content/gdrive/MyDrive/TrainedFaceModels/vggface1.h5',
    '/content/gdrive/MyDrive/TrainedFaceModels/vggface2.h5',
    '/content/gdrive/MyDrive/TrainedFaceModels/vggface3.h5',
    '/content/gdrive/MyDrive/TrainedFaceModels/vggface4.h5',
    '/content/gdrive/MyDrive/TrainedFaceModels/vggface5.h5',
    '/content/gdrive/MyDrive/TrainedFaceModels/vggface6.h5',
]

# Load every file in order and bind the results to the six model names used below.
model1, model2, model3, model4, model5, model6 = [
    keras.models.load_model(model_path) for model_path in model_paths
]

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Necessary functions for testing

In [None]:
# read images

def read_img(path):
    """Load one image file and prepare it as model input.

    Args:
        path: Path of the image file to read.

    Returns:
        The image as a float array of shape (224, 224, 3), after resizing and
        ``keras_vggface.utils.preprocess_input(..., version=2)``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``path``.
    """
    img = cv2.imread(path)
    # cv2.imread does not raise on a missing or unreadable file; it returns
    # None. Fail here with the offending path instead of a cryptic error later.
    if img is None:
        raise FileNotFoundError("Could not read image: " + str(path))
    # np.float was only an alias for the builtin float (i.e. float64) and has
    # been removed from recent NumPy releases; np.float64 is the same dtype.
    img = img.astype(np.float64)
    img = cv2.resize(img, (224, 224))
    # Raises if the image is not 224x224 with 3 channels at this point.
    img = np.reshape(img, [224, 224, 3])
    return utils.preprocess_input(img, version=2)

In [None]:
# create batches of 32

def chunker(seq, size=32):
    """Lazily split ``seq`` into consecutive slices of at most ``size`` items.

    Args:
        seq: Any sliceable object that supports ``len()``.
        size: Maximum number of items per slice (default 32).

    Returns:
        A generator yielding ``seq[start:start + size]`` for
        ``start = 0, size, 2 * size, ...``; the final slice may be shorter.
    """
    starts = range(0, len(seq), size)
    return (seq[start:start + size] for start in starts)

**Testing the 6 models individually; each model's predictions are stored in its own column of the dataframe**

In [None]:
models = [model1, model2, model3, model4, model5, model6]

# One growing list of predictions per model, in the same order as `models`.
predictions_per_model = [[] for _ in models]

# Iterate over the image batches in the outer loop so that every batch is read
# from disk and preprocessed once, then reused for all models. Looping over
# models on the outside repeats the same image decoding once per model.
for batch in tqdm(chunker(test_relationships['img_pairs'])):

  # each entry looks like '<first image>-<second image>'
  X1 = np.array([read_img(test_path + pair.split("-")[0]) for pair in batch])
  X2 = np.array([read_img(test_path + pair.split("-")[1]) for pair in batch])

  for model, model_predictions in zip(models, predictions_per_model):
    model_predictions += model.predict([X1, X2]).ravel().tolist()

# Store each model's predictions in its own column: is_related_pred_model_1 ... _6
for i, model_predictions in enumerate(predictions_per_model, start=1):
  test_relationships['is_related_pred_model_' + str(i)] = model_predictions

1242it [04:56,  4.18it/s]
1242it [04:55,  4.20it/s]
1242it [04:56,  4.19it/s]
1242it [04:55,  4.20it/s]
1242it [04:55,  4.20it/s]
1242it [04:56,  4.19it/s]


**Testing the ensemble model; its prediction is the average of the 6 models' outputs and is stored in its own column of the dataframe**

In [None]:
models = [model1, model2, model3, model4, model5, model6]

# final list of all predictions
pred_final = []

for batch in tqdm(chunker(test_relationships['img_pairs'])):

  # each entry looks like '<first image>-<second image>'
  X1 = np.array([read_img(test_path + pair.split("-")[0]) for pair in batch])
  X2 = np.array([read_img(test_path + pair.split("-")[1]) for pair in batch])

  # Running average for this batch. It is sized from the batch itself (the
  # last batch can be shorter than the chunk size) and divided by the actual
  # number of models, so neither count is hard-coded.
  batch_mean = np.zeros(len(batch), dtype=np.float64)

  for model in models:
    batch_mean += model.predict([X1, X2]).ravel().astype(np.float64) / len(models)

  pred_final.extend(batch_mean.tolist())

test_relationships['is_related_pred_model_ensemble'] = pred_final

1242it [17:17,  1.20it/s]


**Save results and display**

In [None]:
# Write the results to the Google Drive location that the next cell reads
# back from. Saving to a relative path leaves the file in the runtime's working
# directory, so the reload would pick up a stale copy on Drive or fail.
test_relationships.to_csv('/content/gdrive/MyDrive/CS271Project/facemodelresults.csv', index=False)

In [None]:
# Reload the saved predictions from Google Drive (read_csv already returns a DataFrame).
results_csv = '/content/gdrive/MyDrive/CS271Project/facemodelresults.csv'
test_results = pd.read_csv(results_csv)

In [None]:
# Preview the first rows of the reloaded results.
test_results.head()

Unnamed: 0,labels,img_pairs,is_related_pred_model_1,is_related_pred_model_2,is_related_pred_model_3,is_related_pred_model_4,is_related_pred_model_5,is_related_pred_model_6,is_related_pred_model_ensemble
0,1,face03118.jpg-face00322.jpg,9e-06,0.001212,1.44688e-08,0.002122,0.00385,0.020845,0.004673
1,1,face01058.jpg-face00322.jpg,0.000841,0.056212,2.732864e-06,0.021387,0.001012,0.171677,0.041855
2,1,face01859.jpg-face02895.jpg,0.00046,0.014758,0.002954923,0.164864,0.001477,0.050877,0.039232
3,1,face04234.jpg-face02895.jpg,0.157567,0.048498,0.609714,0.335357,0.006344,0.200489,0.226328
4,1,face04234.jpg-face00322.jpg,0.008714,0.011143,1.080209e-07,0.008996,0.001809,0.064981,0.01594


**Calculate performance of individual and ensemble models**

In [None]:
# Turn every stored prediction into 0 or 1 by rounding, for the six individual
# models and for the ensemble.

prediction_columns = ['is_related_pred_model_' + str(i) for i in range(1,7)]
prediction_columns.append('is_related_pred_model_ensemble')

for column in prediction_columns:
  test_results[column] = test_results[column].round(0)

In [None]:
# models 1-6, individually

# Number of evaluated pairs, taken from the data itself. A hard-coded row
# count silently gives a wrong percentage whenever the results file has a
# different number of rows.
n_pairs = len(test_results)

for i in range(1,7):

  # Sum of absolute differences between prediction and label; skipna=False
  # makes missing values show up as NaN instead of being counted as correct.
  error_sum = (test_results['is_related_pred_model_' + str(i)] - test_results["labels"]).abs().sum(skipna=False)

  print('Accuracy of model ' + str(i) + ' is ' + str(round((n_pairs-error_sum)*100/n_pairs,2)) + '%')

Accuray of model 1 is 47.56%
Accuray of model 2 is 47.69%
Accuray of model 3 is 47.66%
Accuray of model 4 is 47.43%
Accuray of model 5 is 47.46%
Accuray of model 6 is 47.79%


In [None]:
# ensemble model

# Number of evaluated pairs, taken from the data itself rather than hard-coded.
n_pairs = len(test_results)

# Sum of absolute differences between prediction and label; skipna=False
# makes missing values show up as NaN instead of being counted as correct.
error_sum = (test_results['is_related_pred_model_ensemble'] - test_results["labels"]).abs().sum(skipna=False)

print('Accuracy of model ensemble is ' + str(round((n_pairs-error_sum)*100/n_pairs,2)) + '%')

Accuray of model ensemble is 47.67%
