In [1]:
import cv2
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
import seaborn as sns
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow_hub as hub
import re
import pickle
from nltk.stem import PorterStemmer
import math
from tqdm import tqdm

In [2]:
import csv
# In this script, we focus on the matching ability of the model to link passenger
# descriptions over different images.
# Therefore, we extract four columns from the CSV by position. Judging by the variable
# names they hold the image ID (0), person ID (2), IoU (3) and caption (4) --
# TODO confirm against the header of step_2_df.csv.
full_images = []
person_ids = []
captions = []
ious = []
# newline='' lets the csv module handle line endings itself, so quoted fields that
# contain embedded newlines are parsed correctly.
with open('step_2_df.csv', 'r', newline='') as file:
    reader = csv.reader(file)
    # Skip the first (header) row up front; unlike deleting index 0 afterwards,
    # this does not fail when the file is empty.
    next(reader, None)
    for row in reader:
        # csv.reader yields an empty list for blank lines; skip those.
        if not row:
            continue
        full_images.append(row[0])
        person_ids.append(row[2])
        captions.append(row[4])
        # Note: the IoU is kept as the raw string read from the file.
        ious.append(row[3])

In [3]:
# Preprocessing of data: only retain samples annotated with a person ID,
# i.e. whose person_ids entry contains the substring 'Persoon'.
non_valid_indices = [k for k, pid in enumerate(person_ids) if 'Persoon' not in pid]

# Membership tests against a set are O(1); testing against the list for every element
# of every column made this filter quadratic in the number of samples.
invalid_lookup = set(non_valid_indices)

# Rebuild the four parallel lists without the rejected positions.
full_images = [value for i, value in enumerate(full_images) if i not in invalid_lookup]
person_ids = [value for i, value in enumerate(person_ids) if i not in invalid_lookup]
captions = [value for i, value in enumerate(captions) if i not in invalid_lookup]
ious = [value for i, value in enumerate(ious) if i not in invalid_lookup]



In [4]:
# Report how many samples (captions) are currently held in the dataset.
print(len(captions))

535


In [6]:
# Preprocessing of data: only retain samples for which a caption was created
# (i.e. the caption string is non-empty).
non_valid_indices = [k for k, caption in enumerate(captions) if len(caption) < 1]

# Membership tests against a set are O(1); testing against the list for every element
# of every column made this filter quadratic in the number of samples.
invalid_lookup = set(non_valid_indices)

# Rebuild the four parallel lists without the rejected positions.
full_images = [value for i, value in enumerate(full_images) if i not in invalid_lookup]
person_ids = [value for i, value in enumerate(person_ids) if i not in invalid_lookup]
captions = [value for i, value in enumerate(captions) if i not in invalid_lookup]
ious = [value for i, value in enumerate(ious) if i not in invalid_lookup]
print(len(captions))


484


In [7]:
# Preprocessing of data: for each image, only one description per person is valid.
# When a person has several descriptions for the same image, we keep the description
# coupled to the image snippet with the highest IoU and drop the others.
all_person_ids = sorted(set(person_ids))
print(all_person_ids)

# Group sample positions by (person, image) in a single pass instead of rescanning the
# lists for every person and every image.
samples_by_person_image = {}
for idx, key in enumerate(zip(person_ids, full_images)):
    samples_by_person_image.setdefault(key, []).append(idx)

duplicate_indices = set()
for group in samples_by_person_image.values():
    if len(group) < 2:
        continue
    # The IoU values were read from the CSV as strings, so they must be converted to
    # float before comparing; comparing the raw strings is lexicographic
    # (e.g. "9e-05" > "0.9"). max() returns the first maximal element, so ties keep
    # the earliest sample.
    best = max(group, key=lambda i: float(ious[i]))
    duplicate_indices.update(i for i in group if i != best)

non_valid_indices = sorted(duplicate_indices)
full_images = [value for i, value in enumerate(full_images) if i not in duplicate_indices]
person_ids = [value for i, value in enumerate(person_ids) if i not in duplicate_indices]
captions = [value for i, value in enumerate(captions) if i not in duplicate_indices]
ious = [value for i, value in enumerate(ious) if i not in duplicate_indices]


['Persoon 1', 'Persoon 2', 'Persoon 3', 'Persoon 4', 'Persoon 5', 'Persoon 6', 'Persoon 7', 'Persoon 8', 'Persoon 9']


In [8]:
# Report how many samples are currently held in the dataset.
print(len(full_images))

136


In [9]:
# Dump every remaining caption for a visual sanity check.
# NOTE(review): this prints the whole list; a slice such as captions[:5] would keep
# the notebook output short.
print(captions)

['A woman in a black coat is walking down the street with her hands in her pockets ', 'A person in a black coat is walking in the street with a pair of black gloves on his shoulder ', 'A woman in a black coat is walking down the street with her hands in her pockets ', 'A person in a black coat is walking in the street with a pair of black gloves on his shoulder ', 'A woman in a black coat is walking down the street with a black bag on her head ', 'A man in a black coat is walking down the street with his hands in his pockets ', 'A man in a black coat is walking down the street with his hands in his pockets ', 'A man in a black coat is walking on the road with his hands in his pockets ', 'A man in a black coat is walking down the street with his hands in his pockets ', 'A man in a black coat is walking down the road with his hands in his pockets ', 'A man in a black coat is walking down the street with his hands in his pockets ', 'A man in a black suit is wearing a black jacket and blac

In [10]:
# Embed every caption with the saved sentence-similarity model.
embedding_model = tf.keras.models.load_model('Model_sentence_similarity')
caption_embeddings = embedding_model.predict(captions)
# Keep the result as a plain Python list (one entry per element along the first axis,
# presumably one per caption) so that single entries can be deleted later on.
embeddings = list(caption_embeddings)



In [16]:
# Report how many embeddings are currently held.
# NOTE(review): the recorded execution count of this cell is higher than that of the
# gallery cell below it, so the stored output appears to be the count *after* the
# gallery entries were removed. On a fresh top-to-bottom run this cell would print
# the number of embeddings before gallery removal instead.
print(len(embeddings))

127


In [11]:
# Build the gallery: one description (and its embedding) per person.
# The chosen instance is the first remaining sample of that person, and it is removed
# from all parallel dataset lists so that it is not evaluated against itself later.
# Gotcha: this cell mutates the dataset lists in place, so running it twice removes
# a second sample per person.
descriptions_gallery = []
embeddings_gallery = []
inds_gallery = []
for person in all_person_ids:
    positions_of_person = [pos for pos, pid in enumerate(person_ids) if pid == person]
    first_position = positions_of_person[0]
    # Note: the stored index refers to the lists as they are at this moment, i.e.
    # after the entries of previously processed persons were already removed.
    inds_gallery.append(first_position)
    descriptions_gallery.append(captions[first_position])
    embeddings_gallery.append(embeddings[first_position])
    # Remove the gallery instance from every parallel list to keep them aligned.
    for column in (embeddings, full_images, person_ids, captions, ious):
        del column[first_position]

In [12]:
print(descriptions_gallery)

['A woman in a black coat is walking down the street with her hands in her pockets ', 'A person in a black coat is walking in the street with a pair of black gloves on his shoulder ', 'A woman in a black coat is walking down the street with her hands in her pockets ', 'A man in a black coat is walking down the road with his hands in his pockets ', 'A person in a black coat is walking in the street with a pair of black gloves on his shoulder ', 'A woman in a black coat is walking down the street with a black bag on her head ', 'A man in a black coat is walking down the street with his hands in his pockets ', 'A man in a black coat is walking through the door of a car ', 'A man in a black suit is wearing a black jacket and black pants. He is carrying a pair of black scissors in his']


In [13]:
def square_rooted(x):
    """Return the Euclidean (L2) norm of ``x``.

    Args:
        x: Iterable of numbers.

    Returns:
        The square root of the sum of squared elements, as a float.
    """
    return math.sqrt(sum(a * a for a in x))


def cosine_similarity_cal(x, y):
    """Return the cosine similarity between the vectors ``x`` and ``y``.

    Args:
        x: Sequence of numbers.
        y: Sequence of numbers. Elements are paired with ``zip``, so any surplus
            elements of the longer sequence are ignored in the dot product.

    Returns:
        The dot product divided by the product of the two norms. If either vector
        has zero norm the similarity is undefined; 0.0 is returned in that case
        instead of dividing by zero.
    """
    numerator = sum(a * b for a, b in zip(x, y))
    denominator = square_rooted(x) * square_rooted(y)
    if denominator == 0:
        # A zero vector has no direction; treat it as "not similar" rather than
        # raising ZeroDivisionError (or producing nan/inf for numpy scalars).
        return 0.0
    return numerator / float(denominator)

In [14]:
# Final step: compare every remaining embedding with every gallery embedding using
# cosine similarity. The predicted person is the gallery entry with the highest
# similarity; gallery position i corresponds to all_person_ids[i].
# A prediction that equals the true label counts as a hit (total_hits).
predicted_person = []
total_hits = 0
for sample_idx, query_embedding in enumerate(embeddings):
    similarities = np.array([
        cosine_similarity_cal(query_embedding, gallery_embedding)
        for gallery_embedding in embeddings_gallery
    ])
    best_gallery_idx = np.argmax(similarities)
    predicted_person.append(all_person_ids[best_gallery_idx])
    if predicted_person[sample_idx] == person_ids[sample_idx]:
        total_hits += 1
    

In [15]:
# Compute the accuracy of the model: the number of hits divided by the number of
# evaluated samples (the embeddings that are not part of the gallery).
# For reference: with 9 persons in the gallery, picking a person uniformly at random
# would give an accuracy of about 11.1%.

n_evaluated = len(embeddings)
accuracy = total_hits / n_evaluated
print(accuracy)

0.13385826771653545


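Final accuracy: 13.4% (17 correct predictions out of 127 evaluated descriptions), only slightly above the 11.1% chance level for 9 persons.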

In [17]:
# Report the absolute number of correct predictions (the numerator of the accuracy).
print(total_hits)

17
