#### Library Import

In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from keras import Sequential
from tensorflow.keras.layers import Flatten, Dense
from numpy.linalg import norm
from sklearn.decomposition import PCA
from glob import glob
from IPython.core.display import HTML

ModuleNotFoundError: No module named 'tensorflow.keras'

### 1. Creating the model

#### Model import without the classification layers. Only the feature extraction layers are required.

In [2]:
# VGG16 with ImageNet weights; include_top=False leaves out the classification
# layers so the network is used only as a feature extractor.
model = VGG16(weights="imagenet", include_top=False)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


### 2. Auxiliary Functions

#### <p style="color:red">Feature extraction and dimensionality reduction.</p>

In [None]:
def extract_features(img_path):
    """Run a single image through the module-level `model`.

    The file at `img_path` is loaded with target_size=(650, 500), converted to
    an array, given a leading batch axis and passed through `preprocess_input`
    before prediction.

    Returns whatever `model.predict` produces for that one-image batch.
    """
    loaded = image.load_img(img_path, target_size=(650, 500))
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    return model.predict(preprocess_input(batch))

def reduce_dimensions(features):
    """Project one image's feature map with PCA and flatten it to 400 values.

    `features` is reshaped to 20 rows of 15*512 values, a default `PCA` is
    fitted on those rows, and the same rows are then transformed. The result
    is reshaped to (1, 400) and squeezed into a 1-D array.

    NOTE(review): the constants 20, 15*512 and 400 presumably match the VGG16
    output for the 650x500 inputs produced by `extract_features` -- confirm
    before changing the input size.
    """
    rows = features.reshape(20, 15 * 512)
    # fit() returns the estimator itself, so this is fit-then-transform.
    projected = PCA().fit(rows).transform(rows)
    return np.squeeze(projected.reshape(1, 400))

def img_sim_score(features_1, features_2):
    """Cosine distance between two feature vectors.

    Computes 1 - (a . b) / (||a||_2 * ||b||_2): 0 when the vectors point in
    the same direction, 1 when they are orthogonal, 2 when they are opposite.
    """
    magnitude = norm(features_1, 2) * norm(features_2, 2)
    cosine = np.dot(features_1, features_2) / magnitude
    return 1 - cosine

In [3]:
def img_sim_iterator(image_set_1, image_set_2, img_sim_type):
    """Collect pairwise `img_sim_score` values between two sets of feature vectors.

    Parameters
    ----------
    image_set_1, image_set_2 : sequence of numpy arrays
        Feature vectors to compare.
    img_sim_type : str
        'intra' -- both arguments describe the same set (checked element by
        element with `np.array_equal`); every ordered pair (i, j) with
        i != j is scored, so an image is never compared with itself.
        'cross' -- the two sets differ; every pair (i, j) is scored.

    Returns
    -------
    list
        The scores in row-major (i, then j) order. The list is empty when the
        requested mode does not match the data ('intra' with differing sets,
        'cross' with matching sets) or when `img_sim_type` is unrecognised.
    """
    # zip() stops at the shorter set, so only the common prefix is compared.
    sets_match = all(
        np.array_equal(m, n) for m, n in zip(image_set_1, image_set_2)
    )

    if img_sim_type == 'intra' and sets_match:
        # Skip self-pairs by position. The previous check compared the bound
        # methods `a.all == b.all` (never called), which looks at object
        # identity rather than array contents and therefore let self-pairs
        # through whenever the two sets were equal-content copies.
        return [
            img_sim_score(first, second)
            for i, first in enumerate(image_set_1)
            for j, second in enumerate(image_set_2)
            if i != j
        ]

    if img_sim_type == 'cross' and not sets_match:
        return [
            img_sim_score(first, second)
            for first in image_set_1
            for second in image_set_2
        ]

    return []

def img_sim_intra_calculator(image_list):
    """Mean pairwise score among the images of a single group.

    Each path in `image_list` is passed through `extract_features` and
    `reduce_dimensions`; the resulting vectors are compared with themselves
    via `img_sim_iterator(..., 'intra')` and the scores are averaged with
    `np.mean`.
    """
    reduced = [reduce_dimensions(extract_features(path)) for path in image_list]
    return np.mean(img_sim_iterator(reduced, reduced, 'intra'))

def img_sim_cross_calculator(image_list):
    """Mean of the per-pair cross scores over every pair of groups.

    `image_list` is a list of groups, each a list of image paths. Every image
    is passed through `extract_features` and `reduce_dimensions`. Each
    unordered pair of groups is then scored with
    `img_sim_iterator(..., 'cross')`, the scores of a pair are averaged, and
    the mean of those per-pair means is returned.
    """
    groups = [
        [reduce_dimensions(extract_features(path)) for path in paths]
        for paths in image_list
    ]

    pair_means = []
    # Take groups off the end one at a time and compare each against every
    # group still left, so each unordered pair is visited exactly once.
    while groups:
        current = groups.pop()
        for remaining in groups:
            pair_means.append(
                np.mean(img_sim_iterator(current, remaining, 'cross'))
            )
    return np.mean(pair_means)

### 3. Calculating Image Similarity Scores

### <b style="color:red">CEO</b>

#### Loading the images

In [4]:
# One list of image paths per language/locale folder under Faces/CEO.
# glob() already returns a list, so no extra comprehension is needed.
ceo_ar = glob('Faces/CEO/Arabic/*')
ceo_engna = glob('Faces/CEO/English_NA/*')
ceo_engwe = glob('Faces/CEO/English_WE/*')
ceo_hi = glob('Faces/CEO/Hindi/*')
ceo_id = glob('Faces/CEO/Indonesian/*')
ceo_mnd = glob('Faces/CEO/Mandarin/*')
ceo_ru = glob('Faces/CEO/Russian/*')
ceo_es = glob('Faces/CEO/Spanish/*')
ceo_sw = glob('Faces/CEO/Swahili/*')

# Groups in the fixed order that the results table zips against its locale labels.
ceo_list = [
    ceo_ar, ceo_engna, ceo_engwe, ceo_hi, ceo_id,
    ceo_mnd, ceo_ru, ceo_es, ceo_sw,
]

#### Calculating Img_Sim Scores

In [5]:
# One intra-group score per language/locale group.
ceo_img_sim_intra_scores_list = [
    img_sim_intra_calculator(paths) for paths in ceo_list
]

# A single cross-group score for the whole query, repeated nine times so it
# can sit next to the nine per-group rows of the results table.
ceo_img_sim_cross_score = img_sim_cross_calculator(ceo_list)
ceo_img_sim_cross_score_list = [ceo_img_sim_cross_score] * 9

#### Creating Data Frame to store results

In [6]:
# Row labels and column names for the results tables.
lang_locale_list = [
    'Arabic-West Asia & North Africa',
    'English-North America',
    'English-West Europe',
    'Hindi-South Asia',
    'Indonesian-SE Asia',
    'Mandarin-East Asia',
    'Russian-East Europe',
    'Spanish-Latin America',
    'Swahili-Sub Saharan Africa',
]
column_list = ['Query', 'Lang&Locale', 'Image_List', 'ImgSim_Intra', 'ImgSim_Cross']
ceo_query_list = ['CEO'] * 9

# One row per language/locale group; zip() pairs the parallel lists by position.
ceo_df = pd.DataFrame(
    data=list(zip(
        ceo_query_list,
        lang_locale_list,
        ceo_list,
        ceo_img_sim_intra_scores_list,
        ceo_img_sim_cross_score_list,
    )),
    columns=column_list,
)

In [13]:
# Write the CEO results table to disk.
# NOTE(review): assumes the Results/ directory already exists -- confirm.
ceo_df.to_csv('Results/ceo.csv')

### <b style="color:red">Engineer</b>

#### Loading the images

In [8]:
# One list of image paths per language/locale folder under Faces/Engineer.
# glob() already returns a list, so no extra comprehension is needed.
engr_ar = glob('Faces/Engineer/Arabic/*')
engr_engna = glob('Faces/Engineer/English_NA/*')
engr_engwe = glob('Faces/Engineer/English_WE/*')
engr_hi = glob('Faces/Engineer/Hindi/*')
engr_id = glob('Faces/Engineer/Indonesian/*')
engr_mnd = glob('Faces/Engineer/Mandarin/*')
engr_ru = glob('Faces/Engineer/Russian/*')
engr_es = glob('Faces/Engineer/Spanish/*')
engr_sw = glob('Faces/Engineer/Swahili/*')

# Groups in the fixed order that the results table zips against its locale labels.
engr_list = [
    engr_ar, engr_engna, engr_engwe, engr_hi, engr_id,
    engr_mnd, engr_ru, engr_es, engr_sw,
]

#### Calculating Img_Sim Scores

In [9]:
# One intra-group score per language/locale group.
engr_img_sim_intra_scores_list = [
    img_sim_intra_calculator(paths) for paths in engr_list
]

# A single cross-group score for the whole query, repeated nine times so it
# can sit next to the nine per-group rows of the results table.
engr_img_sim_cross_score = img_sim_cross_calculator(engr_list)
engr_img_sim_cross_score_list = [engr_img_sim_cross_score] * 9

#### Creating Data Frame to store results

In [10]:
# Constant query label for each of the nine language/locale rows.
engr_query_list = ['Engineer'] * 9

# One row per language/locale group; zip() pairs the parallel lists by position.
engr_df = pd.DataFrame(
    data=list(zip(
        engr_query_list,
        lang_locale_list,
        engr_list,
        engr_img_sim_intra_scores_list,
        engr_img_sim_cross_score_list,
    )),
    columns=column_list,
)

In [14]:
# Write the Engineer results table to disk.
# NOTE(review): assumes the Results/ directory already exists -- confirm.
engr_df.to_csv('Results/engr.csv')

### <b style="color:red">Nurse</b>

#### Loading the images

In [15]:
# One list of image paths per language/locale folder under Faces/Nurse.
# glob() already returns a list, so no extra comprehension is needed.
nur_ar = glob('Faces/Nurse/Arabic/*')
nur_engna = glob('Faces/Nurse/English_NA/*')
nur_engwe = glob('Faces/Nurse/English_WE/*')
nur_hi = glob('Faces/Nurse/Hindi/*')
nur_id = glob('Faces/Nurse/Indonesian/*')
nur_mnd = glob('Faces/Nurse/Mandarin/*')
nur_ru = glob('Faces/Nurse/Russian/*')
nur_es = glob('Faces/Nurse/Spanish/*')
nur_sw = glob('Faces/Nurse/Swahili/*')

# Groups in the fixed order that the results table zips against its locale labels.
nur_list = [
    nur_ar, nur_engna, nur_engwe, nur_hi, nur_id,
    nur_mnd, nur_ru, nur_es, nur_sw,
]

#### Calculating Img_Sim Scores

In [19]:
# One intra-group score per language/locale group.
nur_img_sim_intra_scores_list = [
    img_sim_intra_calculator(paths) for paths in nur_list
]

# A single cross-group score for the whole query, repeated nine times so it
# can sit next to the nine per-group rows of the results table.
nur_img_sim_cross_score = img_sim_cross_calculator(nur_list)
nur_img_sim_cross_score_list = [nur_img_sim_cross_score] * 9

#### Creating Data Frame to store results

In [20]:
# Constant query label for each of the nine language/locale rows.
nur_query_list = ['Nurse'] * 9

# One row per language/locale group; zip() pairs the parallel lists by position.
nur_df = pd.DataFrame(
    data=list(zip(
        nur_query_list,
        lang_locale_list,
        nur_list,
        nur_img_sim_intra_scores_list,
        nur_img_sim_cross_score_list,
    )),
    columns=column_list,
)

In [24]:
# Write the Nurse results table to disk.
# NOTE(review): assumes the Results/ directory already exists -- confirm.
nur_df.to_csv('Results/nur.csv')

### <b style="color:red">Politician</b>

#### Loading the images

In [25]:
# One list of image paths per language/locale folder under Faces/Politician.
# glob() already returns a list, so no extra comprehension is needed.
pol_ar = glob('Faces/Politician/Arabic/*')
pol_engna = glob('Faces/Politician/English_NA/*')
pol_engwe = glob('Faces/Politician/English_WE/*')
pol_hi = glob('Faces/Politician/Hindi/*')
pol_id = glob('Faces/Politician/Indonesian/*')
pol_mnd = glob('Faces/Politician/Mandarin/*')
pol_ru = glob('Faces/Politician/Russian/*')
pol_es = glob('Faces/Politician/Spanish/*')
pol_sw = glob('Faces/Politician/Swahili/*')

# Groups in the fixed order that the results table zips against its locale labels.
pol_list = [
    pol_ar, pol_engna, pol_engwe, pol_hi, pol_id,
    pol_mnd, pol_ru, pol_es, pol_sw,
]

#### Calculating Img_Sim Scores

In [26]:
# One intra-group score per language/locale group.
pol_img_sim_intra_scores_list = [
    img_sim_intra_calculator(paths) for paths in pol_list
]

# A single cross-group score for the whole query, repeated nine times so it
# can sit next to the nine per-group rows of the results table.
pol_img_sim_cross_score = img_sim_cross_calculator(pol_list)
pol_img_sim_cross_score_list = [pol_img_sim_cross_score] * 9

#### Creating Data Frame to store results

In [27]:
# Constant query label for each of the nine language/locale rows.
pol_query_list = ['Politician'] * 9

# One row per language/locale group; zip() pairs the parallel lists by position.
pol_df = pd.DataFrame(
    data=list(zip(
        pol_query_list,
        lang_locale_list,
        pol_list,
        pol_img_sim_intra_scores_list,
        pol_img_sim_cross_score_list,
    )),
    columns=column_list,
)

In [30]:
# Write the Politician results table to disk.
# NOTE(review): assumes the Results/ directory already exists -- confirm.
pol_df.to_csv('Results/pol.csv')

### <b style="color:red">School Teacher</b>

#### Loading the images

In [31]:
# One list of image paths per language/locale folder under Faces/School Teacher.
# glob() already returns a list, so no extra comprehension is needed.
st_ar = glob('Faces/School Teacher/Arabic/*')
st_engna = glob('Faces/School Teacher/English_NA/*')
st_engwe = glob('Faces/School Teacher/English_WE/*')
st_hi = glob('Faces/School Teacher/Hindi/*')
st_id = glob('Faces/School Teacher/Indonesian/*')
st_mnd = glob('Faces/School Teacher/Mandarin/*')
st_ru = glob('Faces/School Teacher/Russian/*')
st_es = glob('Faces/School Teacher/Spanish/*')
st_sw = glob('Faces/School Teacher/Swahili/*')

# Groups in the fixed order that the results table zips against its locale labels.
st_list = [
    st_ar, st_engna, st_engwe, st_hi, st_id,
    st_mnd, st_ru, st_es, st_sw,
]

#### Calculating Img_Sim Scores

In [32]:
# One intra-group score per language/locale group.
st_img_sim_intra_scores_list = [
    img_sim_intra_calculator(paths) for paths in st_list
]

# A single cross-group score for the whole query, repeated nine times so it
# can sit next to the nine per-group rows of the results table.
st_img_sim_cross_score = img_sim_cross_calculator(st_list)
st_img_sim_cross_score_list = [st_img_sim_cross_score] * 9

#### Creating Data Frame to store results

In [33]:
# Constant query label for each of the nine language/locale rows.
st_query_list = ['School Teacher'] * 9

# One row per language/locale group; zip() pairs the parallel lists by position.
st_df = pd.DataFrame(
    data=list(zip(
        st_query_list,
        lang_locale_list,
        st_list,
        st_img_sim_intra_scores_list,
        st_img_sim_cross_score_list,
    )),
    columns=column_list,
)

In [36]:
# Write the School Teacher results table to disk.
# NOTE(review): assumes the Results/ directory already exists -- confirm.
st_df.to_csv('Results/st.csv')