In [1]:
import numpy as np
import numpy.linalg as linalg
import pickle, os, time
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
model = ResNet50(weights='imagenet', include_top=False,
                 input_shape=(224, 224, 3))



In [3]:
img_path = 'testImage.jpg'
img = image.load_img(img_path, target_size=(224, 224))
img_data = image.img_to_array(img)
img_data = np.expand_dims(img_data, axis=0)
img_data = preprocess_input(img_data)
features = model.predict(img_data)
print(features.shape)

(1, 7, 7, 2048)


In [4]:
# function that gets features from resnet
def get_features(model, img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    img_data = image.img_to_array(img)
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    features = model.predict(img_data).flatten()
    features = features / linalg.norm(features)
    return features 

In [50]:
# gets the euclidean distance and cosine loss between a pair of feature representation
def get_difference(feature1, feature2):
    euclidean_loss = np.power(feature1 - feature2, 2)
    euclidean_loss = np.sqrt(np.sum(euclidean_loss))
#     cosine_loss = (feature1.T @ feature2)
    return euclidean_loss

In [58]:
mypath = '/Users/dorazhao/Desktop/images/'
files_path = [mypath + x for x in os.listdir(mypath) if not x.startswith('.')]

In [59]:
coco_m, coco_f, sim_m, sim_f, coco_m_files, coco_f_files, sim_m_files, sim_f_files = ([] for i in range(8))

for file in files_path:
    base = os.path.basename(file).split('_')
    features = get_features(model, file)
    if base[0] is 'f' and base[1] is 'f':
        sim_f.append(features)
        sim_f_files.append(file)
    elif base[0] is 'f' and base[1] is 'm':
        sim_m.append(features)
        sim_m_files.append(file)
    elif base[0] is 'm' and base[1] is 'm':
        coco_m.append(features)
        coco_m_files.append(file)
    else: 
        coco_f.append(features)
        coco_f_files.append(file)

In [61]:
import csv
with open('similarMatch.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['COCO Image', 'Match Image', 'Euclidean Loss'])
    for i, male in enumerate(coco_m):
        diffs = []
        for j, female in enumerate(sim_f):
            euc = get_difference(male, female)
            diffs.append(euc)
        best_i = np.argsort(diffs)[0]
        coco_file = os.path.basename(coco_m_files[i])
        sim_f_file = os.path.basename(sim_f_files[best_i])
        print('COCO File: {0}, Similar Match: {1}, Euclidean Loss: {2}'.format(coco_file, sim_f_file, diffs[best_i]))
        writer.writerow([coco_file, sim_f_file, diffs[best_i]])

COCO File: m_m_frisbee_227482.jpg, Similar Match: f_f_sports_135604_4.jpg, Euclidean Loss: 1.1695038080215454
COCO File: m_m_frisbee_88485.jpg, Similar Match: f_f_frisbee_88485_2.jpg, Euclidean Loss: 1.1360191106796265
COCO File: m_m_tie_152214.jpg, Similar Match: f_f_tie_131444_1.jpg, Euclidean Loss: 1.1849696636199951
COCO File: m_m_surfboard_115898.jpg, Similar Match: f_f_surfboard_115898_1.jpg, Euclidean Loss: 0.9874610900878906
COCO File: m_m_frisbee_328238.jpg, Similar Match: f_f_sports_135604_4.jpg, Euclidean Loss: 1.1178133487701416
COCO File: m_m_sports_135604.jpg, Similar Match: f_f_sports_371552_4.jpg, Euclidean Loss: 1.1168785095214844
COCO File: m_m_tie_170099.jpg, Similar Match: f_f_frisbee_325991_1.jpg, Euclidean Loss: 1.2257771492004395
COCO File: m_m_racket_88970.jpg, Similar Match: f_f_sports_429690_1.jpg, Euclidean Loss: 1.1238709688186646
COCO File: m_m_tie_21604.jpg, Similar Match: f_f_tie_21604_1.jpg, Euclidean Loss: 1.1359667778015137
COCO File: m_m_surfboard_325

In [62]:
with open('similarMatch.csv', 'a') as f:
    writer = csv.writer(f)
    for i, female in enumerate(coco_f):
        diffs = []
        for j, male in enumerate(sim_m):
            euc = get_difference(female, male)
            diffs.append(euc)
        best_i = np.argsort(diffs)[0]
        coco_file = os.path.basename(coco_f_files[i])
        sim_m_file = os.path.basename(sim_m_files[best_i])
        print('COCO File: {0}, Similar Match: {1}, Euclidean Loss: {2}'.format(coco_file, sim_m_file, diffs[best_i]))
        writer.writerow([coco_file, sim_m_file, diffs[best_i]])

COCO File: m_f_handbag_81594.jpg, Similar Match: f_m_handbag_81594_3.jpg, Euclidean Loss: 1.198861002922058
COCO File: m_f_toothbrush_445999.jpg, Similar Match: f_m_toothbrush_445999_2.jpg, Euclidean Loss: 1.1346672773361206
COCO File: m_f_refrigerator_22705.jpg, Similar Match: f_m_refrigerator_456662_1.jpg, Euclidean Loss: 1.1000168323516846
COCO File: m_f_hairdrier_350002_2.jpg, Similar Match: f_m_hairdrier_350002_5.jpg, Euclidean Loss: 1.1814556121826172
COCO File: m_f_refrigerator_280930.jpg, Similar Match: f_m_refrigerator_150417_1.jpg, Euclidean Loss: 1.2233757972717285
COCO File: m_f_refrigerator_280918.jpg, Similar Match: f_m_refrigerator_280930_1.jpg, Euclidean Loss: 1.212421178817749
COCO File: m_f_toothbrush_465179.jpg, Similar Match: f_m_toothbrush_465179_1.jpg, Euclidean Loss: 1.215056300163269
COCO File: m_f_refrigerator_150417.jpg, Similar Match: f_m_refrigerator_150417_1.jpg, Euclidean Loss: 1.138541579246521
COCO File: m_f_toothbrush_324614.jpg, Similar Match: f_m_toot

In [66]:
min(diffs)

1.1951921