## Cloning Data

In [1]:
# Fetch the exercise repository; the image folders read in later cells live under its data/ directory.
!git clone https://github.com/digs1998/2021_internship_exc.git

Cloning into '2021_internship_exc'...
remote: Enumerating objects: 146, done.[K
remote: Counting objects: 100% (146/146), done.[K
remote: Compressing objects: 100% (145/145), done.[K
remote: Total 146 (delta 11), reused 112 (delta 0), pack-reused 0[K
Receiving objects: 100% (146/146), 737.99 KiB | 16.77 MiB/s, done.
Resolving deltas: 100% (11/11), done.


## Loading Libraries

In [2]:
import numpy as np
import argparse
import os
import matplotlib.pyplot as plt
from scipy import spatial
import random
import ssl
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import *
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
import pickle 

from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Check that TensorFlow can see a GPU: the cell displays the GPU device name it
# reports ('/device:GPU:0' in the recorded run).
tf.test.gpu_device_name()

'/device:GPU:0'

In [4]:
def load_images_from_folder(folder):
    """Load every regular file in ``folder`` as a 224x224 image array.

    Parameters
    ----------
    folder : str
        Directory whose files are read with ``load_img``.

    Returns
    -------
    list
        One array per file, in sorted filename order. Each array is the
        ``img_to_array`` result with a leading axis of length 1 prepended.
        The list is empty when the folder holds no regular files.
    """
    images = []
    # os.listdir makes no ordering promise; sorting keeps the result
    # reproducible across runs and machines.
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        # Sub-directories cannot be decoded as images, so skip them instead
        # of letting load_img fail on them.
        if not os.path.isfile(path):
            continue
        img = img_to_array(load_img(path, target_size=(224, 224)))
        # Prepend a batch axis so each entry is a batch of one image.
        images.append(img.reshape((1,) + img.shape))
    return images

In [5]:
def get_all_image_arrays(color_dir='/content/2021_internship_exc/data/color',
                         grayscale_dir='/content/2021_internship_exc/data/grayscale'):
  """Load the color and grayscale image folders into a single NumPy array.

  Parameters
  ----------
  color_dir : str, optional
      Folder of color images. Defaults to the cloned repository's
      ``data/color`` directory.
  grayscale_dir : str, optional
      Folder of grayscale images. Defaults to the cloned repository's
      ``data/grayscale`` directory.

  Returns
  -------
  numpy.ndarray
      ``np.array`` built from a one-element list that holds the color
      images followed by the grayscale images.
  """
  image1 = load_images_from_folder(color_dir)
  image2 = load_images_from_folder(grayscale_dir)

  # NOTE(review): the combined list is wrapped in another list, so the result
  # gets an extra leading axis of length 1. Kept as-is so the return shape does
  # not change for existing callers -- confirm whether that axis is intended.
  all_arrays = np.array([image1+image2])
  return all_arrays

## Cosine Similarity

In [6]:
# # cosine similarity (manual version, unused: sklearn's cosine_similarity is used below)
# def cosine_similarity(ratings):
#     sim = ratings.dot(ratings.T)
#     norms = np.array([np.sqrt(np.diagonal(sim))])
#     return (sim / norms / norms.T)

## Creating Datasets from Color Images

In [7]:
# Folder of color images and the spatial size every image is resized to.
COLOR_DIR = "/content/2021_internship_exc/data/color"
TARGET_SIZE = (224, 224)

# os.listdir makes no ordering promise; sorting keeps row order (and therefore
# the similarity matrix computed later) reproducible across runs.
files = sorted(os.listdir(COLOR_DIR))
ytest = []      # label per image: the first two characters of its filename
batches = []    # one (height, width, channels) array per image
for file in files:
  abs_file_path = os.path.join(COLOR_DIR, file)
  img = image.load_img(abs_file_path, target_size=TARGET_SIZE)
  ytest.append(file[0:2])
  batches.append(image.img_to_array(img))

# Stack once at the end instead of concatenating inside the loop, which
# re-copies the whole growing array on every iteration.
if batches:
  xtest = np.stack(batches)
else:
  # Keep xtest an array even for an empty folder so later cells see a shape.
  xtest = np.empty((0, *TARGET_SIZE, 3), dtype="float32")

In [8]:
# Sanity check: one row per loaded image, each resized to 224x224
# (60 images with 3 channels in the recorded run).
xtest.shape

(60, 224, 224, 3)

## Creating the VGG-16 Model

In [9]:
# Convert the images to the input format VGG16 expects. preprocess_input may
# overwrite a float array in place, so preprocess a copy and bind the result to
# a new name: re-running this cell then never preprocesses the data twice.
xtest_vgg = preprocess_input(xtest.copy())

# include_top=False drops the last 3 fully-connected layers, leaving the
# convolutional base; for 224x224 inputs each image yields a (7, 7, 512)
# feature map.
model = VGG16(weights='imagenet', include_top=False)

# Use VGG16 as a fixed feature extractor.
features = model.predict(xtest_vgg)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [10]:
# Flatten each image's feature map into one row vector. Taking the row count
# from `features` itself and letting -1 infer the width keeps this correct if
# the input size (and so the feature-map size) ever changes.
features_compress = features.reshape(features.shape[0], -1)

# Compute the pairwise cosine similarity between all images.
cos_sim = cosine_similarity(features_compress)

In [11]:
# Show the pairwise similarity matrix (NumPy abbreviates the middle rows/columns with "...").
print(cos_sim)

[[1.         0.54535335 0.6393365  ... 0.83716935 0.889364   0.54535335]
 [0.54535335 1.         0.50482345 ... 0.51372063 0.5261925  1.        ]
 [0.6393365  0.50482345 1.0000001  ... 0.62474465 0.6186621  0.50482345]
 ...
 [0.83716935 0.51372063 0.62474465 ... 1.         0.8998418  0.51372063]
 [0.889364   0.5261925  0.6186621  ... 0.8998418  1.0000002  0.5261925 ]
 [0.54535335 1.         0.50482345 ... 0.51372063 0.5261925  1.        ]]


In [12]:
# Save the similarity matrix as a real NumPy .npz archive. Writing it with
# pickle.dump under a .npz filename produces a pickle stream, not an npz
# archive, so the file would not match its extension.
# Read it back with: np.load('vgg16_similarity.npz')['cos_sim']
np.savez('vgg16_similarity.npz', cos_sim=cos_sim)

## Summary
- I implemented a VGG-16-based cosine similarity model.
- I did not use the suggested input parameters, as I was not sure how to proceed with them.
- For the output, I saved the similarity matrix to `vgg16_similarity.npz`.