In [1]:
import flask
import difflib
import pandas as pd
from flask import Flask, request, render_template
app = Flask(__name__)
import os
from PIL import Image
import torch
import torchvision
from torchvision import transforms

In [2]:
# needed input dimensions for the CNN
inputDim = (224,224)
inputDir = "OriginalImages"
inputDirCNN = "inputImagesCNN"

os.makedirs(inputDirCNN, exist_ok = True)

transformationForCNNInput = transforms.Compose([transforms.Resize(inputDim)])

# resize every image in inputDir and store the result under the same file name in inputDirCNN
for imageName in os.listdir(inputDir):
    # the context manager closes the source file even if resizing or saving fails
    with Image.open(os.path.join(inputDir, imageName)) as I:
        newI = transformationForCNNInput(I)

        # copy the rotation information metadata from the original image, else the transformed
        # image may be rotated; images without EXIF data have no 'exif' entry, so only
        # forward it when it is present instead of failing with a KeyError
        saveArgs = {}
        exif = I.info.get('exif')
        if exif is not None:
            saveArgs['exif'] = exif

        try:
            newI.save(os.path.join(inputDirCNN, imageName), **saveArgs)
        finally:
            newI.close()

In [3]:
import torch
from tqdm import tqdm
from torchvision import models

# For this prototype we run on the CPU only (no CUDA) and use ResNet-18 as the model to obtain the feature vectors

class Img2VecResnet18():
    """Turn an image into a feature vector using a pretrained ResNet-18.

    The vector is read from the output of the network's 'avgpool' layer with a
    forward hook, so it has `numberFeatures` (512) entries. Everything runs on the CPU.
    """

    def __init__(self):
        
        self.device = torch.device("cpu")
        self.numberFeatures = 512
        self.modelName = "resnet-18"
        self.model, self.featureLayer = self.getFeatureLayer()
        self.model = self.model.to(self.device)
        # evaluation mode: this class is only used for inference
        self.model.eval()
        self.toTensor = transforms.ToTensor()
        
        # normalize the resized images as expected by resnet18
        # [0.485, 0.456, 0.406] --> normalized mean value of ImageNet, [0.229, 0.224, 0.225] std of ImageNet
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        
    def getVec(self, img):
        """Return the feature vector of `img` as a 1-D numpy array of length `numberFeatures`.

        `img` is whatever `self.toTensor` accepts; the notebook passes PIL images that
        were already resized to the network's input size.
        """
        # the normalisation above is defined for three channels, so objects offering a
        # PIL-style convert() (grayscale, RGBA, palette images ...) are brought to RGB first
        if hasattr(img, "convert"):
            img = img.convert("RGB")

        image = self.normalize(self.toTensor(img)).unsqueeze(0).to(self.device)
        embedding = torch.zeros(1, self.numberFeatures, 1, 1)

        # forward hook: copy the output of the feature layer into `embedding`
        def copyData(m, i, o): embedding.copy_(o.detach())

        h = self.featureLayer.register_forward_hook(copyData)
        try:
            # inference only, so no autograd graph is needed
            with torch.no_grad():
                self.model(image)
        finally:
            # always unregister, otherwise a failed forward pass would leave the hook
            # attached to the layer for every later call
            h.remove()

        return embedding.numpy()[0, :, 0, 0]

    def getFeatureLayer(self):
        """Load the pretrained ResNet-18 and return `(model, avgpool layer)`."""
        
        # NOTE(review): newer torchvision releases prefer the `weights=` argument over
        # `pretrained=True`; kept as-is to stay compatible with the version used here
        cnnModel = models.resnet18(pretrained=True)
        layer = cnnModel._modules.get('avgpool')
        self.layer_output_size = 512
        
        return cnnModel, layer
        

# generate vectors for all the images in the set
img2vec = Img2VecResnet18() 

# maps image file name --> feature vector returned by img2vec.getVec
allVectors = {}
print("Converting images to feature vectors:")
# read from the directory the resized images were written to (inputDirCNN) instead of
# repeating its name, so both cells cannot drift apart
for image in tqdm(os.listdir(inputDirCNN)):
    # the context manager closes the file even if the feature extraction raises
    with Image.open(os.path.join(inputDirCNN, image)) as I:
        allVectors[image] = img2vec.getVec(I)

  0%|          | 0/32 [00:00<?, ?it/s]

Converting images to feature vectors:


100%|██████████| 32/32 [00:05<00:00,  5.45it/s]


In [4]:
# now let us define a function that calculates the cosine similarity entries in the similarity matrix
import pandas as pd
import numpy as np

def getSimilarityMatrix(vectors):
    """Build the pairwise cosine-similarity matrix of the given feature vectors.

    Parameters
    ----------
    vectors : dict
        Maps a name (in this notebook: an image file name) to a 1-D feature vector.
        All vectors must have the same length.

    Returns
    -------
    pandas.DataFrame
        Square frame whose index and columns are the keys of `vectors` (in dict order)
        and whose entry [a, b] is the cosine similarity of the vectors of a and b.
        An empty dict yields an empty frame.
    """
    keys = list(vectors.keys())
    if not keys:
        # nothing to compare; building the matrix below would fail on empty input
        return pd.DataFrame(index = keys, columns = keys, dtype = float)

    # one row per vector
    v = np.array(list(vectors.values()))
    # the norms are needed for every pair, so compute them once
    norms = np.linalg.norm(v, axis=1)
    # cosine similarity = dot product divided by the product of the two norms
    sim = np.inner(v, v) / np.outer(norms, norms)
    matrix = pd.DataFrame(sim, columns = keys, index = keys)
    
    return matrix
        
# cosine similarity of every image with every other image, labelled by image file name
similarityMatrix = getSimilarityMatrix(allVectors)

In [5]:
similarityMatrix

Unnamed: 0,buildings1.jpg,camper2.jpg,pyramiden1.jpg,donkey4.jpg,donkey5.jpg,pyramiden0.jpg,camper3.jpg,buildings0.jpg,buildings2.jpg,camper1.jpg,...,trees4.jpg,rio4.jpg,trees1.jpg,rio0.jpg,rio1.jpg,trees0.jpg,trees2.jpg,rio3.jpg,rio2.jpg,trees3.jpg
buildings1.jpg,1.0,0.540462,0.697433,0.583388,0.552881,0.677508,0.558861,0.856376,0.858726,0.599126,...,0.406242,0.611961,0.432901,0.605854,0.581421,0.426518,0.433913,0.594514,0.57156,0.359845
camper2.jpg,0.540462,1.0,0.614439,0.624673,0.609425,0.642414,0.810027,0.524737,0.528007,0.81724,...,0.591454,0.600308,0.596422,0.569785,0.6324,0.565924,0.590415,0.614955,0.613877,0.53449
pyramiden1.jpg,0.697433,0.614439,1.0,0.611586,0.599755,0.743108,0.695042,0.714783,0.703654,0.608747,...,0.526139,0.686179,0.558244,0.647835,0.686662,0.492586,0.502692,0.715879,0.747089,0.457939
donkey4.jpg,0.583388,0.624673,0.611586,1.0,0.865711,0.587103,0.623971,0.582863,0.578287,0.638439,...,0.520755,0.587648,0.605618,0.507885,0.56076,0.551006,0.582851,0.568415,0.529068,0.482132
donkey5.jpg,0.552881,0.609425,0.599755,0.865711,1.0,0.562975,0.581375,0.5583,0.56732,0.603017,...,0.477957,0.553333,0.574379,0.519078,0.574645,0.520358,0.531918,0.547967,0.524159,0.458996
pyramiden0.jpg,0.677508,0.642414,0.743108,0.587103,0.562975,1.0,0.65511,0.717729,0.66548,0.64106,...,0.629552,0.69105,0.574963,0.659589,0.645607,0.548091,0.567814,0.695305,0.681681,0.530435
camper3.jpg,0.558861,0.810027,0.695042,0.623971,0.581375,0.65511,1.0,0.518374,0.536851,0.704012,...,0.567717,0.630525,0.564895,0.577389,0.656104,0.545239,0.542831,0.656499,0.667522,0.498665
buildings0.jpg,0.856376,0.524737,0.714783,0.582863,0.5583,0.717729,0.518374,1.0,0.870876,0.610174,...,0.440411,0.600621,0.495273,0.554477,0.545171,0.450646,0.481875,0.587631,0.542091,0.398767
buildings2.jpg,0.858726,0.528007,0.703654,0.578287,0.56732,0.66548,0.536851,0.870876,1.0,0.585888,...,0.410176,0.605115,0.476666,0.589767,0.572505,0.458676,0.450332,0.587394,0.556251,0.390784
camper1.jpg,0.599126,0.81724,0.608747,0.638439,0.603017,0.64106,0.704012,0.610174,0.585888,1.0,...,0.523408,0.580385,0.558723,0.550824,0.573379,0.514431,0.565854,0.553948,0.519797,0.475578


In [6]:
from numpy.testing import assert_almost_equal
import pickle

k = 5 # the number of top similar images to be stored

# never ask for more neighbours than there are images: with fewer than k images the
# row assignments below would fail because head() returns fewer than k entries
numStored = min(k, similarityMatrix.shape[0])

# one row per image; column 0 holds the most similar entry, column 1 the next one, ...
similarNames = pd.DataFrame(index = similarityMatrix.index, columns = range(numStored))
similarValues = pd.DataFrame(index = similarityMatrix.index, columns = range(numStored))

for j in tqdm(range(similarityMatrix.shape[0])):
    # the largest similarities of image j, best first
    kSimilar = similarityMatrix.iloc[j, :].sort_values(ascending = False).head(numStored)
    similarNames.iloc[j, :] = list(kSimilar.index)
    similarValues.iloc[j, :] = kSimilar.values
    
# persist both tables so they can be loaded without recomputing the features
similarNames.to_pickle("similarNames.pkl")
similarValues.to_pickle("similarValues.pkl")

100%|██████████| 32/32 [00:00<00:00, 236.06it/s]


In [7]:
similarNames

Unnamed: 0,0,1,2,3,4
buildings1.jpg,buildings1.jpg,buildings2.jpg,buildings5.jpg,buildings4.jpg,buildings0.jpg
camper2.jpg,camper2.jpg,camper0.jpg,camper1.jpg,camper3.jpg,camper4.jpg
pyramiden1.jpg,pyramiden1.jpg,pyramiden4.jpg,rio2.jpg,pyramiden0.jpg,pyramiden3.jpg
donkey4.jpg,donkey4.jpg,donkey5.jpg,donkey0.jpg,donkey3.jpg,donkey2.jpg
donkey5.jpg,donkey5.jpg,donkey4.jpg,donkey0.jpg,donkey3.jpg,donkey2.jpg
pyramiden0.jpg,pyramiden0.jpg,pyramiden4.jpg,pyramiden3.jpg,pyramiden1.jpg,buildings0.jpg
camper3.jpg,camper3.jpg,camper2.jpg,camper0.jpg,pyramiden4.jpg,camper4.jpg
buildings0.jpg,buildings0.jpg,buildings2.jpg,buildings4.jpg,buildings5.jpg,buildings1.jpg
buildings2.jpg,buildings2.jpg,buildings0.jpg,buildings1.jpg,buildings5.jpg,buildings4.jpg
camper1.jpg,camper1.jpg,camper2.jpg,camper4.jpg,camper0.jpg,camper3.jpg


In [8]:
similarValues

Unnamed: 0,0,1,2,3,4
buildings1.jpg,1,0.858726,0.858274,0.857423,0.856376
camper2.jpg,1,0.8877,0.81724,0.810027,0.772123
pyramiden1.jpg,1,0.747345,0.747089,0.743108,0.71774
donkey4.jpg,1,0.865711,0.799056,0.79747,0.702243
donkey5.jpg,1,0.865711,0.814323,0.780378,0.673851
pyramiden0.jpg,1,0.799683,0.791681,0.743108,0.717729
camper3.jpg,1,0.810027,0.767659,0.747246,0.718606
buildings0.jpg,1,0.870876,0.864557,0.858775,0.856376
buildings2.jpg,1,0.870876,0.858726,0.851488,0.84619
camper1.jpg,1,0.81724,0.791287,0.756579,0.704012


In [9]:
def getSimilarImages(image, simNames, simVals):
    """Look up the stored most-similar images of `image`.

    Parameters
    ----------
    image : str
        Row label to look up (in this notebook: an image file name).
    simNames, simVals : pandas.DataFrame
        Tables indexed by image name holding, per row, the names of the most similar
        images and the matching similarity values in the same column order.

    Returns
    -------
    (list, list) or None
        Names and similarity values of the similar images, with the entry of `image`
        itself removed when it is present. For an unknown `image` a message is printed
        and None is returned.

    Raises
    ------
    AssertionError
        If `image` is listed among its own neighbours with a similarity that is not
        1 (to 5 decimals).
    """
    if image not in simNames.index:
        print("'{}' Unknown image".format(image))
        return None

    imgs = list(simNames.loc[image, :])
    vals = list(simVals.loc[image, :])
    if image in imgs:
        # drop the image itself by position so that names and values stay aligned,
        # even when another entry ties with (or rounds above) the self-similarity
        pos = imgs.index(image)
        assert_almost_equal(vals[pos], 1, decimal = 5)
        del imgs[pos]
        del vals[pos]
    return imgs, vals

In [10]:
# NOTE(review): `app` was already created by the same call in the first cell; this creates a
# fresh Flask object, so only routes registered after this line belong to the app that is run
app = Flask(__name__)

In [None]:
from datetime import datetime
from pathlib import Path

In [None]:
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the upload page and, on POST, the images most similar to the upload.

    GET renders 'index.html' without arguments. POST expects a file in the form field
    'query_img', stores it under static/uploaded/, looks its file name up in the
    precomputed similarity tables and renders 'index.html' with `query_path` (the
    stored upload) and `scores`, a list of (similarity, image path) tuples.
    """
    if request.method != 'POST':
        return render_template('index.html')

    file = request.files['query_img']

    # the file name is chosen by the client: keep only its last path component so it
    # cannot point outside the upload directory
    queryName = Path(file.filename or "").name
    if not queryName:
        # form submitted without selecting a file
        return render_template('index.html')

    uploadDir = "static/uploaded/"
    os.makedirs(uploadDir, exist_ok=True)

    def stampedPath(name):
        # timestamp prefix keeps files of different requests apart (':' replaced by '.')
        return uploadDir + datetime.now().isoformat().replace(":", ".") + "_" + name

    # Save input image
    uploaded_img_path = stampedPath(queryName)
    with Image.open(file.stream) as img:  # PIL image
        img.save(uploaded_img_path)

    # Run recommender
    result = getSimilarImages(queryName, similarNames, similarValues)
    if result is None:
        # unknown image: show the upload without recommendations instead of failing
        # NOTE(review): assumes the template tolerates an empty `scores` list -- confirm
        return render_template('index.html',
                               query_path=uploaded_img_path,
                               scores=[])
    simImages, simValues = result

    # show at most four results, fewer when fewer are stored
    scores = []
    for simName, simValue in list(zip(simImages, simValues))[:4]:
        result_img_path = stampedPath(simName)
        with Image.open("OriginalImages/" + simName) as simImg:
            simImg.save(result_img_path)
        scores.append((simValue, result_img_path))

    return render_template('index.html',
                           query_path=uploaded_img_path,
                           scores=scores)


# start Flask's built-in server; this call blocks until the server is stopped
# NOTE(review): host "0.0.0.0" listens on all network interfaces -- confirm that is intended
if __name__=="__main__":
    app.run("0.0.0.0")

 * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [09/Dec/2020 22:09:45] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2020 22:09:52] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2020 22:09:52] "GET /static/uploaded/2020-12-09T22.09.52.102588_buildings4.jpg HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2020 22:09:52] "GET /static/uploaded/2020-12-09T22.09.52.136471_buildings0.jpg HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2020 22:09:52] "GET /static/uploaded/2020-12-09T22.09.52.165332_buildings5.jpg HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2020 22:09:52] "GET /static/uploaded/2020-12-09T22.09.52.207203_buildings1.jpg HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2020 22:09:52] "GET /static/uploaded/2020-12-09T22.09.52.225089_buildings2.jpg HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2020 22:10:02] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2020 22:10:02] "GET /static/uploaded/2020-12-09T22.10.02.341269_donkey0.jpg HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2020 22:10:02] "GET /static/uploaded/20