# Search Engine

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import time
import os
def time_elapsed(s):
    '''
    Function to establish baselines for time needed to setup a search engine
    '''
    print("Time Elapsed: {} seconds".format(round(time.time() - s, 4)))

In [4]:
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

import PIL
import pickle
import os
import traceback
import json
import logging
import numpy as np

from search import SearchEngine
from networks import FeatureExtractor


## Initialize SearchEngine

In [21]:
save_directory = './embeddings'
search_engine = SearchEngine(["text", "image"], save_directory = save_directory, verbose = True)

## Add models

In [22]:
resnet152_15 = pickle.load(open("pickles/models/entire_nuswide_model_15.p", "rb"))
search_engine.add_model(
    name = "ResNet152_15", 
    modalities = ["image","text"], 
    embedding_nets = [resnet152_15.modalityOneNet, resnet152_15.modalityTwoNet],
    input_dimensions= [(2048,), (300,)], 
    output_dimension = 64, 
    desc = "ResNet152 trained with 15 epochs")
search_engine.models["ResNet152_15"].add_preprocessing("image", FeatureExtractor("resnet152").get_embedding)

In [23]:
resnet152_5 = pickle.load(open("pickles/models/entire_nuswide_model_5.p", "rb"))
search_engine.add_model(
    name = "ResNet152_5", 
    modalities = ["image","text"], 
    embedding_nets = [resnet152_5.modalityOneNet, resnet152_5.modalityTwoNet],
    input_dimensions= [(2048,), (300,)], 
    output_dimension = 64, 
    desc = "ResNet152 trained with 5 epochs")
search_engine.models["ResNet152_5"].add_preprocessing("image", FeatureExtractor("resnet152").get_embedding)

In [24]:
resnet18_5 = pickle.load(open("pickles/models/entire_nuswide_model_5-18.p", "rb"))
search_engine.add_model(
    name = "ResNet18_5", 
    modalities = ["image","text"], 
    embedding_nets = [resnet18_5.modalityOneNet, resnet18_5.modalityTwoNet],
    input_dimensions= [(512,), (300,)], 
    output_dimension = 64, 
    desc = "ResNet18 trained with 5 epochs")
search_engine.models["ResNet18_5"].add_preprocessing("image", FeatureExtractor("resnet18").get_embedding)

## Create Datasets

In [9]:
image_directory = 'data/Flickr'
image_from_idx = [i[0] for i in ImageFolder(image_directory).samples]

In [10]:
s = time.time()

image_data18 = np.array([])
directory_18 = "data/nuswide_features/resnet18/"
filenames = sorted(["{}/{}".format(directory_18, filename) 
                    for filename in os.listdir(directory_18) if filename[-3:] == "npy"])
for filename in filenames:
    image_data18 = np.append(image_data18, np.load(filename,).astype('float32'))
    
time_elapsed(s)

Time Elapsed: 23.2935 seconds


In [11]:
s = time.time()

image_data152 = np.array([])
directory_152 = "data/nuswide_features/resnet152/"
filenames = sorted(["{}/{}".format(directory_152, filename) 
                    for filename in os.listdir(directory_152) if filename[-3:] == "npy"])
for filename in filenames:
    image_data152 = np.append(image_data152, np.load(filename).astype('float32'))
    
time_elapsed(s)

Time Elapsed: 94.5312 seconds


In [12]:
s = time.time()

FAST_TEXT = pickle.load(open("pickles/word_embeddings/word_embeddings_tensors.p", "rb"))
fast_text = FAST_TEXT
text_from_idx = [None] * len(fast_text)
text_data = [None] * len(fast_text)
for idx, (key, value) in enumerate(fast_text.items()):
    text_from_idx[idx] = key
    text_data[idx] = (value, idx)

time_elapsed(s)

Time Elapsed: 69.706 seconds


## Build DataLoaders

In [13]:
batch_size = 128

image18_dataloader = DataLoader(image_data18, batch_size = batch_size)
image152_dataloader = DataLoader(image_data152, batch_size = batch_size)
text_dataloader = DataLoader(text_data, batch_size = batch_size)

In [25]:
search_engine.add_dataset(
    name = "fast_text", 
    data = text_dataloader, 
    targets = text_from_idx, 
    modality = "text", 
    dimension = (300,))

In [26]:
search_engine.add_dataset(
    name = "nus-wide_18", 
    data = image18_dataloader, 
    targets = image_from_idx, 
    modality = "image", 
    dimension = (512,))

In [27]:
search_engine.add_dataset(
    name = "nus-wide_152", 
    data = image152_dataloader, 
    targets = image_from_idx, 
    modality = "image", 
    dimension = (2048,))

## Build Indexes

In [29]:
search_engine.build_index(
    dataset_name = "nus-wide_152",
    model_name = "ResNet152_15",
    binarized = False,
    load_embeddings = False,
    step_size = 100)

Building ResNet152_15, nus-wide_152 index


RuntimeError: Expected object of type torch.DoubleTensor but found type torch.cuda.FloatTensor for argument #2 'mat2'

In [None]:
dataset = search_engine.datasets["nus-wide_152"]

In [None]:
print("ok!")