## Loading the model and index

In [1]:
import os # accessing directory structure
from transformers import BertTokenizer, BertModel
import faiss 
import torch
import numpy as np

In [2]:
# Directory holding the saved artifacts this notebook reads
# (FAISS index, BERT model/tokenizer files, and the index_to_id pickle).
path_prefix = "../model-artifacts"

In [3]:
import pickle

# Load the FAISS index from disk.
faiss_index = faiss.read_index(f"{path_prefix}/faiss_index.idx")
# Tokenizer and encoder are both loaded from the same saved model directory.
tokenizer = BertTokenizer.from_pretrained(f"{path_prefix}/bert_model")
encoder = BertModel.from_pretrained(f"{path_prefix}/bert_model")

In [4]:
# Load the lookup that search_index uses to translate FAISS result positions
# into stored entries.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point this at artifacts that come from a trusted source.
with open(f"{path_prefix}/index_to_id.pickle", 'rb') as handle:
    index_to_id = pickle.load(handle)

In [5]:
def search_index(query, k, faiss_index, index_to_id, tokenizer, encoder):
    """Return the nearest index entries for ``query``.

    The query is embedded with ``encode_query`` and looked up in
    ``faiss_index``; every returned position is translated through
    ``index_to_id``.

    Args:
        query: Text to search for (forwarded unchanged to ``encode_query``).
        k: Number of neighbours to request from the index.
        faiss_index: Object exposing ``search(embeddings, k)`` and returning
            ``(distances, indices)`` arrays with one row per query.
        index_to_id: Lookup from index position to the stored entry.
        tokenizer: Tokenizer forwarded to ``encode_query``.
        encoder: Encoder forwarded to ``encode_query``.

    Returns:
        A list of ``(index_to_id[idx], score)`` pairs in the order the index
        returned them, where ``score`` is the value reported by the index for
        that hit. The list is shorter than ``k`` when the index holds fewer
        than ``k`` vectors.
    """
    query_embedding = encode_query(query, tokenizer, encoder)
    distances, indices = faiss_index.search(query_embedding, k)

    # FAISS fills the tail of the result with label -1 when it finds fewer
    # than k neighbours. Looking that up would either raise (dict) or silently
    # return the last entry (list), so those placeholders are skipped.
    return [
        (index_to_id[idx], distances[0][i])
        for i, idx in enumerate(indices[0])
        if idx != -1
    ]


def encode_titles_batch(titles, batch_size=32, tokenizer=None, encoder=None):
    """Embed ``titles`` in batches and return one vector per title.

    Each batch is tokenised (padded to the longest item in the batch,
    truncated to 128 tokens), run through the encoder, and mean-pooled over
    the sequence dimension of ``last_hidden_state``.

    Args:
        titles: Sliceable collection of strings to embed.
        batch_size: Number of titles per forward pass; must be >= 1.
        tokenizer: Tokenizer to use. Defaults to the notebook-level
            ``tokenizer`` so existing two-argument calls keep working.
        encoder: Encoder to use. Defaults to the notebook-level ``encoder``.

    Returns:
        ``numpy.ndarray`` with one row per title. For empty input the result
        has shape ``(0, encoder.config.hidden_size)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Fall back to the notebook globals the original implementation relied on.
    if tokenizer is None:
        tokenizer = globals()["tokenizer"]
    if encoder is None:
        encoder = globals()["encoder"]

    n_titles = len(titles)
    if n_titles == 0:
        # np.vstack([]) would raise; hand back a well-formed empty matrix.
        return np.empty((0, encoder.config.hidden_size), dtype=np.float32)

    # Ceiling division: the former `len // batch_size + 1` over-counted by one
    # whenever the number of titles was an exact multiple of batch_size.
    n_batches = -(-n_titles // batch_size)

    all_embeddings = []
    for batch_no, start in enumerate(range(0, n_titles, batch_size), start=1):
        batch = titles[start:start + batch_size]
        inputs = tokenizer(batch, padding=True, truncation=True, return_tensors="pt", max_length=128)

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = encoder(**inputs)

        # NOTE(review): this mean runs over every sequence position, so padded
        # positions of shorter titles are averaged in as well. Left unchanged
        # because an already-built index may depend on these exact vectors.
        embeddings = outputs.last_hidden_state.mean(dim=1).detach().numpy()
        all_embeddings.append(embeddings)

        print(f"Processed batch {batch_no}/{n_batches}")

    # Stack the per-batch matrices into a single (n_titles, hidden) array.
    return np.vstack(all_embeddings)

def encode_query(query, tokenizer, encoder):
    """Embed ``query`` by mean-pooling the encoder's last hidden state.

    Args:
        query: Text (or list of texts) to embed; passed straight to
            ``tokenizer`` with padding, truncation and ``max_length=128``.
        tokenizer: Callable returning PyTorch tensors accepted by ``encoder``.
        encoder: Callable whose result exposes ``last_hidden_state``.

    Returns:
        C-contiguous ``float32`` ``numpy.ndarray`` with one row per input
        text, suitable for passing to ``faiss_index.search``.
    """
    inputs = tokenizer(query, padding=True, truncation=True, return_tensors="pt", max_length=128)

    # Inference only: avoid recording an autograd graph for the forward pass.
    with torch.no_grad():
        outputs = encoder(**inputs)

    # Average over the sequence dimension to get one vector per input.
    embeddings = outputs.last_hidden_state.mean(dim=1).detach().numpy()

    # FAISS expects float32, C-contiguous input; this is a no-op for the
    # default float32 model and a safe conversion otherwise.
    return np.ascontiguousarray(embeddings, dtype=np.float32)

In [6]:
# Smoke test: ask the index for the 10 nearest entries to a sample query.
# NOTE(review): the recorded hits look unrelated to the query and the scores
# decrease down the list. Confirm which metric the index was built with and
# whether the embeddings should be normalised before indexing/searching.
search_index('toy story', 10, faiss_index, index_to_id, tokenizer, encoder)

[((405,
   'Highlander III: The Sorcerer (a.k.a. Highlander: The Final Dimension) (1994)'),
  47.149372),
 ((32, 'Twelve Monkeys (a.k.a. 12 Monkeys) (1995)'), 45.424206),
 ((47, 'Seven (a.k.a. Se7en) (1995)'), 45.41999),
 ((293, 'Léon: The Professional (a.k.a. The Professional) (Léon) (1994)'),
  44.40825),
 ((366,
   "Wes Craven's New Nightmare (Nightmare on Elm Street Part 7: Freddy's Finale, A) (1994)"),
  43.299595),
 ((83, 'Once Upon a Time... When We Were Colored (1995)'), 42.53481),
 ((284, 'New York Cop (Nyû Yôku no koppu) (1993)'), 41.428364),
 ((112, 'Rumble in the Bronx (Hont faan kui) (1995)'), 40.592518),
 ((154, 'Beauty of the Day (Belle de jour) (1967)'), 40.41004),
 ((63,
   "Don't Be a Menace to South Central While Drinking Your Juice in the Hood (1996)"),
  39.788284)]