In [2]:
import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
import pickle

# Load movie data
movies_df = pd.read_csv("./movies.csv")

# Normalise the free-text fields before joining them. With plain `+`, a single
# missing (NaN) field turns the whole description into NaN, which cannot be
# encoded as text, and a NaN title would later break "\n".join() on the titles.
text_fields = movies_df[['title', 'industry', 'studio']].fillna("").astype(str)

# Create a combined description from title, industry, studio and release year
movies_df['description'] = (
    text_fields['title'] + " "
    + text_fields['industry'] + " "
    + text_fields['studio'] + " "
    + movies_df['release_year'].astype(str)
)

# Initialize the SentenceTransformer model to get embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

# Convert movie descriptions to embeddings (one row per movie)
movie_descriptions = movies_df['description'].tolist()
movie_embeddings = model.encode(movie_descriptions)

# Create a FAISS index for fast similarity search.
# FAISS expects a C-contiguous float32 matrix, so make that explicit instead of
# relying on whatever dtype/layout the encoder happens to return.
dimension = movie_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.ascontiguousarray(movie_embeddings, dtype='float32'))

# Row i of the index must correspond to title i; fail loudly if that ever drifts.
movie_titles = text_fields['title'].tolist()
assert index.ntotal == len(movie_titles), (
    f"index holds {index.ntotal} vectors but there are {len(movie_titles)} titles"
)

# Save the FAISS index to a pickle file.
# NOTE: pickle is kept (rather than faiss.write_index) because the search cell
# reads 'faiss_index.pkl' back with pickle.load.
with open('faiss_index.pkl', 'wb') as f:
    pickle.dump(index, f)

# Also save the movie titles, in index order, to retrieve results easily
with open('movie_titles.pkl', 'wb') as f:
    pickle.dump(movie_titles, f)

print("FAISS index saved successfully!")




FAISS index saved successfully!


In [2]:
pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0-cp311-cp311-win_amd64.whl.metadata (4.5 kB)
Collecting numpy<3.0,>=1.25.0 (from faiss-cpu)
  Downloading numpy-2.1.3-cp311-cp311-win_amd64.whl.metadata (60 kB)
     ---------------------------------------- 0.0/60.8 kB ? eta -:--:--
     ------ --------------------------------- 10.2/60.8 kB ? eta -:--:--
     ------------------------- ------------ 41.0/60.8 kB 495.5 kB/s eta 0:00:01
     -------------------------------------- 60.8/60.8 kB 544.1 kB/s eta 0:00:00
Downloading faiss_cpu-1.9.0-cp311-cp311-win_amd64.whl (14.9 MB)
   ---------------------------------------- 0.0/14.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/14.9 MB 1.9 MB/s eta 0:00:08
   ---------------------------------------- 0.1/14.9 MB 1.4 MB/s eta 0:00:11
   ---------------------------------------- 0.1/14.9 MB 1.7 MB/s eta 0:00:09
   ---------------------------------------- 0.1/14.9 MB 1.7 MB/s eta 0:00:09
    --------------------------------------


[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: C:\Users\garge\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [4]:
pip install sentence_transformers --user

^C
Note: you may need to restart the kernel to use updated packages.


In [3]:
import faiss
import numpy as np
import pickle
from sentence_transformers import SentenceTransformer

# Load the FAISS index and movie titles.
# SECURITY: pickle.load can execute arbitrary code while deserializing, so only
# load these two files if they were produced locally by a trusted run.
with open('faiss_index.pkl', 'rb') as f:
    index = pickle.load(f)

with open('movie_titles.pkl', 'rb') as f:
    movie_titles = pickle.load(f)

# Search results are row positions that are used directly as offsets into
# movie_titles; a stale or mismatched pair of files would return wrong titles.
assert index.ntotal == len(movie_titles), (
    f"faiss_index.pkl holds {index.ntotal} vectors but movie_titles.pkl "
    f"holds {len(movie_titles)} titles; rebuild both files together"
)

# Initialize the SentenceTransformer model used to embed queries
model = SentenceTransformer('all-MiniLM-L6-v2')

# Function to get movie search results from FAISS index
def get_movie_search_results(query, k=5):
    """Return the titles of the movies most similar to ``query``, one per line.

    Relies on the notebook-level ``model`` (text encoder), ``index`` (FAISS
    index) and ``movie_titles`` (list of titles in index order).

    Args:
        query: Free-text search string.
        k: Maximum number of titles to return. Defaults to 5.

    Returns:
        A newline-joined string of at most ``k`` titles, in the order the
        index returned them. Empty string if the index returned no valid hit.
    """
    # Encoding a one-element list yields a single embedding row; the index is
    # queried with a 2-D float32 matrix of shape (1, dimension).
    query_matrix = np.asarray(model.encode([query]), dtype='float32')

    # Perform the FAISS search
    _distances, neighbor_ids = index.search(query_matrix, k=k)

    # FAISS pads the id row with -1 when it finds fewer than k neighbours.
    # Skip those: movie_titles[-1] would silently return the last title.
    results = [movie_titles[i] for i in neighbor_ids[0] if i >= 0]

    return "\n".join(results)

# Example: run a free-text query against the FAISS index.
query = "Tell me about Marvel movies"
# response is a single string with one matching title per line
response = get_movie_search_results(query)
print(response)


Thor: The Dark World
Spider-Man: No Way Home
Thor: Love and Thunder
Thor: Ragnarok
Black Panther
