In [1]:
pip install pandas numpy sentence-transformers faiss-cpu streamlit

Collecting sentence-transformers
  Downloading sentence_transformers-3.4.1-py3-none-any.whl.metadata (10 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (4.4 kB)
Downloading sentence_transformers-3.4.1-py3-none-any.whl (275 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.9/275.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading faiss_cpu-1.10.0-cp311-cp311-macosx_11_0_arm64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: faiss-cpu, sentence-transformers
Successfully installed faiss-cpu-1.10.0 sentence-transformers-3.4.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd

# Read the movie table from the CSV file into a DataFrame
csv_path = "imdb_top_1000.csv"
df = pd.read_csv(csv_path)

# Print a plain-text preview of the first five rows
first_rows = df.head(n=5)
print(first_rows)

                                         Poster_Link  \
0  https://m.media-amazon.com/images/M/MV5BMDFkYT...   
1  https://m.media-amazon.com/images/M/MV5BM2MyNj...   
2  https://m.media-amazon.com/images/M/MV5BMTMxNT...   
3  https://m.media-amazon.com/images/M/MV5BMWMwMG...   
4  https://m.media-amazon.com/images/M/MV5BMWU4N2...   

               Series_Title Released_Year Certificate  Runtime  \
0  The Shawshank Redemption          1994           A  142 min   
1             The Godfather          1972           A  175 min   
2           The Dark Knight          2008          UA  152 min   
3    The Godfather: Part II          1974           A  202 min   
4              12 Angry Men          1957           U   96 min   

                  Genre  IMDB_Rating  \
0                 Drama          9.3   
1          Crime, Drama          9.2   
2  Action, Crime, Drama          9.0   
3          Crime, Drama          9.0   
4          Crime, Drama          9.0   

                         

In [5]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Load the embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# A missing overview would otherwise be stringified to the literal text "nan"
# and embedded as if that were a plot summary; embed an empty string instead.
overview_texts = df['Overview'].fillna("").astype(str).tolist()

# Encode every overview in one batched call rather than one model.encode()
# call per row. The result is a single 2-D NumPy array, one row per movie,
# in the same order as the DataFrame rows.
embeddings_matrix = model.encode(overview_texts, convert_to_numpy=True)

# Keep the per-row "Embeddings" column available on the DataFrame
df['Embeddings'] = list(embeddings_matrix)

# Save embeddings for future use
np.save("movie_embeddings.npy", embeddings_matrix)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [7]:
import faiss

# The width of the embeddings matrix (its second axis) is the vector size
# the index must be created with.
embedding_dim = embeddings_matrix.shape[1]

# Build a flat L2-distance index and load every embedding row into it
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings_matrix)

# Write the populated index to disk
index_path = "faiss_index.idx"
faiss.write_index(index, index_path)

In [9]:
def search_movies(query, top_k=5):
    """Search for similar movies based on a text query.

    Uses the notebook-level ``model`` (to embed the query), ``index`` (the
    FAISS index to search) and ``df`` (the movie table whose row positions
    match the vectors stored in the index).

    Args:
        query: Free-text description of the movie to look for.
        top_k: Maximum number of movies to return. Values larger than the
            number of indexed vectors are capped; values below 1 give an
            empty result.

    Returns:
        A new DataFrame with the columns ``Series_Title``, ``Genre``,
        ``IMDB_Rating``, ``Overview``, ``Director`` and ``Similarity_Score``,
        ordered as returned by the index (best match first).

        Despite its name, ``Similarity_Score`` holds the distance reported by
        the index search (squared L2 for ``IndexFlatL2``), so a SMALLER value
        means a closer match.
    """
    columns = ['Series_Title', 'Genre', 'IMDB_Rating', 'Overview', 'Director']

    # FAISS pads the result with index -1 when asked for more neighbours than
    # it holds, and df.iloc[-1] would silently return the last movie as a
    # bogus hit, so never ask for more than the index contains.
    k = min(top_k, index.ntotal)
    if k < 1:
        results = df.iloc[0:0][columns].copy()
        results['Similarity_Score'] = np.empty(0, dtype=np.float32)
        return results

    # FAISS expects a 2-D float32 array of query vectors
    query_embedding = np.asarray(model.encode([query]), dtype=np.float32)
    distances, indices = index.search(query_embedding, k=k)

    # Defensive: drop any remaining "no result" slots (-1) before indexing
    hit_positions = indices[0]
    is_valid = hit_positions >= 0

    # .copy() so adding the score column never writes into a slice of df
    results = df.iloc[hit_positions[is_valid]][columns].copy()
    results['Similarity_Score'] = distances[0][is_valid]  # distance: lower is better

    return results

# Demo: run one free-text query through the search and print the ranked matches
query = "A sci-fi movie about space exploration"
search_results = search_movies(query, top_k=5)
print(search_results)

        Series_Title                         Genre  IMDB_Rating  \
21      Interstellar      Adventure, Drama, Sci-Fi          8.6   
66            WALL·E  Animation, Adventure, Family          8.4   
566        King Kong     Adventure, Horror, Sci-Fi          7.9   
753             Argo    Biography, Drama, Thriller          7.7   
686  The Right Stuff   Adventure, Biography, Drama          7.8   

                                              Overview           Director  \
21   A team of explorers travel through a wormhole ...  Christopher Nolan   
66   In the distant future, a small waste-collectin...     Andrew Stanton   
566  A film crew goes to a tropical island for an e...   Merian C. Cooper   
753  Acting under the cover of a Hollywood producer...        Ben Affleck   
686  The story of the original Mercury 7 astronauts...     Philip Kaufman   

     Similarity_Score  
21           1.023622  
66           1.158020  
566          1.246088  
753          1.283368  
686          1