# movie_search_solution.ipynb

# Section 1: Import Libraries

In [5]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


# Section 2: Load Dataset 

In [6]:
# Read the movie catalogue from disk and preview its first rows as a sanity
# check; later cells read the "title" and "plot" columns.
df = pd.read_csv(filepath_or_buffer="movies.csv")
print(df.head(n=5))

              title                                               plot
0         Spy Movie  A spy navigates intrigue in Paris to stop a te...
1  Romance in Paris  A couple falls in love in Paris under romantic...
2      Action Flick  A high-octane chase through New York with expl...


# Section 3: Load Embedding Model 

In [7]:
# Load the pretrained sentence-embedding model named "all-MiniLM-L6-v2";
# it is used below to encode both the movie plots and the search queries.
model = SentenceTransformer("all-MiniLM-L6-v2")


# Section 4: Encode Plots 

In [8]:
# Convert every plot into an embedding with a single batched encode() call:
# passing the whole list lets the model batch internally instead of paying
# the per-row Python/model-call overhead of Series.apply.
# The result is split into one vector per row and wrapped in a Series built
# on df.index so rows stay aligned even if the index is not 0..n-1.
df["embeddings"] = pd.Series(
    list(model.encode(df["plot"].tolist())),
    index=df.index,
)


# Section 5: Define Search Function

In [9]:

def search_movies(query, top_n=5):
    """
    Return the top_n movies whose plot embeddings best match the query.

    The query is embedded with the module-level ``model`` and compared, via
    cosine similarity, against the precomputed vectors stored in
    ``df["embeddings"]``.

    Args:
        query: Free-text search string.
        top_n: Maximum number of rows to return (default 5).

    Returns:
        A new DataFrame with "title", "plot" and "similarity" columns,
        sorted by descending similarity. The module-level ``df`` is not
        modified.
    """
    query_embedding = model.encode(query)
    similarities = cosine_similarity([query_embedding], list(df["embeddings"]))[0]
    # Attach the scores to a copy rather than to the shared DataFrame, so a
    # search never leaves a stale "similarity" column behind on ``df``.
    scored = df.assign(similarity=similarities)
    results = scored.sort_values("similarity", ascending=False).head(top_n)
    return results[["title", "plot", "similarity"]]


# Section 6: Test Example Query 

In [10]:
# Smoke-test the search with a sample query and print the ranked matches.
print(search_movies(query="spy thriller in Paris", top_n=5))

              title                                               plot  \
0         Spy Movie  A spy navigates intrigue in Paris to stop a te...   
1  Romance in Paris  A couple falls in love in Paris under romantic...   
2      Action Flick  A high-octane chase through New York with expl...   

   similarity  
0    0.769684  
1    0.388030  
2    0.256777  
