In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load dataset
def load_dataset(movies_file="TMBD Movie Dataset.csv"):
    """Read the movie metadata CSV into a DataFrame.

    Parameters
    ----------
    movies_file : str or path-like, optional
        Location of the CSV file. Defaults to the dataset file name that
        this notebook was written against, resolved relative to the current
        working directory.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, exactly as returned by ``pd.read_csv``.
    """
    return pd.read_csv(movies_file)

In [2]:
# Preprocess data
def preprocess_data(movies):
    """Add a ``content`` column combining each movie's title and plot summary.

    Parameters
    ----------
    movies : pandas.DataFrame
        Must contain ``original_title`` and ``overview`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``movies`` object, with a ``content`` column holding
        ``"<original_title> <overview>"`` for every row.
    """
    # Fill missing values on BOTH sides of the concatenation: a NaN title
    # would otherwise turn the whole 'content' cell into NaN, and a NaN
    # document cannot be vectorized downstream.
    titles = movies['original_title'].fillna('')
    overviews = movies['overview'].fillna('')
    movies['content'] = titles + ' ' + overviews
    return movies

In [3]:
# Vectorize using TF-IDF
def vectorize_data(movies):
    """Fit a TF-IDF model on the ``content`` column of ``movies``.

    English stop words are dropped during vectorization.

    Returns a ``(tfidf_matrix, vectorizer)`` pair: the result of
    ``fit_transform`` on ``movies['content']`` and the fitted vectorizer
    that produced it.
    """
    tfidf = TfidfVectorizer(stop_words='english')
    content_matrix = tfidf.fit_transform(movies['content'])
    return content_matrix, tfidf

In [4]:
# Generate recommendations according to cosine similarity
def recommend_movies(query, movies, tfidf_matrix, vectorizer, top_n=5):
    """Return the ``top_n`` movies whose content is most similar to ``query``.

    Parameters
    ----------
    query : str
        Free-text description of what the user is looking for.
    movies : pandas.DataFrame
        Must contain ``original_title`` and ``overview`` columns. Rows are
        selected by position, so row ``i`` of ``tfidf_matrix`` must describe
        ``movies.iloc[i]``.
    tfidf_matrix
        Document vectors to compare the query against, one row per movie.
    vectorizer
        Fitted object whose ``transform`` maps a list of strings into the
        same vector space as ``tfidf_matrix``.
    top_n : int, optional
        Maximum number of rows to return. ``0`` yields an empty result.

    Returns
    -------
    pandas.DataFrame
        Columns ``original_title``, ``overview`` and ``similarity``, ordered
        from most to least similar. Has fewer than ``top_n`` rows when
        ``movies`` is smaller than ``top_n``.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative, got %r" % (top_n,))

    query_vec = vectorizer.transform([query])
    # calculate similarity between the query and each movie's content
    similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # Reverse the ascending argsort first and truncate afterwards. Slicing
    # with [-top_n:] breaks for top_n == 0, because [-0:] is [0:] and selects
    # every row instead of none.
    top_indices = similarity_scores.argsort()[::-1][:top_n]

    # Take an explicit copy so adding the score column never writes into (or
    # warns about) a chained selection of the caller's frame.
    recommendations = movies.iloc[top_indices][['original_title', 'overview']].copy()
    recommendations['similarity'] = similarity_scores[top_indices]
    return recommendations

In [5]:
# Chain all the processes together
def recommend_pipeline(query, top_n=5):
    """Run the whole recommender for one query and print the results.

    Loads the dataset, builds the ``content`` column, fits the TF-IDF model,
    ranks the movies against ``query`` and prints each recommendation's
    title, similarity score and overview.

    Parameters
    ----------
    query : str
        Free-text description of the kind of movie wanted.
    top_n : int, optional
        Maximum number of recommendations to print (default 5).

    Returns
    -------
    None
        Results are written to stdout only.

    Notes
    -----
    The dataset is re-read and the TF-IDF model is re-fitted on every call.
    """
    movies = preprocess_data(load_dataset())
    tfidf_matrix, vectorizer = vectorize_data(movies)
    recommendations = recommend_movies(
        query, movies, tfidf_matrix, vectorizer, top_n=top_n
    )
    # display each recommendation, including title, similarity with query, and the summary
    print('Top', len(recommendations),'recommendations based on your query:')
    # itertuples keeps each column's own dtype instead of boxing every row
    # into a Series the way iterrows does.
    for rec in recommendations.itertuples(index=False):
        print(rec.original_title, '(Similarity %.4f)' % rec.similarity)
        print(rec.overview)
        print('---')

In [6]:
# Demo: describe the desired movie in plain English and print the closest matches.
query = "I love thrilling action movies set in space, with a comedic twist."
recommend_pipeline(query=query)

Top 5 recommendations based on your query:
Showtime (Similarity 0.1580)
A spoof of buddy cop movies where two very different cops are forced to team up on a new reality based T.V. cop show.
---
Insidious: Chapter 3 (Similarity 0.1214)
A twisted new tale of terror begins for a teenage girl and her family, predating the haunting of the Lambert family in the earlier movies and revealing more mysteries of the otherworldly realm The Further.
---
Space Chimps (Similarity 0.1130)
Circus monkey Ham III works in a circus where he's regularly shot from a canon but he still lives in the shadow of his father's legacy. A natural born rebel against authority, Ham III is initially reluctant to go on a dangerous space mission to rescue a lost space probe, but away he goes, for lots of RIGHT STUFF-style astro-training alongside two highly prepared chimps, Luna and Titan.
---
Iron Sky (Similarity 0.1077)
In the last moments of World War II, a secret Nazi space program evaded destruction by fleeing to th