In [None]:
import pandas as pd

# Read the movie catalogue, keep only the title and description columns, and
# discard every row in which either of them is missing.
df = (
    pd.read_csv("movies.csv")
    .loc[:, ['original_title', 'description']]
    .dropna()
)


In [None]:
import nltk
# Empty NLTK's in-memory resource cache, then download each resource this
# cell requests.
nltk.data.clear_cache()
for resource_name in ('stopwords', 'punkt'):
    nltk.download(resource_name)
from nltk.corpus import stopwords

_STOPWORD_CACHE = {}


def _default_stop_words():
    """Return NLTK's English stop words as a frozenset, loading them only once."""
    if 'english' not in _STOPWORD_CACHE:
        _STOPWORD_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOPWORD_CACHE['english']


def preprocess_text(text, stop_words=None):
    """Normalise free text into a space-separated string of content words.

    The text is lower-cased, every non-alphanumeric character is treated as a
    token separator (``"space,"`` yields ``space``; ``"sci-fi"`` yields ``sci``
    and ``fi``), and stop words are removed.

    Args:
        text: The string to clean.
        stop_words: Optional iterable of lower-case words to discard. When
            omitted, NLTK's English stop-word list is used.

    Returns:
        The surviving tokens joined by single spaces, or an empty string when
        nothing survives.
    """
    if stop_words is None:
        stop_words = _default_stop_words()
    else:
        stop_words = frozenset(stop_words)

    # Turn punctuation into spaces instead of rejecting the whole token:
    # filtering on word.isalnum() silently dropped every word that touched a
    # comma, full stop, quote or hyphen.
    cleaned = ''.join(ch if ch.isalnum() else ' ' for ch in text.lower())

    # Set membership keeps the filter linear in the number of tokens; the
    # stop-word list is no longer re-read for every single word.
    return " ".join(word for word in cleaned.split() if word not in stop_words)

# Store a cleaned copy of every description next to the original text.
df['processed_description'] = df['description'].map(preprocess_text)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit a TF-IDF model on the cleaned descriptions. The fitted vectorizer is
# kept alongside the matrix because recommend() uses it to transform queries.
processed_corpus = df['processed_description']
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(processed_corpus)


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend(query, top_n=5):
    """Return the movies whose descriptions are most similar to ``query``.

    The query is cleaned with ``preprocess_text``, transformed with the
    already-fitted ``vectorizer``, and compared against every row of
    ``tfidf_matrix`` using cosine similarity.

    Args:
        query: Free-text description of what the user wants to watch.
        top_n: Maximum number of rows to return. Zero or a negative value
            yields an empty result.

    Returns:
        A DataFrame with the ``original_title``, ``description`` and
        ``processed_description`` columns of the best matches, most similar
        first. It holds fewer than ``top_n`` rows when ``df`` is smaller.
    """
    # Clamp first: the previous slice `[-top_n:]` became `[-0:]` for
    # top_n == 0, which selects every row instead of none.
    top_n = max(top_n, 0)

    query_vec = vectorizer.transform([preprocess_text(query)])  # Transform query
    similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # Sort descending with a stable sort so equally-scored movies keep their
    # original row order and the result is deterministic.
    top_indices = (-similarity_scores).argsort(kind='stable')[:top_n]
    return df.iloc[top_indices][['original_title', 'description', 'processed_description']]

# Example: run one hand-written query through the recommender and print
# the matches it returns.
user_query = "I love thrilling action movies set in space, with a comedic twist."
example_matches = recommend(user_query)
print(example_matches)


In [None]:
import sys

if __name__ == "__main__":
    # In a notebook cell __name__ is also "__main__", but sys.argv then holds
    # the kernel's own launch arguments (typically `-f <connection file>`),
    # not a user query. Only trust argv when not running under ipykernel;
    # otherwise fall through to the interactive prompt.
    running_in_kernel = "ipykernel" in sys.modules
    cli_args = [] if running_in_kernel else sys.argv[1:]

    user_input = cli_args[0] if cli_args else input("Enter a description: ")
    results = recommend(user_input)
    print(results)
