In [1]:
!pip install scikit-learn pandas




In [8]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load dataset
# NOTE: absolute Colab-style path; adjust DATA_PATH when running elsewhere.
DATA_PATH = '/content/imdb_2024_movies.csv'
df = pd.read_csv(DATA_PATH)

# Drop only rows that are missing a column the recommender actually uses.
# A blanket dropna() would also discard movies whose sole gap is in some
# unrelated column, silently shrinking the catalogue. The non-inplace form
# keeps the cell free of hidden in-place mutation.
df = df.dropna(subset=['Movie Name', 'Storyline'])

# Vectorize the storylines using TF-IDF (English stop words removed).
# Row i of tfidf_matrix corresponds to the i-th row (by position) of df.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['Storyline'])

# Recommendation function
def recommend_movies(input_storyline, top_n=5):
    """Return the movies whose storylines are most similar to ``input_storyline``.

    The query text is projected with the module-level ``vectorizer`` and
    compared, by cosine similarity, against every row of the module-level
    ``tfidf_matrix``. Row positions in ``tfidf_matrix`` are assumed to line up
    with row positions in ``df``.

    Args:
        input_storyline: Free-text plot description used as the query.
        top_n: Maximum number of movies to return. If it exceeds the number
            of rows available, all rows are returned. Values of zero or less
            yield an empty result.

    Returns:
        A DataFrame with the 'Movie Name' and 'Storyline' columns of the
        selected rows, ordered from most to least similar.
    """
    # Guard: `scores[-0:]` slices the WHOLE array, so without this check
    # top_n=0 would return every movie (and a negative top_n an arbitrary tail).
    if top_n <= 0:
        return df.iloc[:0][['Movie Name', 'Storyline']]

    input_vec = vectorizer.transform([input_storyline])
    similarity_scores = cosine_similarity(input_vec, tfidf_matrix).flatten()
    # argsort is ascending: take the last top_n positions, then reverse so the
    # best match comes first.
    top_indices = similarity_scores.argsort()[-top_n:][::-1]
    # iloc (positional) is required here: top_indices are matrix row positions,
    # not index labels.
    return df.iloc[top_indices][['Movie Name', 'Storyline']]


In [9]:
# Free-text plot description used as the query for the recommender.
user_input = "A young wizard begins his journey at a magical school where he makes friends and enemies, facing dark forces along the way."

# Fetch the closest matches (default result count) and print each one as
# title followed by storyline.
recommendations = recommend_movies(user_input)
titles = recommendations['Movie Name']
plots = recommendations['Storyline']
for title, plot in zip(titles, plots):
    print(f"🎬 {title}\n📖 {plot}\n")


🎬 The Magic Academy
📖 A young student navigates life at a school for magic, facing challenges.

🎬 The Dark Sorcerer
📖 A young hero confronts an ancient sorcerer threatening the magical world.

🎬 Mystic Trials
📖 A teenager must pass magical tests to uncover his destiny.

🎬 The Wizard’s Journey
📖 A boy discovers his magical abilities and faces a powerful sorcerer.

🎬 Arcane Legacy
📖 Legacy magic awakens in a new generation destined to change the world.

