In [4]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import warnings
warnings.filterwarnings('ignore')

class MovieRecommendationSystem:
    """Content-based movie recommender using TF-IDF and cosine similarity.

    Each movie is represented by its genres (counted twice) plus its
    description. Movies whose TF-IDF vectors are closest to the query
    movie's vector are returned as recommendations.

    All movie indices exchanged between methods are *positional* row
    numbers (0-based), so the DataFrame's own index labels never matter.
    """

    # Maximum number of description characters shown in a recommendation.
    DESCRIPTION_PREVIEW_LENGTH = 100

    def __init__(self, dataset_path=None):
        """
        Initialize the Movie Recommendation System

        Args:
            dataset_path (str): Path to the movie dataset CSV file. When
                omitted (or empty) the built-in sample dataset is used.
        """
        self.movies_df = None
        self.tfidf_matrix = None
        self.tfidf_vectorizer = None
        self.cosine_sim = None

        if dataset_path:
            self.load_dataset(dataset_path)
        else:
            self.create_sample_dataset()

    def _reset_model(self):
        """Drop derived artefacts so they are rebuilt for the current dataset."""
        self.tfidf_matrix = None
        self.tfidf_vectorizer = None
        self.cosine_sim = None

    @staticmethod
    def _text_or_blank(value):
        """Return ``value`` as a string, mapping missing scalars to ``""``."""
        if isinstance(value, str):
            return value
        # pd.isna returns an array for list-likes, so only test scalars.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ""
        return str(value)

    def create_sample_dataset(self):
        """Create a sample movie dataset for demonstration"""
        sample_data = {
            'title': [
                'The Dark Knight', 'Inception', 'The Matrix', 'Avatar', 'Titanic',
                'The Godfather', 'Pulp Fiction', 'Forrest Gump', 'The Shawshank Redemption',
                'Fight Club', 'Interstellar', 'The Lion King', 'Toy Story', 'Finding Nemo',
                'Iron Man', 'The Avengers', 'Spider-Man', 'Batman Begins', 'Joker',
                'Parasite', 'La La Land', 'The Grand Budapest Hotel', 'Moonlight',
                'Get Out', 'Black Panther', 'Wonder Woman', 'Aquaman', 'Shazam',
                'Guardians of the Galaxy', 'Thor', 'Captain America', 'Doctor Strange',
                'Ant-Man', 'The Incredible Hulk', 'Captain Marvel', 'Deadpool',
                'Logan', 'X-Men', 'Fantastic Four', 'The Fantastic Beasts'
            ],
            'genres': [
                'Action Crime Drama', 'Action Sci-Fi Thriller', 'Action Sci-Fi', 'Action Adventure Sci-Fi', 'Drama Romance',
                'Crime Drama', 'Crime Drama', 'Drama Romance', 'Drama',
                'Drama Thriller', 'Adventure Drama Sci-Fi', 'Animation Adventure Family', 'Animation Adventure Family', 'Animation Adventure Family',
                'Action Adventure Sci-Fi', 'Action Adventure Sci-Fi', 'Action Adventure Sci-Fi', 'Action Crime Drama', 'Crime Drama Thriller',
                'Comedy Drama Thriller', 'Comedy Drama Musical', 'Adventure Comedy Drama', 'Drama',
                'Horror Mystery Thriller', 'Action Adventure Sci-Fi', 'Action Adventure Fantasy', 'Action Adventure Sci-Fi', 'Action Adventure Comedy',
                'Action Adventure Comedy Sci-Fi', 'Action Adventure Fantasy', 'Action Adventure Sci-Fi', 'Action Adventure Fantasy Sci-Fi',
                'Action Adventure Comedy Sci-Fi', 'Action Adventure Sci-Fi', 'Action Adventure Sci-Fi', 'Action Adventure Comedy Sci-Fi',
                'Action Adventure Sci-Fi', 'Action Adventure Sci-Fi', 'Action Adventure Sci-Fi', 'Adventure Fantasy Sci-Fi'
            ],
            'description': [
                'A dark knight fights crime in Gotham City with advanced technology and martial arts skills.',
                'A skilled thief enters people\'s dreams to steal secrets but faces his most challenging mission.',
                'A computer hacker discovers the reality he knows is actually a simulated world controlled by machines.',
                'A paraplegic marine becomes part of the Avatar program on an alien planet called Pandora.',
                'A tragic love story aboard the ill-fated RMS Titanic during its maiden voyage.',
                'The aging patriarch of an organized crime dynasty transfers control to his reluctant son.',
                'The lives of two mob hitmen, a boxer, and other criminals intertwine in Los Angeles.',
                'A simple man with low IQ achieves extraordinary things and influences historical events.',
                'A banker convicted of murdering his wife forms friendships and finds redemption in prison.',
                'An insomniac office worker forms an underground fight club with a soap salesman.',
                'A team of explorers travels through a wormhole in space to save humanity.',
                'A young lion prince flees his kingdom after his father\'s death and later returns to reclaim his throne.',
                'A cowboy toy feels threatened by a new space ranger toy in a child\'s room.',
                'A clownfish searches for his son who was captured by divers and taken to a fish tank.',
                'A billionaire industrialist builds a high-tech suit of armor to fight crime and terrorism.',
                'Superheroes assemble to fight an alien invasion threatening Earth.',
                'A teenager gains spider powers and learns to be a superhero in New York City.',
                'A young Bruce Wayne begins his journey to become Batman and fight crime in Gotham.',
                'A failed comedian descends into madness and becomes the criminal mastermind known as Joker.',
                'A poor family schemes to infiltrate a wealthy household with unexpected consequences.',
                'A jazz musician and actress fall in love while pursuing their dreams in Los Angeles.',
                'The adventures of a legendary concierge at a famous European hotel and his protégé.',
                'A young black man struggles with his identity and sexuality in a rough Miami neighborhood.',
                'A young black man visits his white girlfriend\'s family estate and uncovers disturbing secrets.',
                'The king of Wakanda fights to protect his nation from enemies seeking to exploit its resources.',
                'An Amazon princess leaves her island home to fight in World War I and save mankind.',
                'The half-human, half-Atlantean ruler must unite the underwater and surface worlds.',
                'A teenage boy gains superpowers and must learn to use them responsibly.',
                'A group of intergalactic criminals become unlikely heroes to save the galaxy.',
                'The Norse god of thunder must prove himself worthy of his powers and hammer.',
                'A weakly man becomes a super-soldier during World War II to fight against evil forces.',
                'A former neurosurgeon becomes a master of the mystic arts after a car accident.',
                'A thief becomes a superhero with the ability to shrink in size while gaining strength.',
                'A scientist transforms into a giant green monster when he becomes angry.',
                'A pilot gains incredible powers and becomes one of the universe\'s most powerful heroes.',
                'A wisecracking mercenary with accelerated healing powers fights crime in his own chaotic way.',
                'An aging wolverine cares for Professor X while confronting his own mortality.',
                'Mutants with special powers fight for acceptance in a world that fears them.',
                'A team of scientists gains superpowers after exposure to cosmic radiation.',
                'A young wizard discovers magical creatures and adventures in the wizarding world.'
            ]
        }

        self.movies_df = pd.DataFrame(sample_data)
        # Any matrices built for a previous dataset no longer apply.
        self._reset_model()
        print(f"Sample dataset created with {len(self.movies_df)} movies")

    def load_dataset(self, dataset_path):
        """
        Load movie dataset from CSV file

        Falls back to the built-in sample dataset when the file is missing,
        cannot be parsed, or lacks the mandatory ``title`` column.

        Args:
            dataset_path (str): Path to CSV file
        """
        try:
            loaded = pd.read_csv(dataset_path)
        except FileNotFoundError:
            print(f"File {dataset_path} not found. Creating sample dataset instead.")
            self.create_sample_dataset()
            return
        except Exception as e:
            # Deliberate best-effort: any read/parse failure falls back to
            # the sample data so the system stays usable.
            print(f"Error loading dataset: {e}")
            self.create_sample_dataset()
            return

        # Every lookup goes through 'title'; reject files without it now
        # instead of failing later with a KeyError.
        if 'title' not in loaded.columns:
            print("Error loading dataset: missing required 'title' column")
            self.create_sample_dataset()
            return

        self.movies_df = loaded
        self._reset_model()
        print(f"Dataset loaded successfully with {len(self.movies_df)} movies")

    def preprocess_text(self, text):
        """
        Clean and preprocess text data

        Lower-cases the text, drops every character that is not an ASCII
        letter or whitespace, and collapses runs of whitespace.

        Args:
            text (str): Raw text to preprocess. Missing values (None/NaN)
                yield an empty string; other non-strings are stringified.

        Returns:
            str: Cleaned text
        """
        lowered = self._text_or_blank(text).lower()

        # Punctuation and digits are deleted (not replaced by a space), so
        # "Sci-Fi" becomes the single token "scifi".
        letters_only = re.sub(r'[^a-zA-Z\s]', '', lowered)

        return ' '.join(letters_only.split())

    def _clean_column(self, column):
        """Return the preprocessed values of ``column``, or blanks if absent."""
        if column not in self.movies_df.columns:
            return [""] * len(self.movies_df)
        return [self.preprocess_text(value) for value in self.movies_df[column]]

    def create_feature_matrix(self):
        """Create TF-IDF feature matrix from movie descriptions and genres"""
        if self.movies_df is None:
            print("No dataset loaded. Please load a dataset first.")
            return

        # Raw values go straight to preprocess_text so that missing cells
        # become "" instead of the literal token "nan".
        genre_texts = self._clean_column('genres')
        description_texts = self._clean_column('description')

        # Give more weight to genres by repeating them
        combined_features = [
            f"{genres} {genres} {description}"
            for genres, description in zip(genre_texts, description_texts)
        ]

        self.tfidf_vectorizer = TfidfVectorizer(
            max_features=5000,
            stop_words='english',
            ngram_range=(1, 2),
            max_df=0.8,
            min_df=2
        )

        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(combined_features)
        # A similarity matrix from an earlier feature matrix is now stale.
        self.cosine_sim = None
        print(f"TF-IDF matrix created with shape: {self.tfidf_matrix.shape}")

    def calculate_similarity(self):
        """Calculate cosine similarity matrix"""
        if self.tfidf_matrix is None:
            print("TF-IDF matrix not created. Please create feature matrix first.")
            return

        self.cosine_sim = cosine_similarity(self.tfidf_matrix, self.tfidf_matrix)
        print(f"Cosine similarity matrix calculated with shape: {self.cosine_sim.shape}")

    def find_movie_index(self, movie_title):
        """
        Find the index of a movie by title

        An exact case-insensitive match is preferred; otherwise the first
        title containing the query as a literal substring is used.

        Args:
            movie_title (str): Movie title to search for

        Returns:
            int: Positional (0-based) row index of the movie, or None if
                not found or if the query is empty
        """
        if self.movies_df is None or movie_title is None:
            return None

        needle = str(movie_title).strip().lower()
        if not needle:
            # An empty substring is contained in every title; treat it as
            # "no query" rather than returning the first movie.
            return None

        titles = self.movies_df['title'].str.lower()

        # Try exact match first
        exact_mask = titles.eq(needle).fillna(False).to_numpy(dtype=bool)
        exact_positions = np.flatnonzero(exact_mask)
        if exact_positions.size:
            return int(exact_positions[0])

        # Try partial match; regex=False so characters such as "(" or "+"
        # in user input are matched literally instead of raising re.error.
        partial_mask = titles.str.contains(needle, na=False, regex=False).to_numpy(dtype=bool)
        partial_positions = np.flatnonzero(partial_mask)
        if partial_positions.size:
            return int(partial_positions[0])

        return None

    def _movie_summary(self, position, score):
        """Build the recommendation dict for the movie at row ``position``."""
        row = self.movies_df.iloc[position]

        description = self._text_or_blank(row.get('description', ''))
        limit = self.DESCRIPTION_PREVIEW_LENGTH
        if len(description) > limit:
            # Only mark the text as truncated when something was cut off.
            description = description[:limit] + "..."

        return {
            'title': row['title'],
            'genres': self._text_or_blank(row.get('genres', '')),
            'similarity_score': round(float(score), 3),
            'description': description,
        }

    def get_recommendations(self, movie_title, num_recommendations=5):
        """
        Get movie recommendations based on cosine similarity

        Builds the feature and similarity matrices on first use.

        Args:
            movie_title (str): Title of the movie to get recommendations for
            num_recommendations (int): Number of recommendations to return

        Returns:
            list: One dict per recommended movie with the keys 'title',
                'genres', 'similarity_score' and 'description', ordered by
                descending similarity. Empty when the movie is unknown or
                no similarity matrix could be built.
        """
        if self.movies_df is None:
            print("No dataset loaded. Please load a dataset first.")
            return []

        if self.cosine_sim is None:
            print("Similarity matrix not calculated. Initializing system...")
            self.create_feature_matrix()
            self.calculate_similarity()
            if self.cosine_sim is None:
                return []

        movie_idx = self.find_movie_index(movie_title)

        if movie_idx is None:
            print(f"Movie '{movie_title}' not found in dataset.")
            print("\nAvailable movies (showing first 10):")
            for i, title in enumerate(self.movies_df['title'].head(10)):
                print(f"{i+1}. {title}")
            return []

        # Exclude the query movie by index, not by assuming it sorts first:
        # ties or an all-zero vector can place it anywhere in the ranking.
        candidates = [
            (idx, score)
            for idx, score in enumerate(self.cosine_sim[movie_idx])
            if idx != movie_idx
        ]
        candidates.sort(key=lambda pair: pair[1], reverse=True)

        # A negative count would otherwise slice from the end of the list.
        wanted = max(int(num_recommendations), 0)

        return [self._movie_summary(idx, score) for idx, score in candidates[:wanted]]

    def display_recommendations(self, movie_title, num_recommendations=5):
        """
        Display formatted recommendations

        Prints nothing when no recommendations are available.

        Args:
            movie_title (str): Movie title to get recommendations for
            num_recommendations (int): Number of recommendations to display
        """
        recommendations = self.get_recommendations(movie_title, num_recommendations)

        if not recommendations:
            return

        # \U0001F3AC is the clapper-board emoji, written as an escape so it
        # survives a wrong file encoding.
        print(f"\n\U0001F3AC Top {len(recommendations)} recommendations for '{movie_title}':")
        print("=" * 70)

        for i, movie in enumerate(recommendations, 1):
            print(f"\n{i}. {movie['title']}")
            print(f"   Genres: {movie['genres']}")
            print(f"   Similarity Score: {movie['similarity_score']}")
            print(f"   Description: {movie['description']}")

    def get_random_movie(self):
        """Get a random movie title from the dataset, or None if it is empty"""
        if self.movies_df is None or self.movies_df.empty:
            return None
        return self.movies_df['title'].sample().iloc[0]

    def search_movies(self, query):
        """
        Search for movies containing the query string

        The match is a case-insensitive literal substring test.

        Args:
            query (str): Search query

        Returns:
            list: List of matching movie titles
        """
        if self.movies_df is None or query is None:
            return []

        # regex=False keeps special characters in user input literal.
        mask = self.movies_df['title'].str.lower().str.contains(
            str(query).lower(), na=False, regex=False
        )

        return self.movies_df.loc[mask, 'title'].tolist()

def main():
    """Run the interactive console menu for the movie recommendation system.

    Builds a recommender on the sample dataset, precomputes the TF-IDF and
    similarity matrices, then loops over a four-option menu until the user
    chooses to exit or closes the input stream (Ctrl-D / Ctrl-C).
    """
    default_count = 5
    farewell = "\nThank you for using the Movie Recommendation System!"

    # \U0001F3AC is the clapper-board emoji, written as an escape so it
    # survives a wrong file encoding.
    print("\U0001F3AC Movie Recommendation System")
    print("=" * 50)

    # Initialize the recommendation system
    recommender = MovieRecommendationSystem()

    # Create feature matrix and calculate similarity
    recommender.create_feature_matrix()
    recommender.calculate_similarity()

    try:
        while True:
            print("\nOptions:")
            print("1. Get movie recommendations")
            print("2. Search movies")
            print("3. Get random movie recommendation")
            print("4. Exit")

            choice = input("\nEnter your choice (1-4): ").strip()

            if choice == '1':
                movie_title = input("\nEnter movie title: ").strip()
                if not movie_title:
                    continue

                raw_count = input("Number of recommendations (default 5): ").strip()
                # Only the integer parse is guarded, so a ValueError raised
                # while computing recommendations is not mistaken for bad
                # input and silently retried.
                try:
                    num_recs = int(raw_count) if raw_count else default_count
                except ValueError:
                    num_recs = default_count
                if num_recs < 1:
                    num_recs = default_count

                recommender.display_recommendations(movie_title, num_recs)

            elif choice == '2':
                query = input("\nEnter search query: ").strip()
                if not query:
                    continue

                matching_movies = recommender.search_movies(query)
                if matching_movies:
                    print(f"\nFound {len(matching_movies)} movies matching '{query}':")
                    for i, movie in enumerate(matching_movies, 1):
                        print(f"{i}. {movie}")
                else:
                    print(f"No movies found matching '{query}'")

            elif choice == '3':
                random_movie = recommender.get_random_movie()
                if random_movie:
                    print(f"\nRandom movie selected: {random_movie}")
                    recommender.display_recommendations(random_movie)

            elif choice == '4':
                print(farewell)
                break

            else:
                print("\nInvalid choice. Please try again.")
    except (EOFError, KeyboardInterrupt):
        # A closed stdin or Ctrl-C ends the session without a traceback.
        print(farewell)

# Start the interactive menu only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

🎬 Movie Recommendation System
Sample dataset created with 40 movies
TF-IDF matrix created with shape: (40, 75)
Cosine similarity matrix calculated with shape: (40, 40)

Options:
1. Get movie recommendations
2. Search movies
3. Get random movie recommendation
4. Exit



Enter your choice (1-4):  2

Enter search query:  Avatar



Found 1 movies matching 'Avatar':
1. Avatar

Options:
1. Get movie recommendations
2. Search movies
3. Get random movie recommendation
4. Exit



Enter your choice (1-4):  1

Enter movie title:  Avatar
Number of recommendations (default 5):  4



🎬 Top 4 recommendations for 'Avatar':

1. The Avengers
   Genres: Action Adventure Sci-Fi
   Similarity Score: 0.957
   Description: Superheroes assemble to fight an alien invasion threatening Earth....

2. Aquaman
   Genres: Action Adventure Sci-Fi
   Similarity Score: 0.91
   Description: The half-human, half-Atlantean ruler must unite the underwater and surface worlds....

3. The Incredible Hulk
   Genres: Action Adventure Sci-Fi
   Similarity Score: 0.91
   Description: A scientist transforms into a giant green monster when he becomes angry....

4. Black Panther
   Genres: Action Adventure Sci-Fi
   Similarity Score: 0.839
   Description: The king of Wakanda fights to protect his nation from enemies seeking to exploit its resources....

Options:
1. Get movie recommendations
2. Search movies
3. Get random movie recommendation
4. Exit



Enter your choice (1-4):  4



Thank you for using the Movie Recommendation System!
