In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

In [2]:

# MODEL DEFINITION (training architecture)

class MatrixFactorizationWithBias(nn.Module):
    """Biased matrix-factorization rating model.

    The predicted score for a (user, item) pair is

        dot(user_embedding, item_embedding) + user_bias + item_bias + global_bias

    The attribute names below are the keys of the module's ``state_dict``;
    renaming any of them would make previously saved checkpoints unloadable.

    Args:
        num_users: Number of rows in the user embedding / bias tables.
        num_items: Number of rows in the item embedding / bias tables.
        embedding_dim: Size of each latent vector (default: 50).
    """

    def __init__(self, num_users, num_items, embedding_dim=50):
        super().__init__()
        self.user_embeddings = nn.Embedding(num_users, embedding_dim)
        self.item_embeddings = nn.Embedding(num_items, embedding_dim)
        self.user_bias = nn.Embedding(num_users, 1)
        self.item_bias = nn.Embedding(num_items, 1)
        self.global_bias = nn.Parameter(torch.zeros(1))

    def forward(self, user_ids, item_ids):
        """Score (user, item) pairs.

        Args:
            user_ids: Integer tensor of user indices (any shape).
            item_ids: Integer tensor of item indices, same shape as ``user_ids``.

        Returns:
            Tensor of predicted scores, one per (user, item) pair. For the
            usual 1-D batch of size B the result has shape (B,).
        """
        user_vecs = self.user_embeddings(user_ids)
        item_vecs = self.item_embeddings(item_ids)

        # Reduce over the embedding axis (always the last one) so the model
        # is not tied to 1-D id batches.
        dot_product = (user_vecs * item_vecs).sum(dim=-1)

        # squeeze(-1) removes only the trailing size-1 bias axis; a bare
        # squeeze() would also collapse a batch dimension of size 1.
        user_b = self.user_bias(user_ids).squeeze(-1)
        item_b = self.item_bias(item_ids).squeeze(-1)

        return dot_product + user_b + item_b + self.global_bias



In [3]:

# INFERENCE CLASS
class MovieRecommenderInference:
    """Inference helper around a trained ``MatrixFactorizationWithBias`` model.

    Loads the ratings and movies CSV files, rebuilds the movieId/userId <->
    embedding-index mappings from the ratings file, restores the model weights
    and offers title search, popularity / random listings and "cold-start"
    recommendations from a list of watched movies.
    """

    def __init__(self, model_path, ratings_path, movies_path, embedding_dim=64):
        """
        Initialize the recommender system for inference.

        Args:
            model_path: Path to saved model weights (.pth file)
            ratings_path: Path to original ratings.csv
            movies_path: Path to movies.csv (with titles)
            embedding_dim: Embedding dimension used during training (default: 64)
        """
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"üöÄ Device: {self.device}")

        # Load data
        print("Loading data.")
        self.ratings_df = pd.read_csv(ratings_path)
        self.movies_df = pd.read_csv(movies_path)

        # Create mappings (also sets self.num_users / self.num_items, which
        # size the embedding tables below).
        self._create_mappings()

        # Load model
        print(f"Loading model from {model_path}...")
        self.model = MatrixFactorizationWithBias(
            self.num_users,
            self.num_items,
            embedding_dim
        ).to(self.device)

        # NOTE(security): torch.load deserializes with pickle, so only load
        # checkpoints from a trusted source. Where the installed torch
        # version supports it, passing weights_only=True is the safer choice
        # for a plain state_dict like this one.
        checkpoint = torch.load(model_path, map_location=self.device)
        self.model.load_state_dict(checkpoint)
        self.model.eval()
        print("‚úÖ Model loaded successfully\n")

    def _create_mappings(self):
        """Create user and item ID mappings.

        Indices are assigned by enumerating the sorted unique ``userId`` /
        ``movieId`` values found in ``self.ratings_df``.
        NOTE(review): this presumably has to reproduce the index assignment
        used at training time, otherwise embeddings are looked up for the
        wrong movies - confirm against the training notebook.
        """
        unique_users = sorted(self.ratings_df['userId'].unique())
        unique_items = sorted(self.ratings_df['movieId'].unique())

        self.user_to_idx = {user: idx for idx, user in enumerate(unique_users)}
        self.idx_to_user = {idx: user for user, idx in self.user_to_idx.items()}

        self.item_to_idx = {item: idx for idx, item in enumerate(unique_items)}
        self.idx_to_item = {idx: item for item, idx in self.item_to_idx.items()}

        self.num_users = len(unique_users)
        self.num_items = len(unique_items)

        print(f"   ‚úì Users: {self.num_users:,} | Items: {self.num_items:,}")

    def search_movies(self, search_term, max_results=20):
        """
        Search for movies whose title contains ``search_term``.

        The match is a case-insensitive *literal* substring match: characters
        such as ``(``, ``*`` or ``+`` in the query are not interpreted as a
        regular expression, so arbitrary user input cannot raise a regex error.

        Args:
            search_term: Search query string
            max_results: Maximum number of rows to return (default: 20)

        Returns:
            DataFrame with columns movieId and title, or None if nothing matches
        """
        matches = self.movies_df['title'].str.contains(
            search_term, case=False, na=False, regex=False
        )
        results = self.movies_df[matches]

        if results.empty:
            return None

        return results[['movieId', 'title']].head(max_results)

    def get_popular_movies(self, n=20):
        """
        Get the most popular movies by number of ratings.

        Args:
            n: Number of movies to return

        Returns:
            DataFrame with columns movieId, title and num_ratings, most-rated
            first. ``title`` is NaN for rated movies absent from movies.csv
            (left merge).
        """
        rating_counts = self.ratings_df.groupby('movieId').size().reset_index(name='num_ratings')
        popular = rating_counts.merge(
            self.movies_df[['movieId', 'title']],
            on='movieId',
            how='left'
        )
        popular = popular.sort_values('num_ratings', ascending=False).head(n)

        return popular[['movieId', 'title', 'num_ratings']]

    def get_random_movies(self, n=20, random_state=42):
        """
        Get random movie samples.

        Args:
            n: Number of movies to return (capped at the catalog size)
            random_state: Seed passed to ``DataFrame.sample``. The default (42)
                returns the same sample on every call; pass None for a
                different sample each time.

        Returns:
            DataFrame with columns movieId and title
        """
        sample = self.movies_df.sample(
            n=min(n, len(self.movies_df)),
            random_state=random_state
        )
        return sample[['movieId', 'title']]

    def get_movie_info(self, movie_ids):
        """
        Get movie information for given movie IDs.

        Only IDs known to the model (present in ``self.item_to_idx``) are
        looked up. Rows come back in movies.csv order, not in input order.

        Args:
            movie_ids: List of movie IDs

        Returns:
            DataFrame with columns movieId and title, or None if none of the
            IDs is known
        """
        valid_ids = [mid for mid in movie_ids if mid in self.item_to_idx]
        if not valid_ids:
            return None

        is_requested = self.movies_df['movieId'].isin(valid_ids)
        return self.movies_df.loc[is_requested, ['movieId', 'title']].copy()

    def recommend_from_watched_movies(self, watched_movie_ids, k=10):
        """
        Generate recommendations based on a list of watched movie IDs.
        Creates a synthetic user profile by averaging the item embeddings of
        the watched movies, then scores every item as
        ``profile . item_embedding + item_bias + global_bias``.

        Args:
            watched_movie_ids: List of movie IDs the user has watched.
                Repeated IDs are counted once; unknown IDs are reported and
                ignored.
            k: Number of recommendations to return (values below 0 are
                treated as 0)

        Returns:
            DataFrame with movieId, title, and predicted_rating (highest
            first, watched movies excluded), or None if no valid ID was given.
            The frame is empty when every item has already been watched.
        """
        # De-duplicate (order preserved) so a repeated ID does not get extra
        # weight in the averaged profile.
        unique_watched = list(dict.fromkeys(watched_movie_ids))

        # Validate movie IDs
        valid_movie_ids = [mid for mid in unique_watched if mid in self.item_to_idx]
        invalid_movie_ids = [mid for mid in unique_watched if mid not in self.item_to_idx]

        if invalid_movie_ids:
            print(f"‚ö†Ô∏è  Warning: These movie IDs are not in the dataset: {invalid_movie_ids}")

        if not valid_movie_ids:
            print("‚ùå None of the provided movie IDs are valid!")
            return None

        print(f"‚úì Using {len(valid_movie_ids)} valid movie(s) for recommendations")

        # Convert movie IDs to embedding indices
        watched_item_indices = [self.item_to_idx[mid] for mid in valid_movie_ids]

        with torch.no_grad():
            watched_indices_tensor = torch.tensor(
                watched_item_indices, dtype=torch.long, device=self.device
            )
            watched_item_embeddings = self.model.item_embeddings(watched_indices_tensor)

            # Average the embeddings to create a user profile, shape (1, dim)
            synthetic_user_embedding = watched_item_embeddings.mean(dim=0, keepdim=True)

            # Get all item embeddings
            all_item_indices = torch.arange(
                self.num_items, dtype=torch.long, device=self.device
            )
            all_item_embeddings = self.model.item_embeddings(all_item_indices)

            # Dot product between the profile and every item. squeeze(0)
            # drops only the profile axis, so a one-item catalog still yields
            # a 1-D array.
            predictions = torch.matmul(
                synthetic_user_embedding,
                all_item_embeddings.T
            ).squeeze(0).cpu().numpy()

            # Add item biases and global bias (no user bias exists for a
            # synthetic user).
            item_biases = self.model.item_bias(all_item_indices).squeeze(-1).cpu().numpy()
            global_bias = self.model.global_bias.item()
            predictions = predictions + item_biases + global_bias

        # One row per item, built column-wise instead of a per-item Python loop.
        results_df = pd.DataFrame({
            'movieId': [self.idx_to_item[idx] for idx in range(self.num_items)],
            'predicted_rating': predictions.astype(float),
        })

        # Skip movies the user has already watched. The columns survive even
        # if nothing is left, so the sort below cannot raise KeyError.
        already_watched = results_df['movieId'].isin(set(valid_movie_ids))
        results_df = results_df[~already_watched]

        # Sort by predicted rating and get top k
        results_df = results_df.sort_values('predicted_rating', ascending=False).head(max(k, 0))

        # Merge with movie titles (left merge keeps the ranking order)
        results_df = results_df.merge(
            self.movies_df[['movieId', 'title']],
            on='movieId',
            how='left'
        )

        # Fill missing titles with "Unknown"
        results_df['title'] = results_df['title'].fillna('Unknown')

        return results_df[['movieId', 'title', 'predicted_rating']].reset_index(drop=True)



In [4]:

# HELPER FUNCTIONS
# ============================================================================
def display_movies(df, title="Movies"):
    """Print a movie listing, one line per row of ``df``.

    Args:
        df: DataFrame with ``movieId`` and ``title`` columns. If it also has a
            ``num_ratings`` column, the rating count is appended to each line.
        title: Heading printed above the listing.

    Rows whose title is missing (NaN, e.g. after a left merge against
    movies.csv) are shown as "Unknown" instead of raising a format error.
    """
    print(f"\n{title}:")
    print("=" * 80)
    show_counts = 'num_ratings' in df.columns
    for _, row in df.iterrows():
        movie_title = row['title']
        if pd.isna(movie_title):
            movie_title = 'Unknown'
        movie_title = str(movie_title)

        # iterrows() may upcast integers to float when the row is all-numeric;
        # int() keeps the ':6d' / ':,' format specs valid either way.
        movie_id = int(row['movieId'])
        if show_counts:
            print(f"ID: {movie_id:6d} | {movie_title:50s} ({int(row['num_ratings']):,} ratings)")
        else:
            print(f"ID: {movie_id:6d} | {movie_title}")
    print("=" * 80)


def movie_search_menu(recommender):
    """Interactive movie search menu."""
    while True:
        print("\n" + "=" * 70)
        print("MOVIE SEARCH")
        print("=" * 70)
        print("\nOptions:")
        print("1. Search for a movie by title")
        print("2. Show popular movies")
        print("3. Show random sample of movies")
        print("4. Done searching (proceed to recommendations)")
        
        choice = input("\nEnter your choice (1/2/3/4): ").strip()
        
        if choice == '1':
            search_term = input("\nüîç Enter movie title to search: ").strip()
            if search_term:
                results = recommender.search_movies(search_term)
                if results is not None:
                    display_movies(results, f"üé¨ Found {len(results)} movie(s) matching '{search_term}'")
                else:
                    print(f"‚ùå No movies found matching '{search_term}'")
            else:
                print("‚ùå Please enter a search term")
        
        elif choice == '2':
            try:
                n = int(input("\nHow many popular movies to show (default 20): ") or "20")
                popular = recommender.get_popular_movies(n)
                display_movies(popular, f"üî• Top {n} Most Popular Movies")
            except ValueError:
                print("‚ùå Invalid number")
        
        elif choice == '3':
            try:
                n = int(input("\nHow many random movies to show (default 20): ") or "20")
                random_movies = recommender.get_random_movies(n)
                display_movies(random_movies, f"üé≤ Random Sample of {n} Movies")
            except ValueError:
                print("‚ùå Invalid number")
        
        elif choice == '4':
            break
        
        else:
            print("‚ùå Invalid choice")


In [5]:
# MAIN EXECUTION
def parse_movie_ids(raw):
    """Parse a comma-separated string of movie IDs.

    Blank entries (e.g. from a trailing or doubled comma) are skipped and
    repeated IDs are kept once, in first-seen order.

    Args:
        raw: Text such as "318, 858, 50, 593".

    Returns:
        List of ints; empty if ``raw`` holds no IDs.

    Raises:
        ValueError: If a non-blank entry is not an integer.
    """
    tokens = (part.strip() for part in raw.split(','))
    ids = [int(token) for token in tokens if token]
    return list(dict.fromkeys(ids))


if __name__ == "__main__":
    # Configuration - UPDATE THESE PATHS
    MODEL_PATH = 'model_weights.pth'
    RATINGS_PATH = 'dataset/ratings.csv'
    MOVIES_PATH = 'dataset/movies.csv'  # Original movies CSV with titles
    EMBEDDING_DIM = 64  # Must match training config

    # Initialize recommender
    recommender = MovieRecommenderInference(
        model_path=MODEL_PATH,
        ratings_path=RATINGS_PATH,
        movies_path=MOVIES_PATH,
        embedding_dim=EMBEDDING_DIM
    )

    print("=" * 70)
    print("MOVIE RECOMMENDATION SYSTEM")
    print("=" * 70)

    try:
        # Ask if user wants to search for movies first
        search_first = input("\nDo you want to search for movies first? (y/n): ").strip().lower()

        if search_first == 'y':
            movie_search_menu(recommender)

        # Get watched movies input
        print("\n" + "=" * 70)
        print("GET RECOMMENDATIONS")
        print("=" * 70)
        print("\nüé¨ Enter movie IDs you've watched (comma-separated)")
        print("   Example: 318, 858, 50, 593\n")

        # Non-numeric entries raise ValueError, reported by the handler below.
        watched_movie_ids = parse_movie_ids(input("Movie IDs: "))

        if not watched_movie_ids:
            print("‚ùå No movie IDs entered!")
        else:
            print(f"\nüìã You've watched {len(watched_movie_ids)} movie(s):")
            watched_info = recommender.get_movie_info(watched_movie_ids)
            if watched_info is not None and not watched_info.empty:
                for _, row in watched_info.iterrows():
                    print(f"   ‚Ä¢ {row['movieId']}: {row['title']}")
            print()

            k = int(input("üéØ Number of recommendations (default 10): ").strip() or "10")

            if k <= 0:
                print("‚ùå Number of recommendations must be a positive integer.")
            else:
                print(f"\nüîç Generating top {k} recommendations based on your preferences...\n")

                recommendations = recommender.recommend_from_watched_movies(
                    watched_movie_ids,
                    k=k
                )

                if recommendations is not None:
                    print("=" * 80)
                    print("TOP RECOMMENDATIONS:")
                    print("=" * 80)
                    print(recommendations.to_string(index=False))
                    print("=" * 80)

    except ValueError:
        print("‚ùå Invalid input! Please enter numeric values separated by commas.")
    except KeyboardInterrupt:
        print("\n\nüëã Exiting...")
    except Exception as e:
        # Last-resort handler: prints the message only, so the traceback is
        # lost; remove it while debugging.
        print(f"‚ùå Error: {e}")


üöÄ Device: cpu
Loading data.
   ‚úì Users: 162,541 | Items: 59,047
Loading model from model_weights.pth...
‚úÖ Model loaded successfully

MOVIE RECOMMENDATION SYSTEM



Do you want to search for movies first? (y/n):  y



MOVIE SEARCH

Options:
1. Search for a movie by title
2. Show popular movies
3. Show random sample of movies
4. Done searching (proceed to recommendations)



Enter your choice (1/2/3/4):  1

üîç Enter movie title to search:  avengers



üé¨ Found 18 movie(s) matching 'avengers':
ID:   2153 | Avengers, The (1998)
ID:  89745 | Avengers, The (2012)
ID: 115727 | Crippled Avengers (Can que) (Return of the 5 Deadly Venoms) (1981)
ID: 122892 | Avengers: Age of Ultron (2015)
ID: 122912 | Avengers: Infinity War - Part I (2018)
ID: 122914 | Avengers: Infinity War - Part II (2019)
ID: 135979 | Next Avengers: Heroes of Tomorrow (2008)
ID: 136257 | Avengers Grimm (2015)
ID: 145676 | 3 Avengers (1964)
ID: 147238 | The New Adventures of the Elusive Avengers (1968)
ID: 147657 | Masked Avengers (1981)
ID: 159920 | Shaolin Avengers (1994)
ID: 159922 | The Shaolin Avengers (1976)
ID: 169616 | Scavengers (2013)
ID: 170297 | Ultimate Avengers 2 (2006)
ID: 186233 | The Scavengers (1970)
ID: 187221 | LEGO Marvel Super Heroes: Avengers Reassembled! (2015)
ID: 189217 | Avengers Grimm: Time Wars (2018)

MOVIE SEARCH

Options:
1. Search for a movie by title
2. Show popular movies
3. Show random sample of movies
4. Done searching (proceed to recommendations)


Enter your choice (1/2/3/4):  1

üîç Enter movie title to search:  heat



üé¨ Found 20 movie(s) matching 'heat':
ID:      6 | Heat (1995)
ID:    671 | Mystery Science Theater 3000: The Movie (1996)
ID:   1285 | Heathers (1989)
ID:   1950 | In the Heat of the Night (1967)
ID:   2917 | Body Heat (1981)
ID:   4497 | Dead Heat (1988)
ID:   4531 | Red Heat (1988)
ID:   5017 | Big Heat, The (1953)
ID:   5084 | Caged Heat (1974)
ID:   6814 | City Heat (1984)
ID:   6878 | Porn Theater (Chatte √† deux t√™tes, La) (2002)
ID:   7232 | Heat and Dust (1983)
ID:   8491 | White Heat (1949)
ID:  25735 | Cheat, The (1915)
ID:  26306 | Theatre of Blood (1973)
ID:  26964 | Bikini Summer III - South Beach Heat (1997)
ID:  51265 | Dead Heat on a Merry-Go-Round (1966)
ID:  52462 | Aqua Teen Hunger Force Colon Movie Film for Theaters (2007)
ID:  73608 | Heat (1972)
ID:  73778 | Story of a Cheat, The (Roman d'un tricheur, Le) (1936)

MOVIE SEARCH

Options:
1. Search for a movie by title
2. Show popular movies
3. Show random sample of movies
4. Done searching (proceed to recommendations)


Enter your choice (1/2/3/4):  1

üîç Enter movie title to search:  lagaan



üé¨ Found 2 movie(s) matching 'lagaan':
ID:   6776 | Lagaan: Once Upon a Time in India (2001)
ID: 181751 | Lagaan: Once Upon a Time in India (2001)

MOVIE SEARCH

Options:
1. Search for a movie by title
2. Show popular movies
3. Show random sample of movies
4. Done searching (proceed to recommendations)



Enter your choice (1/2/3/4):  4



GET RECOMMENDATIONS

üé¨ Enter movie IDs you've watched (comma-separated)
   Example: 318, 858, 50, 593



Movie IDs:  122892,6,6776



üìã You've watched 3 movie(s):
   ‚Ä¢ 6: Heat (1995)
   ‚Ä¢ 6776: Lagaan: Once Upon a Time in India (2001)
   ‚Ä¢ 122892: Avengers: Age of Ultron (2015)



üéØ Number of recommendations (default 10):  5



üîç Generating top 5 recommendations based on your preferences...

‚úì Using 3 valid movie(s) for recommendations
TOP RECOMMENDATIONS:
 movieId                                                 title  predicted_rating
     260             Star Wars: Episode IV - A New Hope (1977)          4.499103
    1196 Star Wars: Episode V - The Empire Strikes Back (1980)          4.463394
     318                      Shawshank Redemption, The (1994)          4.428584
    2959                                     Fight Club (1999)          4.420605
  159817                                   Planet Earth (2006)          4.416103
