**Name:** Tanvir Ahmed Apu

**Id:** 212 505 1045


**Course Code:** CSE 426  
**Course Title:** Data Mining and Warehouse Lab  
**Section:** 8A

**Department of Computer Science & Engineering**

**Step 1: Load Data & Create User-Movie Matrix**

In [None]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import euclidean
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Mount Google Drive at /content/drive so the CSV paths used below resolve.
# NOTE(review): the import is unconditional, so this cell fails outside
# Google Colab, where the `google.colab` package is not available.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the ratings and movie-metadata tables from the shared Drive folder.
DATA_DIR = "/content/drive/MyDrive/Data Mining and warehouse/Week-1/03 Recommendation System 2"

ratings_df = pd.read_csv(f"{DATA_DIR}/ratings.csv")
movies_df = pd.read_csv(f"{DATA_DIR}/movies.csv")

In [None]:
# Reshape the long ratings table into a users x movies grid (one row per
# userId, one column per movieId); missing user/movie pairs are filled with 0.
ratings_wide = ratings_df.pivot(index='userId', columns='movieId', values='rating')
user_movie_matrix = ratings_wide.fillna(0)

In [None]:
# Mean rating of every movie, indexed by movieId.
ratings_by_movie = ratings_df.groupby('movieId')['rating']
average_movie_ratings = ratings_by_movie.mean()

In [None]:
# Item-item cosine similarity: each movie is represented by its column of
# user ratings, so the matrix is transposed to put movies on the rows.
movie_ids = user_movie_matrix.columns
movie_vectors = user_movie_matrix.T
pairwise_similarity = cosine_similarity(movie_vectors)

# Label both axes with movieId so rows/columns can be looked up by id.
movie_similarity_matrix = pd.DataFrame(
    pairwise_similarity, index=movie_ids, columns=movie_ids
)

**Step 2: Find Similar Movies**

In [None]:
def find_similar_movies(movie_title, top_n=5):
    """Find the movies most similar to a given title, best match first.

    Similarity scores are read from the module-level
    ``movie_similarity_matrix`` (labelled by movieId on both axes) and
    titles are resolved through ``movies_df``.

    Args:
        movie_title: Exact title to look up in ``movies_df['title']``.
        top_n: Maximum number of similar movies to report.

    Returns:
        An array of up to ``top_n`` titles ordered by decreasing similarity,
        or an empty list when the title is unknown or the movie has no row
        in the similarity matrix.
    """
    matching_ids = movies_df.loc[movies_df['title'] == movie_title, 'movieId'].values
    if len(matching_ids) == 0:
        print("Movie not found.")
        return []

    movie_id = matching_ids[0]
    if movie_id not in movie_similarity_matrix.index:
        # The movie is listed in movies_df but has no similarity row, so
        # report it instead of letting .loc raise KeyError.
        print("No similarity data available for this movie.")
        return []

    # Remove the query movie by label rather than by slicing off position 0:
    # when scores tie, another movie may sort ahead of it.
    scores = movie_similarity_matrix.loc[movie_id].drop(labels=movie_id)
    similar_movies = scores.sort_values(ascending=False, kind='stable').head(max(top_n, 0))

    # Look titles up in similarity order; a boolean `isin` filter would
    # return them in movies_df row order and lose the ranking.
    title_by_id = movies_df.drop_duplicates(subset='movieId').set_index('movieId')['title']
    recommended_titles = title_by_id.reindex(similar_movies.index).dropna().values

    print(f"\nTop {top_n} movies similar to '{movie_title}':")
    for rank, title in enumerate(recommended_titles, start=1):
        print(f"{rank}. {title}")

    return recommended_titles

In [None]:
# Example: list the five titles most similar to "Toy Story (1995)". The call
# is the last expression in the cell, so its returned array is also displayed.
find_similar_movies("Toy Story (1995)", top_n=5)


Top 5 movies similar to 'Toy Story (1995)':
1. Star Wars: Episode IV - A New Hope (1977)
2. Forrest Gump (1994)
3. Jurassic Park (1993)
4. Independence Day (a.k.a. ID4) (1996)
5. Toy Story 2 (1999)


array(['Star Wars: Episode IV - A New Hope (1977)', 'Forrest Gump (1994)',
       'Jurassic Park (1993)', 'Independence Day (a.k.a. ID4) (1996)',
       'Toy Story 2 (1999)'], dtype=object)

**Step 3: Generate Personalized Recommendations for a User**

In [None]:
def recommend_movies_for_user(target_user, top_n=10):
    """Print personalized movie recommendations for one user.

    First prints up to five of the user's highest-rated movies, then the
    ``top_n`` unrated movies with the highest combined score. A candidate's
    score is the sum, over every movie the user rated, of
    ``similarity(rated, candidate) * average_rating(candidate)``.

    Reads the module-level ``user_movie_matrix``, ``movie_similarity_matrix``,
    ``average_movie_ratings`` and ``movies_df``.

    Args:
        target_user: userId label of a row in ``user_movie_matrix``.
        top_n: Number of recommendations to print.

    Returns:
        None. Results are printed; an unknown user prints "User not found."
    """
    if target_user not in user_movie_matrix.index:
        print("User not found.")
        return

    # One movieId -> title lookup shared by both listings, so a movie missing
    # from movies_df is reported as "Unknown Movie" in either place.
    title_by_id = movies_df.drop_duplicates(subset='movieId').set_index('movieId')['title']

    # Movies the user has rated (a 0 in the matrix means "not rated").
    user_ratings = user_movie_matrix.loc[target_user]
    top_rated_movies = user_ratings[user_ratings > 0].sort_values(ascending=False)

    print(f"\nTop-rated movies for user {target_user}:")
    for movie_id, rating in top_rated_movies.head(5).items():
        print(f"- {title_by_id.get(movie_id, 'Unknown Movie')} ({rating}/5)")

    # Candidates are the unrated movies that have a similarity column; the
    # seeds are the rated movies that have a similarity row.
    unrated_movies = user_ratings.index[user_ratings == 0]
    candidate_ids = movie_similarity_matrix.columns.intersection(unrated_movies)
    seed_ids = top_rated_movies.index.intersection(movie_similarity_matrix.index)

    ranked = []
    if len(seed_ids) > 0 and len(candidate_ids) > 0:
        # Candidates without an average rating contribute a score of 0.
        candidate_avg = average_movie_ratings.reindex(candidate_ids).fillna(0)
        # Weight every similarity by the candidate's average rating, then add
        # up the contributions of all seed movies in one vectorised pass.
        scores = (
            movie_similarity_matrix.loc[seed_ids, candidate_ids]
            .mul(candidate_avg, axis='columns')
            .sum(axis='index')
        )
        ranked = scores.sort_values(ascending=False, kind='stable').head(top_n).items()

    print(f"\nTop {top_n} recommended movies for user {target_user}:")
    for movie_id, score in ranked:
        print(f"- {title_by_id.get(movie_id, 'Unknown Movie')} (Score: {score:.2f})")


# Example run: print the top-rated movies and ten recommendations for user 45.
recommend_movies_for_user(target_user=45, top_n=10)



Top-rated movies for user 45:
- Ocean's Thirteen (2007) (5.0/5)
- Usual Suspects, The (1995) (5.0/5)
- Pirates of the Caribbean: The Curse of the Black Pearl (2003) (5.0/5)
- Finding Nemo (2003) (5.0/5)
- Pulp Fiction (1994) (5.0/5)

Top 10 recommended movies for user 45:
- Reservoir Dogs (1992) (Score: 504.14)
- Alien (1979) (Score: 470.26)
- Shawshank Redemption, The (1994) (Score: 469.85)
- Fargo (1996) (Score: 465.73)
- Memento (2000) (Score: 461.27)
- Seven (a.k.a. Se7en) (1995) (Score: 456.91)
- Aliens (1986) (Score: 448.68)
- Apocalypse Now (1979) (Score: 446.73)
- Kill Bill: Vol. 1 (2003) (Score: 444.92)
- Full Metal Jacket (1987) (Score: 444.45)
