In [13]:
import pandas as pd
# We only need movies.csv and ratings.csv for the basic system
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

# verifying if it worked by looking at the first 5 rows
print("Movies Table:")
print(movies.head())
print("\nRatings Table:")
print(ratings.head())

Movies Table:
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  

Ratings Table:
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931


In [9]:
# Merge ratings with movie titles
data = pd.merge(ratings, movies, on='movieId')

movie_matrix = data.pivot_table(index='userId', columns='title', values='rating')

print("Matrix Shape:", movie_matrix.shape)
print("Matrix Preview:")
print(movie_matrix.head())

Matrix Shape: (610, 9719)
Matrix Preview:
title   '71 (2014)  'Hellboy': The Seeds of Creation (2004)  \
userId                                                        
1              NaN                                      NaN   
2              NaN                                      NaN   
3              NaN                                      NaN   
4              NaN                                      NaN   
5              NaN                                      NaN   

title   'Round Midnight (1986)  'Salem's Lot (2004)  \
userId                                                
1                          NaN                  NaN   
2                          NaN                  NaN   
3                          NaN                  NaN   
4                          NaN                  NaN   
5                          NaN                  NaN   

title   'Til There Was You (1997)  'Tis the Season for Love (2015)  \
userId                                                      

In [10]:
def get_recommendations(movie_title, min_ratings=50):
    """
    Input: Movie Title (e.g., 'Toy Story (1995)')
    Output: List of recommended movies
    """
    #  Get ratings for the specific movie
    try:
        user_ratings = movie_matrix[movie_title]
    except KeyError:
        return f"Error: Movie '{movie_title}' not found in dataset."

    #  Calculate correlation with all other movies
    similar_movies = movie_matrix.corrwith(user_ratings)

    #  Create a DataFrame for the results
    corr_df = pd.DataFrame(similar_movies, columns=['Correlation'])
    corr_df.dropna(inplace=True)

    # Add 'number of ratings' to filter out obscure movies
    ratings_count = data.groupby('title')['rating'].count()
    corr_df = corr_df.join(ratings_count)

    popular_matches = corr_df[corr_df['rating'] > min_ratings]

    # Sort by highest first and return top 10
    return popular_matches.sort_values('Correlation', ascending=False).head(10)

In [12]:
import pandas as pd
import warnings


warnings.filterwarnings("ignore")

movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

data = pd.merge(ratings, movies, on='movieId')

# Create Matrix
user_movie_matrix = data.pivot_table(index='userId', columns='title', values='rating')

ratings_count = data.groupby('title')['rating'].count()
ratings_stats = pd.DataFrame(ratings_count)
ratings_stats.columns = ['num_ratings']

# Define the Recommendation Function
def get_recommendations(movie_name, min_ratings=50):
    if movie_name not in user_movie_matrix.columns:
        return "Movie not found."

    # Get ratings for the chosen movie
    user_ratings = user_movie_matrix[movie_name]

    # Calculate similarity with all other movies
    similar_movies = user_movie_matrix.corrwith(user_ratings)

    # Convert to DataFrame and clean up
    corr_df = pd.DataFrame(similar_movies, columns=['Correlation'])
    corr_df.dropna(inplace=True)

    corr_df = corr_df.join(ratings_stats['num_ratings'])

    recommendations = corr_df[corr_df['num_ratings'] > min_ratings].sort_values('Correlation', ascending=False)

    return recommendations.head(10)

print("Recommendations for 'Toy Story (1995)':")
print(get_recommendations('Toy Story (1995)'))
print("\n" + "="*50 + "\n")
print("Recommendations for 'Jurassic Park (1993)':")
print(get_recommendations('Jurassic Park (1993)'))

Recommendations for 'Toy Story (1995)':
                                             Correlation  num_ratings
title                                                                
Toy Story (1995)                                1.000000          215
Toy Story 2 (1999)                              0.699211           97
Arachnophobia (1990)                            0.652424           53
Incredibles, The (2004)                         0.643301          125
Finding Nemo (2003)                             0.618701          141
Aladdin (1992)                                  0.611892          183
Erin Brockovich (2000)                          0.598016           70
Wallace & Gromit: The Wrong Trousers (1993)     0.589625           56
Blazing Saddles (1974)                          0.585892           62
Wolf of Wall Street, The (2013)                 0.578479           54


Recommendations for 'Jurassic Park (1993)':
                                            Correlation  num_ratings
title