In [1]:
import pandas as pd

# Load datasets
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

# A movie must have strictly more than this many ratings to be ranked.
MIN_RATINGS = 50

# Per-movie statistics: number of ratings ('count') and average rating ('mean_rating')
grouped_ratings = ratings.groupby('movieId').agg(
    count=('rating', 'size'),
    mean_rating=('rating', 'mean')
).reset_index()

# Keep only movies with more than MIN_RATINGS ratings
filtered_ratings = grouped_ratings[grouped_ratings['count'] > MIN_RATINGS]

# Attach titles/genres. pd.merge defaults to an inner join, so a movieId that
# is absent from movies.csv is dropped here.
movies_with_ratings = pd.merge(filtered_ratings, movies, on='movieId')

# Fail with a clear message instead of the bare IndexError that .iloc[0]
# would raise on an empty frame.
if movies_with_ratings.empty:
    raise ValueError(
        f"No movie has more than {MIN_RATINGS} ratings; cannot pick a most popular movie."
    )

# Highest average rating first. mergesort is a stable algorithm, so movies tied
# on mean_rating keep their existing row order instead of an unspecified one.
most_popular_movie = movies_with_ratings.sort_values(
    by='mean_rating', ascending=False, kind='mergesort'
).iloc[0]

# Display the result
print("Most popular movie based on average user ratings:")
print(f"Title: {most_popular_movie['title']}")
print(f"Average Rating: {most_popular_movie['mean_rating']}")


Most popular movie based on average user ratings:
Title: Shawshank Redemption, The (1994)
Average Rating: 4.429022082018927


In [2]:
import pandas as pd

# Read the raw ratings and the movie metadata
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

# Per-movie statistics: number of ratings ('count') and average rating
# ('mean_rating'). Only 'count' is used for the ranking below.
grouped_ratings = (
    ratings
    .groupby('movieId')
    .agg(
        count=pd.NamedAgg(column='rating', aggfunc='size'),
        mean_rating=pd.NamedAgg(column='rating', aggfunc='mean'),
    )
    .reset_index()
)

# Keep only movies that received more than 50 ratings
filtered_ratings = grouped_ratings.loc[grouped_ratings['count'].gt(50)]

# Attach the movie metadata (title, genres) by movieId
movies_with_ratings = filtered_ratings.merge(movies, on='movieId')

# Most-rated movies first; the first five rows are the top 5
top_5_movies = movies_with_ratings.sort_values('count', ascending=False).iloc[:5]

# Show title and rating count for the top 5
print("Top 5 movies based on number of user ratings:")
print(top_5_movies.loc[:, ['title', 'count']])


Top 5 movies based on number of user ratings:
                                title  count
61                Forrest Gump (1994)    329
53   Shawshank Redemption, The (1994)    317
49                Pulp Fiction (1994)    307
99   Silence of the Lambs, The (1991)    279
248                Matrix, The (1999)    278


In [3]:
import pandas as pd

# Load datasets
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

# A movie must have strictly more than this many ratings to be ranked.
MIN_RATINGS = 50

# Per-movie statistics: number of ratings ('count') and average rating ('mean_rating')
grouped_ratings = ratings.groupby('movieId').agg(
    count=('rating', 'size'),
    mean_rating=('rating', 'mean')
).reset_index()

# Keep only movies with more than MIN_RATINGS ratings
filtered_ratings = grouped_ratings[grouped_ratings['count'] > MIN_RATINGS]

# Attach titles/genres. pd.merge defaults to an inner join, so a movieId that
# is absent from movies.csv is dropped here.
movies_with_ratings = pd.merge(filtered_ratings, movies, on='movieId')

# Keep rows whose 'genres' text contains 'Sci-Fi' (case-insensitive);
# rows with a missing genres value are treated as non-matching (na=False).
sci_fi_movies = movies_with_ratings[movies_with_ratings['genres'].str.contains('Sci-Fi', case=False, na=False)]

# .iloc[2] below needs at least three rows; fail with a clear message instead
# of a bare IndexError when the filters leave fewer.
if len(sci_fi_movies) < 3:
    raise ValueError(
        f"Only {len(sci_fi_movies)} Sci-Fi movie(s) have more than {MIN_RATINGS} ratings; "
        "cannot pick the third most popular."
    )

# Most-rated first, then take the third row. mergesort is a stable algorithm,
# so movies tied on 'count' keep their existing row order and the movie that
# lands in third place does not depend on an unspecified tie order.
third_most_popular_sci_fi = sci_fi_movies.sort_values(
    by='count', ascending=False, kind='mergesort'
).iloc[2]

# Display the result
print("Third most popular Sci-Fi movie based on number of user ratings:")
print(f"Title: {third_most_popular_sci_fi['title']}")
print(f"Number of Ratings: {third_most_popular_sci_fi['count']}")


Third most popular Sci-Fi movie based on number of user ratings:
Title: Jurassic Park (1993)
Number of Ratings: 238
