# MovieLens Pandas Analysis Notebook
Upload `ratings.csv`, `movies.csv`, and `users.csv` to the notebook's working directory before running; the cells below read them by relative path.

In [ ]:
import pandas as pd
# Load the three input tables from the working directory, in the order
# ratings, movies, users.
ratings, movies, users = (
    pd.read_csv(path) for path in ('ratings.csv', 'movies.csv', 'users.csv')
)
# Preview the first rows of the ratings table.
ratings.head()

In [ ]:
# Derive calendar columns from the raw timestamp.
# `unit='s'` makes pandas read the integers as seconds since the Unix epoch.
rated_at = pd.to_datetime(ratings['timestamp'], unit='s')
ratings['date'] = rated_at

# Reuse one datetime accessor for all three date parts.
date_parts = rated_at.dt
ratings['year'] = date_parts.year
ratings['month'] = date_parts.month
ratings['day'] = date_parts.day

ratings.head()

In [ ]:
# Mean rating per calendar year (uses the `year` column added to `ratings`
# by the timestamp-conversion cell).
ratings_by_year = ratings.groupby('year')
rating_trend = ratings_by_year['rating'].mean()
rating_trend

In [ ]:
# User x movie rating matrix: one row per userId, one column per movieId.
# Cells for (user, movie) pairs without a rating are NaN; duplicate pairs,
# if any, are combined with pivot_table's default aggregation.
pivot = pd.pivot_table(
    ratings,
    values='rating',
    index='userId',
    columns='movieId',
)
pivot

In [ ]:
# Mean rating per (year, genre).
# `genres` is a '|'-separated string; split it into a list and explode so
# each movie contributes one row per genre it belongs to.
genre_lists = movies['genres'].str.split('|')
movies_expanded = movies.assign(genres=genre_lists).explode('genres')

# One row per (rating, genre of the rated movie).
merged = pd.merge(ratings, movies_expanded, on='movieId')

by_year_and_genre = merged.groupby(['year', 'genres'])
genre_year = by_year_and_genre['rating'].mean()
genre_year

In [ ]:
# Rating behavior by age
# Genre-level ratings joined with user attributes; kept so that any later
# analysis needing genre + user columns can reuse it.
merged_users = merged.merge(users, on='userId')

# `merged` holds one row per (rating, genre), so averaging over it would count
# a rating once for every genre of its movie and skew the mean toward
# multi-genre movies. Average over the un-exploded `ratings` instead, joining
# only the user column that is needed.
ratings_with_age = ratings.merge(users[['userId', 'age']], on='userId')
age_rating = ratings_with_age.groupby('age')['rating'].mean()
age_rating

In [ ]:
# Controversial movies (high variance)
# A variance computed from a handful of ratings is mostly noise (one very low
# and one very high rating is enough to top the list), so only movies with at
# least this many ratings are ranked. Tune the threshold to the dataset size.
MIN_RATINGS_FOR_VARIANCE = 50

# Per-movie sample variance and number of ratings.
movie_stats = ratings.groupby('movieId')['rating'].agg(['var', 'count'])

# `variance_df` keeps every movie and, as before, stores the variance in the
# `rating` column; `n_ratings` is added alongside it.
variance_df = (
    movie_stats
    .rename(columns={'var': 'rating', 'count': 'n_ratings'})
    .reset_index()
    .merge(movies, on='movieId')
)

# Show the 20 highest-variance movies among those with enough ratings.
(
    variance_df[variance_df['n_ratings'] >= MIN_RATINGS_FOR_VARIANCE]
    .sort_values('rating', ascending=False)
    .head(20)
)

In [ ]:
# Top 5 movies per genre
# Without a minimum number of ratings, the ranking is dominated by movies
# with a single perfect rating. Tune the threshold to the dataset size.
MIN_RATINGS_FOR_TOP = 50

# Group by movieId as well as title so that distinct movies sharing a title
# are not averaged together. `rating` holds the mean rating, as before;
# `n_ratings` is the number of ratings behind it.
genre_avg = (
    merged.groupby(['genres', 'movieId', 'title'])['rating']
    .agg(['mean', 'count'])
    .rename(columns={'mean': 'rating', 'count': 'n_ratings'})
    .reset_index()
)

# Within each genre: highest mean first, ties broken by the larger number of
# ratings, then keep the first five rows per genre.
top5 = (
    genre_avg[genre_avg['n_ratings'] >= MIN_RATINGS_FOR_TOP]
    .sort_values(
        ['genres', 'rating', 'n_ratings'],
        ascending=[True, False, False],
    )
    .groupby('genres')
    .head(5)
)
top5

In [ ]:
# Users with the most spread-out ratings: sample variance of each user's own
# ratings, largest first.
per_user_variance = ratings.groupby('userId')['rating'].var()
user_variance = per_user_variance.sort_values(ascending=False)
user_variance.head(20)