In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load both CSV tables (relative paths, resolved against the working directory)
# in one pass, then preview the first rows of the movies table.
movies, ratings = (pd.read_csv(csv_name) for csv_name in ("movies.csv", "ratings.csv"))
movies.head()


In [None]:
# Preview the first rows of the ratings table to check its columns and values.
ratings.head()

In [None]:
# Print column dtypes, non-null counts and memory usage for the ratings table.
ratings.info()

In [None]:
# Report the number of missing values per column, first for movies and then
# for ratings.
for frame in (movies, ratings):
    print(frame.isna().sum())

==> Neither `movies` nor `ratings` contains any missing values.

In [None]:
# Turn the pipe-delimited `genres` string into one row per (movie, genre) pair.
# `assign` returns a new frame, so `movies` itself is left untouched.
movies_exploded = (
    movies
    .assign(genres=movies['genres'].str.split('|'))
    .explode('genres')
)

print(f"\nAfter exploding genres: {len(movies_exploded)} rows")
print("\nSample of exploded data:")
print(movies_exploded.head(10))

# Count movies per genre, leaving out the "(no genres listed)" placeholder
# when it is present (errors='ignore' makes the drop a no-op otherwise).
genre_counts = (
    movies_exploded['genres']
    .value_counts()
    .drop('(no genres listed)', errors='ignore')
)




In [None]:
# Print the per-genre movie counts under a banner framed by two 50-character
# rules; a single print call emits the four pieces on separate lines.
banner = "=" * 50
print("\n" + banner, "GENRE DISTRIBUTION", banner, genre_counts, sep="\n")


In [None]:
# Bar chart of how many movies carry each genre label, drawn through the
# explicit figure/axes interface rather than the pyplot state machine.
fig, ax = plt.subplots(figsize=(14, 6))

genre_counts.plot.bar(ax=ax, color='steelblue', edgecolor='black')
ax.set_title('Number of Movies per Genre', fontsize=14, fontweight='bold')
ax.set_xlabel('Genre', fontsize=12)
ax.set_ylabel('Number of Movies', fontsize=12)
# Slant the genre labels and right-align them so each one ends under its bar.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()

In [None]:
# Ten movies with the highest mean rating: average the ratings per movie,
# attach the titles, then keep the ten best-scoring rows.
top_movies = (
    ratings
    .groupby('movieId', as_index=False)['rating']
    .mean()
    .merge(movies[['movieId', 'title']], on='movieId')
    .sort_values('rating', ascending=False)
    .head(10)
)
print(top_movies[['title', 'rating']])

In [None]:
# Rank Drama titles by their mean rating across all users.
# Ratings are aggregated per movie *before* the join: merging the raw ratings
# and de-duplicating on movieId would keep a single arbitrary user's rating
# per title and rank on that instead of on the average.
top_drama = (
    movies_exploded[movies_exploded['genres'] == 'Drama']
    .drop_duplicates(subset=['movieId'])  # guarantee one row per movie
    .merge(
        ratings.groupby('movieId')['rating']
        .agg(rating='mean', num_ratings='count')
        .reset_index(),
        on='movieId',  # inner join: movies without any rating are left out
    )
)
# Ties on the mean are broken in favour of the more frequently rated title.
top_drama.sort_values(['rating', 'num_ratings'], ascending=False).head(10)

In [None]:
# Rank Comedy titles by their mean rating across all users.
# Ratings are aggregated per movie *before* the join: merging the raw ratings
# and de-duplicating on movieId would keep a single arbitrary user's rating
# per title and rank on that instead of on the average.
top_comedy = (
    movies_exploded[movies_exploded['genres'] == 'Comedy']
    .drop_duplicates(subset=['movieId'])  # guarantee one row per movie
    .merge(
        ratings.groupby('movieId')['rating']
        .agg(rating='mean', num_ratings='count')
        .reset_index(),
        on='movieId',  # inner join: movies without any rating are left out
    )
)
# Ties on the mean are broken in favour of the more frequently rated title.
top_comedy.sort_values(['rating', 'num_ratings'], ascending=False).head(10)

In [None]:
# Rank Romance titles by their mean rating across all users.
# Ratings are aggregated per movie *before* the join: merging the raw ratings
# and de-duplicating on movieId would keep a single arbitrary user's rating
# per title and rank on that instead of on the average.
top_romance = (
    movies_exploded[movies_exploded['genres'] == 'Romance']
    .drop_duplicates(subset=['movieId'])  # guarantee one row per movie
    .merge(
        ratings.groupby('movieId')['rating']
        .agg(rating='mean', num_ratings='count')
        .reset_index(),
        on='movieId',  # inner join: movies without any rating are left out
    )
)
# Ties on the mean are broken in favour of the more frequently rated title.
top_romance.sort_values(['rating', 'num_ratings'], ascending=False).head(10)

In [None]:
# Rank Thriller titles by their mean rating across all users.
# Ratings are aggregated per movie *before* the join: merging the raw ratings
# and de-duplicating on movieId would keep a single arbitrary user's rating
# per title and rank on that instead of on the average.
top_thriller = (
    movies_exploded[movies_exploded['genres'] == 'Thriller']
    .drop_duplicates(subset=['movieId'])  # guarantee one row per movie
    .merge(
        ratings.groupby('movieId')['rating']
        .agg(rating='mean', num_ratings='count')
        .reset_index(),
        on='movieId',  # inner join: movies without any rating are left out
    )
)
# Ties on the mean are broken in favour of the more frequently rated title.
top_thriller.sort_values(['rating', 'num_ratings'], ascending=False).head(10)

In [None]:
# Rank Action titles by their mean rating across all users.
# Ratings are aggregated per movie *before* the join: merging the raw ratings
# and de-duplicating on movieId would keep a single arbitrary user's rating
# per title and rank on that instead of on the average.
top_action = (
    movies_exploded[movies_exploded['genres'] == 'Action']
    .drop_duplicates(subset=['movieId'])  # guarantee one row per movie
    .merge(
        ratings.groupby('movieId')['rating']
        .agg(rating='mean', num_ratings='count')
        .reset_index(),
        on='movieId',  # inner join: movies without any rating are left out
    )
)
# Ties on the mean are broken in favour of the more frequently rated title.
top_action.sort_values(['rating', 'num_ratings'], ascending=False).head(10)

In [None]:
# Rank Sci-Fi titles by their mean rating across all users.
# Ratings are aggregated per movie *before* the join: merging the raw ratings
# and de-duplicating on movieId would keep a single arbitrary user's rating
# per title and rank on that instead of on the average.
top_scifi = (
    movies_exploded[movies_exploded['genres'] == 'Sci-Fi']
    .drop_duplicates(subset=['movieId'])  # guarantee one row per movie
    .merge(
        ratings.groupby('movieId')['rating']
        .agg(rating='mean', num_ratings='count')
        .reset_index(),
        on='movieId',  # inner join: movies without any rating are left out
    )
)
# Ties on the mean are broken in favour of the more frequently rated title.
top_scifi.sort_values(['rating', 'num_ratings'], ascending=False).head(10)