# Metacritic — Business & Research EDA
This notebook analyzes `data/processed/metacritic_cleaned.csv` and produces interactive Plotly visuals suitable for executive reporting, plus a developer-level summary CSV that can be imported into Tableau.

In [1]:
import os
import pandas as pd
import plotly.express as px
import numpy as np
# Show up to 80 columns when a DataFrame is rendered, so wide frames are not elided.
pd.options.display.max_columns = 80

In [2]:
# Locate the cleaned dataset without hardcoding a user-specific absolute path.
# Resolution order:
#   1. the METACRITIC_CSV environment variable, if set;
#   2. the first ancestor of the working directory that contains DATA_RELPATH,
#      so the notebook runs from the project root or from any folder inside it.
DATA_RELPATH = os.path.join('data', 'processed', 'metacritic_cleaned.csv')


def _find_upwards(relpath, start=None):
    """Return the first existing ``<ancestor>/<relpath>``, or None.

    The search begins at ``start`` (default: the current working directory)
    and walks up one parent at a time until the filesystem root is reached.
    """
    current = os.path.abspath(start or os.getcwd())
    while True:
        candidate = os.path.join(current, relpath)
        if os.path.exists(candidate):
            return candidate
        parent = os.path.dirname(current)
        if parent == current:  # reached the filesystem root without a match
            return None
        current = parent


PATH = os.environ.get('METACRITIC_CSV') or _find_upwards(DATA_RELPATH)
# Raise explicitly rather than using `assert`, which is skipped under `python -O`.
if not PATH or not os.path.exists(PATH):
    raise FileNotFoundError(
        f'{PATH or DATA_RELPATH} not found. Run src/clean_metacritic.py first'
    )

# Read every column as text so pandas performs no type inference; only the
# columns converted below get a non-string dtype.
df = pd.read_csv(PATH, dtype=str)
# normalize types (values that fail to parse become NaT / NaN via errors='coerce')
if 'release_date' in df.columns:
    df['release_date'] = pd.to_datetime(df['release_date'], errors='coerce')
for c in ['metascore', 'user_score']:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors='coerce')
display(df.head(5))
df.info()

Unnamed: 0,name,platform,release_date,metascore,user_score,developer,publisher,genre
0,The Legend of Zelda: Tears of the Kingdom - Ni...,Nintendo Switch 2,2025-06-05,95.0,8.3,Nintendo,Nintendo,Open-World Action
1,The Legend of Zelda: Breath of the Wild - Nint...,Nintendo Switch 2,2025-06-05,95.0,8.5,Nintendo,Nintendo,Open-World Action
2,Hades II,PC,2025-09-25,95.0,8.6,Supergiant Games,Supergiant Games,Action RPG
3,Hades II,Nintendo Switch,2025-09-25,98.0,9.3,Supergiant Games,Supergiant Games,Action RPG
4,Hades II,Nintendo Switch 2,2025-09-25,94.0,8.6,Supergiant Games,Supergiant Games,Action RPG


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 543 entries, 0 to 542
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   name          543 non-null    object        
 1   platform      543 non-null    object        
 2   release_date  543 non-null    datetime64[ns]
 3   metascore     543 non-null    float64       
 4   user_score    543 non-null    float64       
 5   developer     543 non-null    object        
 6   publisher     543 non-null    object        
 7   genre         543 non-null    object        
dtypes: datetime64[ns](1), float64(2), object(5)
memory usage: 34.1+ KB


## Score distributions and platform breakdown
Show distributions, platform comparisons, and metascore vs user_score relationships.

In [3]:
# One histogram per score column that is present (same bin count and theme for both).
for score_col, hist_title in (
    ('metascore', 'Metascore distribution'),
    ('user_score', 'User score distribution'),
):
    if score_col not in df.columns:
        continue
    px.histogram(df, x=score_col, nbins=40, title=hist_title, template='plotly_white').show()

# metascore vs user_score scatter, restricted to rows where both scores are present
if {'metascore', 'user_score'}.issubset(df.columns):
    both_scores = df.dropna(subset=['metascore', 'user_score'])
    scatter = px.scatter(
        both_scores,
        x='metascore',
        y='user_score',
        hover_data=['name', 'platform'],
        title='Metascore vs User score',
        template='plotly_white',
    )
    scatter.show()

## Publisher/Developer performance and genre effects
Aggregate metrics for business KPIs (median metascore, number of releases, avg user score).

In [4]:
# top developers by median metascore (min 3 releases)
MIN_RELEASES = 3  # minimum number of distinct scored titles for a developer to be ranked
TOP_N = 20        # number of developers shown in the chart
if 'developer' in df.columns and 'metascore' in df.columns:
    # The table has one row per title *and* platform (the same game appears once
    # for each platform it is listed on). Counting rows would let a single
    # multi-platform game satisfy the release threshold, so count distinct
    # titles that actually have a metascore instead.
    # NOTE: the median is still taken over platform-level rows.
    scored = df.dropna(subset=['metascore'])
    dev_stats = (
        scored.groupby('developer')
        .agg(releases=('name', 'nunique'), median_metascore=('metascore', 'median'))
        .reset_index()
    )
    top_devs = (
        dev_stats[dev_stats['releases'] >= MIN_RELEASES]
        .sort_values('median_metascore', ascending=False)
        .head(TOP_N)
    )
    fig = px.bar(
        top_devs,
        x='developer',
        y='median_metascore',
        color='releases',
        title=f'Top developers by median metascore (>={MIN_RELEASES} releases)',
        template='plotly_white',
    )
    fig.update_layout(xaxis_tickangle=-45, height=500)
    fig.show()

### Save summary table for dashboards
Save a developer-level summary that can be imported into Tableau.

In [5]:
# Developer-level summary for dashboards. It is written into the same folder as
# the cleaned input file (PATH, set in the load cell) instead of a folder
# relative to the working directory, so the output location does not depend on
# where the notebook is launched from.
if 'developer' in df.columns and 'metascore' in df.columns:
    out_dir = os.path.dirname(os.path.abspath(PATH))
    # Create the folder only when something is actually going to be written.
    os.makedirs(out_dir, exist_ok=True)

    metrics = {
        'releases': ('name', 'count'),
        'mean_metascore': ('metascore', 'mean'),
    }
    # Same column guard as the other cells: skip the user-score metric when
    # the column is absent rather than failing with a KeyError.
    if 'user_score' in df.columns:
        metrics['mean_user'] = ('user_score', 'mean')

    summary = df.groupby('developer').agg(**metrics).reset_index()
    summary.to_csv(os.path.join(out_dir, 'metacritic_developer_summary.csv'), index=False)
    print('Wrote metacritic_developer_summary.csv')

Wrote metacritic_developer_summary.csv
