In [1]:
import plotly.io as pio
import plotly.express as px
import pandas as pd
from constants import (
    CRIT_KS,
    DATE_K,
    PLATS_D,
    URL_K,
    USER_KS,
)
from load_scrape_data import get_clean_data_df

In [2]:
# Use plotly's 'iframe_connected' renderer for every figure shown in this notebook.
pio.renderers.default = 'iframe_connected'

In [3]:
# Load the cleaned scrape data. The cells below treat it as one row per game,
# indexed by the game's URL (see the index shown in the next cell's output).
df = get_clean_data_df()

In [4]:
# 269 games predate 1995: after sorting chronologically, positions 268 and 269
# are the last pre-1995 release and the first release from 1995 onward.
df.sort_values(DATE_K, inplace=True)
boundary_pos = 268
df[DATE_K].iloc[boundary_pos:boundary_pos + 2]

https://www.metacritic.com/game/operation-inner-space/          1994-12-31
https://www.metacritic.com/game/magic-and-mayhem-for-heretic/   1995-01-01
Name: date, dtype: datetime64[ns]

In [5]:
# Report how many platforms PLATS_D knows about, listed alphabetically.
plats = PLATS_D.keys()
platform_summary = f'{len(plats)} unique platforms: {sorted(plats)}'
print(platform_summary)

22 unique platforms: ['3DS', 'DS', 'Dreamcast', 'Game Boy Advance', 'GameCube', 'Nintendo 64', 'Nintendo Switch', 'PC', 'PSP', 'PlayStation', 'PlayStation 2', 'PlayStation 3', 'PlayStation 4', 'PlayStation 5', 'PlayStation Vita', 'Wii', 'Wii U', 'Xbox', 'Xbox 360', 'Xbox One', 'Xbox Series X', 'iOS (iPhone/iPad)']


In [6]:
# Fraction of games with at least one critic/user score, per release year,
# plus the normalized cumulative number of releases.
#
# NOTE: this cell mutates `df`: it adds the CRITICIZED_K and URL_K columns,
# replaces DATE_K with the release year, and resets the index. The guards
# below keep those mutations safe when the cell is run more than once.

# The misspelling is kept on purpose: this string is a column name and may be
# referenced elsewhere.
CRITICIZED_K = 'has_criticsm'

# A game counts as criticized when any critic or user score column is non-null.
score_ks = CRIT_KS | USER_KS
has_criticism_mask = df[list(score_ks)].notna().any(axis=1)
df[CRITICIZED_K] = has_criticism_mask

# Collapse release dates to calendar years. Skip when DATE_K is already
# numeric (i.e. this cell has run before): pd.to_datetime would read the
# years as nanosecond offsets from the epoch and turn every date into 1970.
if not pd.api.types.is_numeric_dtype(df[DATE_K]):
    df[DATE_K] = pd.to_datetime(df[DATE_K]).dt.year

# Preserve the original index in a column, then switch to a positional index.
# Skip when the index has already been reset, otherwise URL_K would be
# overwritten with row numbers.
if not isinstance(df.index, pd.RangeIndex):
    df[URL_K] = df.index
    df.reset_index(inplace=True)

# (date, criticism) value counts. Not used again in this cell; retained in
# case a later cell reads it. Only the URL column is counted instead of
# counting every column and then selecting one.
date_criticism_counts = df.groupby([DATE_K, CRITICIZED_K])[URL_K].count()

# (date, criticism) group sizes as a year x {False, True} table. The explicit
# reindex guarantees both columns exist, in this order, even if every game
# (or no game) has a score.
pair_counts_df = (
    df.groupby([DATE_K, CRITICIZED_K])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=[False, True], fill_value=0)
)

# Row-normalize to fractions. Replacing the column index wholesale also drops
# its axis name, so a later `melt` keeps the default 'variable' column name.
fractions_df = pair_counts_df.div(pair_counts_df.sum(axis=1), axis=0)
fractions_df.columns = ['fraction not reviewed', 'fraction reviewed']

# Releases per year, cumulative sum, scaled by its maximum.
date_counts = df.groupby(DATE_K)[URL_K].count()
releases_cum_sum = date_counts.cumsum()
releases_cum_frac = releases_cum_sum / releases_cum_sum.max()

# Assign the Series itself (not `.values`) so rows are matched on year rather
# than on position.
fractions_df['cumulative releases (normalized)'] = releases_cum_frac
fractions_df.head(2)

Unnamed: 0_level_0,fraction not reviewed,fraction reviewed,cumulative releases (normalized)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1981.0,1.0,0.0,8e-06
1982.0,1.0,0.0,3.3e-05


In [7]:
# Reshape to long form (one row per year and series) so plotly can draw one
# line, with its own symbol, per series.
wide_df = fractions_df.reset_index()
measure_cols = [col for col in wide_df.columns if col != DATE_K]
fractions_df = wide_df.melt(id_vars=DATE_K, value_vars=measure_cols)
fractions_df.head(2)

Unnamed: 0,date,variable,value
0,1981.0,fraction not reviewed,1.0
1,1982.0,fraction not reviewed,1.0


In [8]:
# One line per series in the long-form frame, distinguished by both color and
# marker symbol, plotted against release year.
fig = px.line(
    data_frame=fractions_df,
    x=DATE_K,
    y='value',
    color='variable',
    symbol='variable',
)
fig.update_traces(marker=dict(size=12))
fig.update_xaxes(title='release date', range=[1980.5, 2023.5])
fig.update_yaxes(title='')    # hide axis label
# Large font, legend placed inside the plot area, no legend title.
fig.update_layout(
    font=dict(size=24),
    legend=dict(x=0.015, y=0.5),
    legend_title=None,
)