# Exploratory Data Analysis — Video Game Sales

This notebook demonstrates usage of the `project_games` package for EDA.

In [None]:
import matplotlib.pyplot as plt

from project_games.data.loader import load_processed_data
from project_games.analysis.temporal import games_per_year, filter_relevant_period
from project_games.analysis.platform import platform_total_sales, platform_yearly_sales
from project_games.analysis.genre import genre_sales_summary, classify_genres
from project_games.analysis.regional import top_platforms_by_region, top_genres_by_region
from project_games.analysis.hypothesis import run_configured_tests
from project_games.visualization.plots_matplotlib import (
    plot_games_per_year,
    plot_platform_evolution,
    plot_boxplot_by_group,
    plot_regional_bars,
)

In [None]:
# Load the processed dataset through the package loader. `df` is the base
# frame that the later cells read from.
df = load_processed_data()
# Print a structural overview of the loaded frame as a quick sanity check.
# (assumes `df` is a pandas DataFrame — confirm against the loader)
df.info()

## Games per Year

In [None]:
# Compute the games-per-year result on the full (unfiltered) dataset and hand
# it to the matching plotting helper.
gpy = games_per_year(df)
plot_games_per_year(gpy)
# Render the figure produced by the helper above.
plt.show()

## Platform Analysis

In [None]:
# Narrow the dataset with filter_relevant_period; `df_rel` is reused by every
# cell below this one.
df_rel = filter_relevant_period(df)
# Total sales per platform over the filtered data.
ps = platform_total_sales(df_rel)
# Keep the first ten index labels as the platforms of interest.
# NOTE(review): this only yields the "top 10" if platform_total_sales returns
# its result sorted by descending sales — confirm in project_games.analysis.platform.
top10 = ps.head(10).index.tolist()
# Yearly sales for just those platforms, then plot them.
pys = platform_yearly_sales(df_rel, top10)
plot_platform_evolution(pys, top10)
plt.show()

In [None]:
# Box plot grouped by the "platform" column, limited to the platforms in
# `top10` (computed in the platform cell above). The value being plotted is
# chosen inside plot_boxplot_by_group.
plot_boxplot_by_group(df_rel, "platform", groups=top10)
plt.show()

## Genre Analysis

In [None]:
# Per-genre sales summary for the filtered dataset.
gs = genre_sales_summary(df_rel)
print(gs)

# Genre classification, printed after a blank separator line.
genre_classes = classify_genres(df_rel)
print("\nClassification:", genre_classes)

## Regional Analysis

In [None]:
# Collect the per-region platform ranking first, then pass it to the
# bar-chart helper.
platforms_by_region = top_platforms_by_region(df_rel)
plot_regional_bars(platforms_by_region, title="Top 5 Platforms")
plt.show()

In [None]:
# Collect the per-region genre ranking first, then pass it to the
# bar-chart helper.
genres_by_region = top_genres_by_region(df_rel)
plot_regional_bars(genres_by_region, title="Top 5 Genres")
plt.show()

## Hypothesis Tests

In [None]:
# Run the hypothesis tests on the filtered dataset. Which tests run is decided
# inside run_configured_tests (presumably from project configuration — confirm).
results = run_configured_tests(df_rel)
# Each result exposes a summary() method; print one summary per test.
for r in results:
    print(r.summary())