# Bootstrap Analysis of Genre Impact on Regional Video Game Sales

This notebook provides a complete end-to-end workflow for:
1. Data preprocessing
2. Bootstrap resampling analysis
3. Statistical inference and visualization
4. Results interpretation


## 1. Setup and Imports


In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# The project root is taken to be the parent of the current working directory
# (assumes the kernel runs from a folder directly under the root — TODO confirm).
# It is placed first on sys.path.
PROJECT_ROOT = Path(".").resolve().parent
sys.path.insert(0, str(PROJECT_ROOT))

# Plot appearance: seaborn "whitegrid" style, 300 dpi for both on-screen
# figures and saved files.
sns.set_style("whitegrid")
plt.rcParams.update({'figure.dpi': 300, 'savefig.dpi': 300})

print(f"Project root: {PROJECT_ROOT}")


## 2. Load Bootstrap Results

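Load the pre-computed bootstrap result tables (`bootstrap_means_all_regions.csv` and `bootstrap_differences_all_regions.csv`) from `results/tables/` under the project root.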


In [None]:
# Both result tables are read from <project root>/results/tables
tables_dir = PROJECT_ROOT / "results" / "tables"
means_df = pd.read_csv(tables_dir / "bootstrap_means_all_regions.csv")
diff_df = pd.read_csv(tables_dir / "bootstrap_differences_all_regions.csv")

# Print the means table in full and only the first rows of the differences table
print("Bootstrap Means Results:", means_df, sep="\n")
print("\nBootstrap Differences Results:", diff_df.head(), sep="\n")


## 3. Results Summary and Interpretation


In [None]:
# Regions to report on, in display order
regions = ['Global', 'NA', 'EU', 'JP', 'Other']
rule = "=" * 60

print("Key Findings:\n")
print(rule)

# For each region that has rows in the means table, report the genre whose
# 'Mean' value is largest; regions with no rows are skipped silently.
for region in regions:
    region_means = means_df.loc[means_df['Region'] == region]
    if region_means.empty:
        continue
    top_row = region_means.loc[region_means['Mean'].idxmax()]
    print(f"{region}: {top_row['Genre']} has highest mean ({top_row['Mean']:.3f})")

print("\n" + rule)
print("Significant Differences:\n")

# Row-wise flag: True only where the 'Significant' column equals True.
# Computed once because it does not depend on the region.
is_significant = diff_df['Significant'] == True

# For each region, count its rows in the differences table and how many of
# them are flagged significant (a region with no rows reports 0 out of 0).
for region in regions:
    in_region = diff_df['Region'] == region
    n_total = int(in_region.sum())
    n_significant = int((in_region & is_significant).sum())
    print(f"{region}: {n_significant} out of {n_total} differences are significant")
