In [20]:
import pandas as pd
import duckdb
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
# Suppress every Python warning for the remainder of the kernel session.
warnings.filterwarnings(action='ignore')

# Pandas display options used throughout the notebook.
display_options = {
    'display.max_columns': None,
    'display.width': None,
    'display.max_rows': 50,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

# Plot appearance: the matplotlib style is applied first, then the seaborn
# palette (same order as before, so the palette is set on top of the style).
plt.style.use('default')
sns.set_palette("husl")

print("🏈 Premier League Analytics - Exploration Started")

🏈 Premier League Analytics - Exploration Started


In [21]:
# Connect to database
# The path is relative to the notebook's working directory.
db_path = "../data/premierleague_raw.duckdb"

# duckdb.connect() creates a brand-new, empty database file when the path does
# not exist, so a wrong working directory would still "connect" and only fail
# later with confusing missing-table errors. Fail fast with a clear message.
if not Path(db_path).is_file():
    raise FileNotFoundError(
        f"DuckDB database not found at {Path(db_path).resolve()} "
        "(db_path is resolved relative to the current working directory)"
    )

conn = duckdb.connect(db_path)
print(f"✅ Connected to: {db_path}")

# Get all tables and basic info
# The catalog query lists every base table in the 'main' schema, ordered by name.
tables_query = """
SELECT table_name 
FROM information_schema.tables 
WHERE table_schema='main' AND table_type='BASE TABLE'
ORDER BY table_name
"""
all_tables = conn.execute(tables_query).fetchdf()

# A "stat table" is any table whose name starts with one of these prefixes.
stat_prefixes = ('squad_', 'opponent_', 'player_')
stat_tables = [
    name
    for name in all_tables['table_name']
    if name.startswith(stat_prefixes)
]

print(f"📊 Found {len(stat_tables)} stat tables")

# Show table counts
# Prints one line per stat table (alphabetical order) with its row count.
print("\n📈 Table Row Counts:")
for table in sorted(stat_tables)[:15]:  # Show first 15
    # The table name is spliced into the SQL text, so quote it as an identifier
    # (doubling any embedded double quote). Names that are not plain
    # identifiers — hyphens, spaces, reserved words — would otherwise break
    # the query.
    quoted_table = '"' + table.replace('"', '""') + '"'
    count = conn.execute(f"SELECT COUNT(*) FROM {quoted_table}").fetchone()[0]
    print(f"  {table:<25} {count:>4} rows")

# Current season status
# One aggregate row over raw_fixtures: the highest current_through_gameweek,
# the total number of fixtures, and how many of them are flagged is_completed.
fixture_info = conn.execute("""
SELECT 
    MAX(current_through_gameweek) as current_gameweek,
    COUNT(*) as total_fixtures,
    SUM(CASE WHEN is_completed THEN 1 ELSE 0 END) as completed_fixtures
FROM raw_fixtures
""").fetchdf().iloc[0]


def _format_whole(value, missing="n/a"):
    """Format a numeric stat for display.

    Taking the row with ``.iloc[0]`` yields a single Series, and its values
    were being printed as floats (e.g. ``30.0/380.0``). Whole numbers are
    rendered without the trailing ``.0``; non-integral values are shown as-is.

    Args:
        value: Scalar taken from the ``fixture_info`` row.
        missing: Text returned when ``value`` is NULL/NaN (e.g. the aggregates
            over an empty ``raw_fixtures`` table).

    Returns:
        The display string for ``value``.
    """
    if pd.isna(value):
        return missing
    number = float(value)
    return str(int(number)) if number.is_integer() else str(value)


print("\n🏈 Season Status:")
print(f"  Current gameweek: {_format_whole(fixture_info['current_gameweek'])}")
# A missing completed count means there were no rows to sum, i.e. zero completed.
print(
    "  Completed fixtures: "
    f"{_format_whole(fixture_info['completed_fixtures'], missing='0')}"
    f"/{_format_whole(fixture_info['total_fixtures'])}"
)

✅ Connected to: ../data/premierleague_raw.duckdb
📊 Found 33 stat tables

📈 Table Row Counts:
  opponent_defense            20 rows
  opponent_goalshotcreation   20 rows
  opponent_keepers            20 rows
  opponent_keepersadv         20 rows
  opponent_misc               20 rows
  opponent_passing            20 rows
  opponent_passingtypes       20 rows
  opponent_playingtime        20 rows
  opponent_possession         20 rows
  opponent_shooting           20 rows
  opponent_standard           20 rows
  player_defense             370 rows
  player_goalshotcreation    370 rows
  player_keepers              21 rows
  player_keepersadv           21 rows

🏈 Season Status:
  Current gameweek: 4.0
  Completed fixtures: 30.0/380.0
