# 05 - Export Dashboard Data

Generates pre-processed JSON files for the React dashboard. Replaces mock data with real Transfermarkt results.

In [None]:

import pandas as pd
import numpy as np
import sqlite3
import json
from pathlib import Path

# Input database and dashboard output folder, both relative to the
# directory the notebook is executed from.
DB_PATH = Path('..') / 'data' / 'processed' / 'football.db'
OUT_DIR = Path('..') / 'dashboard' / 'public' / 'data'

# sqlite3.connect() creates a brand-new empty database when the file does not
# exist, which would only surface later as confusing "no such table" errors.
# Fail early with an explicit message instead.
if not DB_PATH.is_file():
    raise FileNotFoundError(f"SQLite database not found: {DB_PATH.resolve()}")

conn = sqlite3.connect(DB_PATH)

# Competition ids (as stored in the *_competition_id columns queried below)
# mapped to the league names shown on the dashboard.
LEAGUE_NAMES = {'GB1': 'Premier League', 'ES1': 'La Liga', 'IT1': 'Serie A', 'L1': 'Bundesliga', 'FR1': 'Ligue 1'}

def save(name, data):
    """Write ``data`` as indented JSON to ``OUT_DIR / name`` and print a summary.

    Args:
        name: File name of the JSON file, relative to ``OUT_DIR``.
        data: Either a list of records, or a dict whose values are lists of
            records or single values.

    NaN and infinite floats are written as ``null``. ``json.dump`` would
    otherwise emit the bare tokens ``NaN`` / ``Infinity``, which are not valid
    JSON and make ``JSON.parse`` reject the whole file. Objects the ``json``
    module cannot serialise natively are stringified (``default=str``).

    The printed record count is ``len(data)`` for a list; for a dict it is the
    total length of all list values plus one for every non-list value.
    """
    import math  # function-scope import keeps the helper self-contained

    def _json_safe(value):
        # Recursively swap non-finite floats for None (serialised as null).
        if isinstance(value, float):
            return value if math.isfinite(value) else None
        if isinstance(value, dict):
            return {key: _json_safe(item) for key, item in value.items()}
        if isinstance(value, (list, tuple)):
            return [_json_safe(item) for item in value]
        return value

    path = OUT_DIR / name
    # The output folder may not exist yet (e.g. on a fresh checkout).
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(_json_safe(data), f, indent=2, default=str)
    n = len(data) if isinstance(data, list) else sum(len(v) if isinstance(v, list) else 1 for v in data.values())
    print(f"  {name} -> {path.stat().st_size/1024:.1f} KB ({n} records)")

# Show where the exported JSON files will be written.
print("Output: " + str(OUT_DIR))


## 1. market_overview.json

In [None]:

# Yearly market value per league, built from January valuations only
# (strftime('%m', date) = '01'), plus year-over-year growth per league.
# The year filter is applied before the LAG window is evaluated, so the first
# retained year of every league has no previous row and a NULL growth value.
df = pd.read_sql('''
    WITH yearly AS (
        SELECT
            CAST(strftime('%Y', date) AS INTEGER) as year,
            player_club_domestic_competition_id as league_id,
            SUM(market_value_in_eur) as total_market_value,
            COUNT(DISTINCT player_id) as player_count,
            AVG(market_value_in_eur) as avg_player_value
        FROM player_valuations
        WHERE player_club_domestic_competition_id IN ('GB1','ES1','IT1','L1','FR1')
          AND market_value_in_eur > 0
          AND strftime('%m', date) = '01'
        GROUP BY year, league_id
    )
    SELECT year, league_id, total_market_value, player_count, avg_player_value,
        ROUND(
            (total_market_value - LAG(total_market_value) OVER (PARTITION BY league_id ORDER BY year))
            * 100.0
            / NULLIF(LAG(total_market_value) OVER (PARTITION BY league_id ORDER BY year), 0),
        1) as yoy_growth_pct
    FROM yearly
    WHERE year BETWEEN 2012 AND 2025
    ORDER BY year, league_id
''', conn)
df['league_name'] = df['league_id'].map(LEAGUE_NAMES)
df['total_market_value'] = df['total_market_value'].round(0)
df['avg_player_value'] = df['avg_player_value'].round(0)
# Series.where(..., other=None) on a float column leaves NaN in place, so the
# column is cast to object first; only then do missing growth values become
# None (serialised as JSON null instead of the invalid token NaN).
df['yoy_growth_pct'] = df['yoy_growth_pct'].astype(object).where(df['yoy_growth_pct'].notna(), None)
save('market_overview.json', df.to_dict('records'))
# Quick visual check: the five most recent Premier League rows.
display(df[df['league_id']=='GB1'][['year','total_market_value','player_count','yoy_growth_pct']].tail(5))


## 2. league_comparison.json

In [None]:

# Per-league snapshot for the year before the newest valuation year found in
# the data (latest_year = MAX(year) - 1), joined with each league's most
# valuable club according to clubs.total_market_value.
# NOTE(review): total_value sums every valuation row of that year while
# player_count is DISTINCT, so a player with several valuations in the year
# contributes more than once to the total -- confirm this is intended.
df = pd.read_sql('''
    WITH latest_year AS (
        SELECT MAX(CAST(strftime('%Y', date) AS INTEGER)) - 1 as yr
        FROM player_valuations
        WHERE player_club_domestic_competition_id IN ('GB1','ES1','IT1','L1','FR1')
    ),
    league_totals AS (
        SELECT pv.player_club_domestic_competition_id as league_id,
            SUM(pv.market_value_in_eur) as total_value,
            AVG(pv.market_value_in_eur) as avg_value,
            COUNT(DISTINCT pv.player_id) as player_count
        FROM player_valuations pv, latest_year ly
        WHERE pv.player_club_domestic_competition_id IN ('GB1','ES1','IT1','L1','FR1')
          AND pv.market_value_in_eur > 0
          AND CAST(strftime('%Y', pv.date) AS INTEGER) = ly.yr
        GROUP BY pv.player_club_domestic_competition_id
    ),
    club_vals AS (
        SELECT c.domestic_competition_id as league_id, c.name as club_name,
            c.total_market_value,
            ROW_NUMBER() OVER (PARTITION BY c.domestic_competition_id ORDER BY c.total_market_value DESC) as rn
        FROM clubs c
        WHERE c.domestic_competition_id IN ('GB1','ES1','IT1','L1','FR1')
          AND c.total_market_value > 0
    )
    SELECT lt.league_id, lt.total_value, lt.avg_value, lt.player_count,
        cv.club_name as top_club, cv.total_market_value as top_club_value
    FROM league_totals lt
    LEFT JOIN club_vals cv ON lt.league_id = cv.league_id AND cv.rn = 1
    ORDER BY lt.total_value DESC
''', conn)
# Append the display name, then round the three money columns to whole euros.
df = df.assign(league=df['league_id'].map(LEAGUE_NAMES)).round(
    {'total_value': 0, 'avg_value': 0, 'top_club_value': 0}
)
save('league_comparison.json', df.to_dict(orient='records'))
display(df.loc[:, ['league', 'player_count', 'top_club']].round(0))


## 3. top_transfers.json

In [None]:

# The 15 transfers with the highest positive fee, most expensive first.
df = pd.read_sql('''
    SELECT player_name, from_club_name as from_club, to_club_name as to_club,
        ROUND(transfer_fee) as fee, transfer_season as season, transfer_date
    FROM transfers
    WHERE transfer_fee > 0
    ORDER BY transfer_fee DESC LIMIT 15
''', conn)
save('top_transfers.json', df.to_dict(orient='records'))
# Preview the ten largest deals.
display(df.loc[:, ['player_name', 'from_club', 'to_club', 'fee']].head(10))


## 4. age_curves.json

In [None]:

# One row per valuation since 2015: the player's age in whole years at the
# valuation date, their position, and the market value in millions.
df = pd.read_sql('''
    SELECT
        CAST((julianday(pv.date) - julianday(p.date_of_birth)) / 365.25 AS INTEGER) as age,
        p.position, pv.market_value_in_eur / 1e6 as value_m
    FROM player_valuations pv
    JOIN players p ON pv.player_id = p.player_id
    WHERE p.position IN ('Attack', 'Midfield', 'Defender', 'Goalkeeper')
      AND p.date_of_birth IS NOT NULL
      AND pv.market_value_in_eur > 0
      AND pv.date >= '2015-01-01'
''', conn)
# Keep ages 17..38 (both ends inclusive).
df = df[df['age'].between(17, 38)]
# Median value per (age, position), reshaped to one row per age with one
# column per position.
pivot = (
    df.groupby(['age', 'position'])['value_m']
    .median()
    .unstack('position')
    .reset_index()
)
# Round whichever of the expected position columns are present to 3 decimals.
pivot = pivot.round({
    col: 3
    for col in ('Attack', 'Midfield', 'Defender', 'Goalkeeper')
    if col in pivot.columns
})
save('age_curves.json', pivot.to_dict(orient='records'))
display(pivot.head(5))


## 5. risk_metrics.json

In [None]:

# Volatility heatmap input: for every league and calendar year (exposed as
# "season"), the population standard deviation of month-over-month percentage
# changes in the average player value, kept only when at least six monthly
# returns exist for that league/year.
# NOTE: SQRT is one of SQLite's optional math functions; it is only available
# when the SQLite library was built with them enabled.
df_vol = pd.read_sql('''
    WITH monthly AS (
        SELECT player_club_domestic_competition_id as league_id,
            strftime('%Y', date) as season, strftime('%m', date) as month,
            AVG(market_value_in_eur) as avg_val
        FROM player_valuations
        WHERE player_club_domestic_competition_id IN ('GB1','ES1','IT1','L1','FR1')
          AND market_value_in_eur > 0 AND date >= '2015-01-01'
        GROUP BY league_id, season, month
    ),
    returns AS (
        SELECT league_id, season,
            (avg_val - LAG(avg_val) OVER (PARTITION BY league_id ORDER BY season, month))
            * 100.0 / NULLIF(LAG(avg_val) OVER (PARTITION BY league_id ORDER BY season, month), 0)
            as monthly_ret
        FROM monthly
    )
    SELECT league_id, season,
        ROUND(SQRT(
            (SUM(monthly_ret*monthly_ret)/COUNT(*)) - (AVG(monthly_ret)*AVG(monthly_ret))
        ), 2) as volatility
    FROM returns
    WHERE monthly_ret IS NOT NULL
    GROUP BY league_id, season HAVING COUNT(*) >= 6
    ORDER BY season, league_id
''', conn)
# Swap the league id for its display name, keep only the exported columns and
# drop rows with any missing value.
df_vol = (
    df_vol.assign(league=df_vol['league_id'].map(LEAGUE_NAMES))
    .loc[:, ['league', 'season', 'volatility']]
    .dropna()
)

# Drawdown by position.
#   player_peak   - every positive valuation with the player's running maximum
#                   value up to and including that valuation date.
#   drawdown      - percentage distance of each valuation below that running
#                   peak (0 at a new peak, negative otherwise).
#   player_max_dd - the most negative drawdown each player ever reached.
# The final SELECT averages those per-player maxima for each position, reports
# the single worst one, and counts the players that contributed.
df_dd = pd.read_sql('''
    WITH player_peak AS (
        SELECT pv.player_id, p.position, pv.market_value_in_eur,
            MAX(pv.market_value_in_eur) OVER (
                PARTITION BY pv.player_id ORDER BY pv.date
                ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
            ) as running_max
        FROM player_valuations pv
        JOIN players p ON pv.player_id = p.player_id
        WHERE p.position IN ('Attack', 'Midfield', 'Defender', 'Goalkeeper')
          AND pv.market_value_in_eur > 0
    ),
    drawdown AS (
        SELECT player_id, position,
            ROUND((market_value_in_eur - running_max) * 100.0 / running_max, 2) as drawdown_pct
        FROM player_peak WHERE running_max > 0
    ),
    player_max_dd AS (
        SELECT player_id, position, MIN(drawdown_pct) as max_drawdown
        FROM drawdown GROUP BY player_id, position
    )
    SELECT position,
        ROUND(AVG(max_drawdown), 1) as avg_max_drawdown,
        ROUND(MIN(max_drawdown), 1) as worst_drawdown,
        COUNT(*) as player_count
    FROM player_max_dd GROUP BY position ORDER BY avg_max_drawdown
''', conn)

# Value change by position and age bracket.
# For every positive valuation the CTE computes the percentage change against
# the same player's previous valuation (LAG ordered by date) and labels the row
# with the player's age bracket at the valuation date (U21, 21-24, 25-27,
# 28-30, 31+). Rows without a previous valuation, or with an absolute change
# of 200% or more, are excluded from the aggregation.
#   avg_change_pct    - mean of all remaining changes.
#   depreciation_rate - mean of the negative changes only (NULL if none).
# Note that ORDER BY age_bracket is a text sort, so 'U21' comes after '31+'.
df_dep = pd.read_sql('''
    WITH age_brackets AS (
        SELECT p.position,
            CASE
                WHEN CAST((julianday(pv.date) - julianday(p.date_of_birth)) / 365.25 AS INT) < 21 THEN 'U21'
                WHEN CAST((julianday(pv.date) - julianday(p.date_of_birth)) / 365.25 AS INT) < 25 THEN '21-24'
                WHEN CAST((julianday(pv.date) - julianday(p.date_of_birth)) / 365.25 AS INT) < 28 THEN '25-27'
                WHEN CAST((julianday(pv.date) - julianday(p.date_of_birth)) / 365.25 AS INT) < 31 THEN '28-30'
                ELSE '31+'
            END as age_bracket,
            (pv.market_value_in_eur - LAG(pv.market_value_in_eur) OVER (PARTITION BY pv.player_id ORDER BY pv.date))
            * 100.0 / NULLIF(LAG(pv.market_value_in_eur) OVER (PARTITION BY pv.player_id ORDER BY pv.date), 0)
            as change_pct
        FROM player_valuations pv
        JOIN players p ON pv.player_id = p.player_id
        WHERE p.position IN ('Attack', 'Midfield', 'Defender', 'Goalkeeper')
          AND p.date_of_birth IS NOT NULL AND pv.market_value_in_eur > 0
    )
    SELECT position, age_bracket,
        ROUND(AVG(change_pct), 2) as avg_change_pct,
        ROUND(AVG(CASE WHEN change_pct < 0 THEN change_pct ELSE NULL END), 2) as depreciation_rate
    FROM age_brackets
    WHERE change_pct IS NOT NULL AND ABS(change_pct) < 200
    GROUP BY position, age_bracket ORDER BY position, age_bracket
''', conn)

# Risk/return summary per league since 2015, based on month-over-month
# percentage changes of the average player value:
#   avg_return   - mean monthly change multiplied by 12.
#   volatility   - population standard deviation of the monthly changes
#                  multiplied by sqrt(12).
#   sharpe_ratio - mean monthly change divided by its standard deviation.
# NOTE(review): sharpe_ratio is computed from the monthly figures, whereas
# avg_return and volatility are scaled to a year, so it does not equal
# avg_return / volatility -- confirm this is the intended definition.
df_sharpe = pd.read_sql('''
    WITH monthly_vals AS (
        SELECT player_club_domestic_competition_id as league_id,
            strftime('%Y-%m', date) as month, AVG(market_value_in_eur) as avg_val
        FROM player_valuations
        WHERE player_club_domestic_competition_id IN ('GB1','ES1','IT1','L1','FR1')
          AND market_value_in_eur > 0 AND date >= '2015-01-01'
        GROUP BY league_id, month
    ),
    monthly_rets AS (
        SELECT league_id, month,
            (avg_val - LAG(avg_val) OVER (PARTITION BY league_id ORDER BY month))
            * 100.0 / NULLIF(LAG(avg_val) OVER (PARTITION BY league_id ORDER BY month), 0)
            as ret
        FROM monthly_vals
    )
    SELECT league_id,
        ROUND(AVG(ret) * 12, 2) as avg_return,
        ROUND(SQRT((SUM(ret*ret)/COUNT(*)) - (AVG(ret)*AVG(ret))) * SQRT(12), 2) as volatility,
        ROUND(AVG(ret) / NULLIF(SQRT((SUM(ret*ret)/COUNT(*)) - (AVG(ret)*AVG(ret))), 0), 3) as sharpe_ratio
    FROM monthly_rets
    WHERE ret IS NOT NULL GROUP BY league_id ORDER BY sharpe_ratio DESC
''', conn)
# Add the league display name as the last column.
df_sharpe = df_sharpe.assign(league=df_sharpe['league_id'].map(LEAGUE_NAMES))

# Bundle the four risk tables into a single JSON document, one key per
# dashboard widget; each table is exported as a list of row records.
risk = {
    section: table.to_dict(orient='records')
    for section, table in (
        ('volatility_heatmap', df_vol),
        ('drawdown_by_position', df_dd),
        ('depreciation_rates', df_dep),
        ('sharpe_ratios', df_sharpe.loc[:, ['league', 'sharpe_ratio', 'avg_return', 'volatility']]),
    )
}
save('risk_metrics.json', risk)

# Show the two summary tables for a quick visual check.
print("\nDrawdown:")
display(df_dd)
print("\nSharpe:")
display(df_sharpe.loc[:, ['league', 'sharpe_ratio', 'avg_return', 'volatility']])


In [None]:

# Release the SQLite connection, then print a blank line followed by the
# completion message.
conn.close()
print('', 'All 5 JSON files exported. Dashboard now uses real Transfermarkt data.', sep='\n')
