# Football Analytics Visualizations
## Complete Chart Gallery - 2025-26 Season
### Dual-Source Data: FBref + Understat

This notebook showcases all visualization types used in the Football Alpha Analysis project.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
import sys
sys.path.append('../src')
from analysis import get_data

# Apply the seaborn-style white-grid theme globally so every figure created
# later in the notebook shares the same look.
plt.style.use('seaborn-v0_8-whitegrid')
# Load the player table through the project helper imported above. Its schema
# is not visible here; the cells below read columns such as 'pos', 'player',
# 'xg', 'gls' and 'finishing_alpha' from it.
df = get_data()

def get_pos(p):
    """Collapse a raw position label into one of GK / DF / MF / FW / Unknown.

    Parameters
    ----------
    p : str or missing value
        Raw position text, possibly listing several codes (e.g. ``"FW,MF"``).
        Matching is case-insensitive.

    Returns
    -------
    str
        The recognised code that appears *earliest* in ``p``, or ``'Unknown'``
        when ``p`` is missing or contains none of the four codes.

    Notes
    -----
    The result feeds the ``main_pos`` column, so for multi-position labels the
    first listed code wins (assumes the source lists the primary position
    first - confirm against the data provider). A fixed GK > DF > MF > FW
    priority would relabel e.g. ``"FW,MF"`` as a midfielder.
    """
    if pd.isna(p):
        return 'Unknown'
    # str() keeps non-string, non-null values from raising on .upper().
    label = str(p).upper()
    # (offset, code) pairs for every code present; the smallest offset is the
    # code listed first in the label.
    hits = [(label.find(code), code) for code in ('GK', 'DF', 'MF', 'FW') if code in label]
    return min(hits)[1] if hits else 'Unknown'

# Bucket every row's raw position into a coarse main position, then report
# how many player rows were loaded.
df['main_pos'] = [get_pos(raw_pos) for raw_pos in df['pos']]
player_count = len(df)
print(f"Loaded {player_count} players")

## 1. xG vs Actual Goals Scatter

In [None]:
# xG vs actual goals, one point per player, coloured by finishing alpha.
# Keep only rows where every plotted value is present: scatter skips points
# with a missing coordinate or colour anyway, and filtering up front ensures
# the labelled outliers below are always points that are actually drawn.
plot_df = df.dropna(subset=['xg', 'gls', 'finishing_alpha'])

fig, ax = plt.subplots(figsize=(14, 10))

# RdYlGn is a diverging map; symmetric limits pin its neutral midpoint to
# alpha = 0 so green always means over-performance and red under-performance.
alpha_limit = plot_df['finishing_alpha'].abs().max()
if pd.isna(alpha_limit) or alpha_limit == 0:
    alpha_limit = 1.0  # empty or all-zero data: any non-degenerate range works

scatter = ax.scatter(plot_df['xg'], plot_df['gls'], alpha=0.5,
                     c=plot_df['finishing_alpha'], cmap='RdYlGn',
                     vmin=-alpha_limit, vmax=alpha_limit, s=50,
                     edgecolors='white', linewidth=0.5)
fig.colorbar(scatter, ax=ax, label='Finishing Alpha')

# Diagonal reference: goals scored exactly equal to expected goals.
max_val = max(plot_df['xg'].max(), plot_df['gls'].max())
ax.plot([0, max_val], [0, max_val], 'k--', linewidth=2, label='Perfect Conversion')

# Label the 8 highest and 5 lowest finishing-alpha players.
top = plot_df.nlargest(8, 'finishing_alpha')
worst = plot_df.nsmallest(5, 'finishing_alpha')
outliers = pd.concat([top, worst])
texts = [ax.text(row['xg'], row['gls'], row['player'], fontsize=9, fontweight='bold')
         for _, row in outliers.iterrows()]
# adjust_text nudges the labels apart and draws thin connector lines.
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray', lw=0.5))

ax.set_xlabel('Expected Goals (xG)', fontsize=12)
ax.set_ylabel('Actual Goals', fontsize=12)
ax.set_title('xG vs Actual Goals - Finishing Alpha Map', fontsize=14)
ax.legend()
fig.tight_layout()
plt.show()

## 2. Top/Worst Finishers - Bar Chart

In [None]:
# Side-by-side horizontal bars: the 15 highest and 15 lowest finishing-alpha
# players.
fig, axes = plt.subplots(1, 2, figsize=(16, 8))

rated = df.dropna(subset=['finishing_alpha'])
top15 = rated.nlargest(15, 'finishing_alpha')
worst15 = rated.nsmallest(15, 'finishing_alpha')

panels = [
    (top15, '#2ecc71', 'Top 15 Clinical Finishers'),
    (worst15, '#e74c3c', 'Top 15 Underperforming Finishers'),
]

for ax, (subset, color, title) in zip(axes, panels):
    # Place bars by rank rather than by name: on a categorical axis identical
    # strings share one slot, so two rows with the same player name would be
    # drawn on top of each other and one of them silently lost.
    y_pos = np.arange(len(subset))
    ax.barh(y_pos, subset['finishing_alpha'], color=color)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(subset['player'].astype(str))
    ax.set_xlabel('Finishing Alpha')
    ax.set_title(title)
    # First-ranked player at the top of the panel.
    ax.invert_yaxis()

plt.tight_layout()
plt.show()

## 3. League Comparison

In [None]:
# Per-competition mean of both alpha metrics, rounded to three decimals.
alpha_columns = ['finishing_alpha', 'playmaking_alpha']
league_stats = df.groupby('comp').agg(dict.fromkeys(alpha_columns, 'mean')).round(3)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

panel_specs = [('finishing_alpha', 'Finishing'), ('playmaking_alpha', 'Playmaking')]
for ax, (col, title) in zip(axes, panel_specs):
    # Leagues ordered from lowest to highest average for this metric.
    ranked = league_stats[col].sort_values()
    # Green for a positive league average, red otherwise.
    bar_colors = ['#2ecc71' if value > 0 else '#e74c3c' for value in ranked]
    ax.barh(ranked.index, ranked, color=bar_colors)
    # Thin zero line separating above- and below-expectation leagues.
    ax.axvline(x=0, color='black', linewidth=0.5)
    ax.set_xlabel(f'Average {title} Alpha')
    ax.set_title(f'League {title} Efficiency')

plt.tight_layout()
plt.show()

## 4. Shot Conversion Analysis

In [None]:
# Players with at least 10 shots and a recorded shots-on-target count, plus two
# derived percentages: share of shots on target and share of shots scored.
enough_shots = df['sh'] >= 10
has_sot = df['sot'].notna()
shooters = df[enough_shots & has_sot].assign(
    shot_accuracy=lambda d: (d['sot'] / d['sh']) * 100,
    conversion_rate=lambda d: (d['gls'] / d['sh']) * 100,
)

fig, ax = plt.subplots(figsize=(12, 10))
scatter = ax.scatter(shooters['shot_accuracy'], shooters['conversion_rate'],
                     alpha=0.6, c=shooters['gls'], cmap='YlOrRd', s=60)
fig.colorbar(scatter, ax=ax, label='Total Goals')

# Annotate the eight best converters; adjust_text spreads the labels out.
top_conv = shooters.nlargest(8, 'conversion_rate')
texts = []
for _, row in top_conv.iterrows():
    texts.append(ax.text(row['shot_accuracy'], row['conversion_rate'], row['player'],
                         fontsize=9, fontweight='bold'))
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray', lw=0.5))

ax.set_xlabel('Shot Accuracy (SoT%)', fontsize=12)
ax.set_ylabel('Conversion Rate (G/Sh%)', fontsize=12)
ax.set_title('Shot Accuracy vs Goal Conversion Rate', fontsize=14)
fig.tight_layout()
plt.show()

## 5. Radar Chart - Player Comparison

In [None]:
# Radar comparison of the two highest scorers across six metrics, each scaled
# to a percentage of the largest absolute value seen in the data set.
top_scorers = df.nlargest(2, 'gls')
p1 = top_scorers.iloc[0]
p2 = top_scorers.iloc[1]

metrics = ['gls', 'ast', 'xg', 'xag', 'finishing_alpha', 'playmaking_alpha']

# Scaling denominator per metric: the largest magnitude in the column, floored
# at 1. Metrics absent from the frame get no entry.
max_vals = {}
for m in metrics:
    if m in df.columns:
        max_vals[m] = max(abs(df[m].max()), abs(df[m].min()), 1)

fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

# One spoke per metric; repeat the first angle so the outline closes.
spokes = np.linspace(0, 2 * np.pi, len(metrics), endpoint=False).tolist()
angles = spokes + spokes[:1]

for player, color in ((p1, '#3498db'), (p2, '#e74c3c')):
    vals = []
    for m in metrics:
        usable = pd.notna(player.get(m)) and max_vals.get(m, 0) != 0
        # Missing values or metrics without a scale are drawn at zero.
        vals.append((player[m] / max_vals[m] * 100) if usable else 0)
    vals = vals + vals[:1]
    ax.plot(angles, vals, 'o-', linewidth=2, label=player['player'], color=color)
    ax.fill(angles, vals, alpha=0.15, color=color)

ax.set_xticks(angles[:-1])
ax.set_xticklabels(metrics, fontsize=10)
ax.set_title(f'{p1["player"]} vs {p2["player"]}', fontsize=14, pad=20)
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
plt.tight_layout()
plt.show()

## 6. Goal Contributions - Stacked Bar

In [None]:
# Top 15 players by goals + assists, drawn as stacked horizontal bars.
df_copy = df.copy()
# Treat a missing goal/assist count as zero in the plotted columns as well as
# in the ranking total; otherwise a NaN 'gls' becomes a NaN `left` offset and
# that player's assist segment is lost or misplaced.
df_copy[['gls', 'ast']] = df_copy[['gls', 'ast']].fillna(0)
df_copy['g_a_total'] = df_copy['gls'] + df_copy['ast']
# Ascending sort so the largest total ends up at the top of the barh chart.
top15_ga = df_copy.nlargest(15, 'g_a_total').sort_values('g_a_total', ascending=True)

fig, ax = plt.subplots(figsize=(12, 8))
ax.barh(top15_ga['player'], top15_ga['gls'], label='Goals', color='#3498db')
# Assists start where the goals segment ends.
ax.barh(top15_ga['player'], top15_ga['ast'], left=top15_ga['gls'], label='Assists', color='#2ecc71')
ax.set_xlabel('Goal Contributions (G+A)')
ax.set_title('Top 15 Players by Goal Contributions', fontsize=14)
ax.legend()
fig.tight_layout()
plt.show()

## 7. Correlation Heatmap

In [None]:
# Pairwise correlations between the headline metrics that exist in the frame.
key_metrics = [
    'gls', 'ast', 'xg', 'xag', 'npxg', 'finishing_alpha', 'playmaking_alpha',
    'sh', 'sot', 'kp', 'ppa', 'col_90s',
]
# Keep the requested order, skipping any metric the loaded data lacks.
available = []
for metric in key_metrics:
    if metric in df.columns:
        available.append(metric)
corr = df[available].corr()

fig, ax = plt.subplots(figsize=(12, 10))
# center=0 puts the colour map's midpoint at zero correlation.
sns.heatmap(corr, annot=True, cmap='RdYlGn', center=0, fmt='.2f',
            square=True, linewidths=0.5, ax=ax)
ax.set_title('Correlation Matrix - Key Performance Metrics', fontsize=14)
fig.tight_layout()
plt.show()

## 8. Box Plot - Goals & Alpha by Position

In [None]:
# Distribution of goals and of finishing alpha, grouped by main position.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
ax_goals, ax_alpha = axes

df.boxplot(column='gls', by='main_pos', ax=ax_goals)
ax_goals.set_title('Goals Distribution by Position')
ax_goals.set_xlabel('Position')

df.boxplot(column='finishing_alpha', by='main_pos', ax=ax_alpha)
# Dashed red line at alpha = 0 as the break-even reference.
ax_alpha.axhline(y=0, color='red', linestyle='--')
ax_alpha.set_title('Finishing Alpha by Position')
ax_alpha.set_xlabel('Position')

# Blank out the figure-level title that the grouped boxplot call adds.
fig.suptitle('')
fig.tight_layout()
plt.show()

## 9. Minutes vs Goals

In [None]:
# Minutes played vs goals, coloured by goals per 90.
# Restrict to rows where all three plotted values exist; scatter would skip the
# others anyway, and labelling from this same subset guarantees every label
# sits on a drawn point rather than at a missing coordinate.
minutes_df = df.dropna(subset=['min', 'gls', 'gls_per90'])

fig, ax = plt.subplots(figsize=(12, 10))
scatter = ax.scatter(minutes_df['min'], minutes_df['gls'], alpha=0.5,
                     c=minutes_df['gls_per90'], cmap='YlOrRd', s=50)
fig.colorbar(scatter, ax=ax, label='Goals per 90')

# Label the ten highest scorers among the plotted players.
top = minutes_df.nlargest(10, 'gls')
texts = [ax.text(row['min'], row['gls'], row['player'], fontsize=9, fontweight='bold')
         for _, row in top.iterrows()]
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray', lw=0.5))

ax.set_xlabel('Minutes Played', fontsize=12)
ax.set_ylabel('Goals', fontsize=12)
ax.set_title('Minutes Played vs Goals Scored', fontsize=14)
fig.tight_layout()
plt.show()

## 10. xGChain - Goal Involvement Network

xGChain sums the xG of every possession a player takes part in, so it credits involvement in build-up play as well as the final shot or assist.

In [None]:
# xGChain vs direct goal contributions (G+A), coloured by the share of the
# chain that comes from build-up when that column is available.
if 'xgchain' in df.columns:
    chain_df = df.dropna(subset=['xgchain']).copy()
    chain_df['direct_contribution'] = chain_df['gls'] + chain_df['ast']

    fig, ax = plt.subplots(figsize=(14, 10))

    # 'xgbuildup' is only needed for the colour dimension, so fall back to a
    # plain scatter instead of raising a KeyError when it is absent.
    has_buildup = 'xgbuildup' in chain_df.columns
    drawn_cols = ['xgchain', 'direct_contribution']
    if has_buildup:
        # Zero xGChain becomes NaN so the ratio is undefined rather than inf.
        chain_df['buildup_ratio'] = chain_df['xgbuildup'] / chain_df['xgchain'].replace(0, np.nan)
        drawn_cols.append('buildup_ratio')
        scatter = ax.scatter(chain_df['xgchain'], chain_df['direct_contribution'],
                             alpha=0.5, c=chain_df['buildup_ratio'], cmap='coolwarm', s=50)
        fig.colorbar(scatter, ax=ax, label='Buildup Ratio (xGBuildup/xGChain)')
    else:
        scatter = ax.scatter(chain_df['xgchain'], chain_df['direct_contribution'],
                             alpha=0.5, s=50)

    # High xGChain players - chosen only among rows that are actually drawn
    # (scatter skips points with a missing coordinate or colour value).
    top_chain = chain_df.dropna(subset=drawn_cols).nlargest(10, 'xgchain')
    texts = [ax.text(row['xgchain'], row['direct_contribution'], row['player'],
                     fontsize=9, fontweight='bold')
             for _, row in top_chain.iterrows()]
    adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray', lw=0.5))

    ax.set_xlabel('xGChain (Total Goal Chain Involvement)', fontsize=12)
    ax.set_ylabel('Direct Goal Contributions (G+A)', fontsize=12)
    ax.set_title('xGChain vs Direct Contributions - Who Drives Attacks?', fontsize=14)
    fig.tight_layout()
    plt.show()
else:
    print("xGChain data not available")

## 11. Squad Composition - Pie Chart

In [None]:
# Position mix for three hand-picked example squads, one pie per squad.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
teams = ['Real Madrid', 'Barcelona', 'Manchester City']
colors_map = {
    'GK': '#9b59b6',
    'DF': '#3498db',
    'MF': '#2ecc71',
    'FW': '#e74c3c',
    'Unknown': '#95a5a6',
}

for ax, team_name in zip(axes, teams):
    # Case-insensitive match on the squad name; rows without a squad never match.
    in_squad = df['squad'].str.contains(team_name, case=False, na=False)
    team = df[in_squad]
    if not len(team):
        # Leave the panel empty but say why.
        ax.set_title(f'{team_name} (not found)')
    else:
        pos_counts = team['main_pos'].value_counts()
        # Grey is the fallback for any position without an assigned colour.
        wedge_colors = [colors_map.get(pos, '#95a5a6') for pos in pos_counts.index]
        ax.pie(pos_counts.values, labels=pos_counts.index, autopct='%1.0f%%',
               colors=wedge_colors)
        ax.set_title(f'{team_name} ({len(team)} players)')

plt.tight_layout()
plt.show()

## Summary

This notebook demonstrated 11 visualization types:

1. **Scatter Plot** - xG vs Goals with Finishing Alpha color mapping
2. **Horizontal Bar** - Most and least clinical finishers
3. **League Comparison** - Average alpha by competition
4. **Shot Conversion** - Accuracy vs conversion rate
5. **Radar Chart** - Multi-metric player comparison
6. **Stacked Bar** - Goal contributions breakdown
7. **Correlation Heatmap** - Key metric relationships
8. **Box Plot** - Distribution by position
9. **Minutes vs Goals** - Playing time analysis
10. **xGChain** - Goal involvement network (Understat)
11. **Pie Chart** - Squad composition

**New in this version**: xGChain visualization (#10) leverages Understat's advanced involvement metrics.