In [1]:
import sqlite3
import pandas as pd

# Open the SQLite database file; the `conn` handle is reused by the query
# cells that follow, so it is intentionally left open here.
conn = sqlite3.connect("statsbomb_euro2020.db")

# Ask SQLite's schema catalogue for the name of every table in the file.
table_listing_sql = "SELECT name FROM sqlite_master WHERE type='table';"
tables = pd.read_sql_query(sql=table_listing_sql, con=conn)
print(tables)


              name
0          lineups
1  sqlite_sequence
2       frames_360
3          matches
4           events


In [2]:
# Show the column schema (cid, name, type, notnull, dflt_value, pk) of `events`.
pd.read_sql_query(sql="PRAGMA table_info(events);", con=conn)

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,fifty_fifty,TEXT,0,,0
1,1,bad_behaviour_card,TEXT,0,,0
2,2,ball_receipt_outcome,TEXT,0,,0
3,3,ball_recovery_offensive,TEXT,0,,0
4,4,ball_recovery_recovery_failure,TEXT,0,,0
...,...,...,...,...,...,...
110,110,team,TEXT,0,,0
111,111,team_id,INTEGER,0,,0
112,112,timestamp,TEXT,0,,0
113,113,type,TEXT,0,,0


In [3]:
# Show the column schema (cid, name, type, notnull, dflt_value, pk) of `matches`.
pd.read_sql_query(sql="PRAGMA table_info(matches);", con=conn)

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,match_id,INTEGER,0,,0
1,1,match_date,TEXT,0,,0
2,2,kick_off,TEXT,0,,0
3,3,competition,TEXT,0,,0
4,4,season,TEXT,0,,0
5,5,home_team,TEXT,0,,0
6,6,away_team,TEXT,0,,0
7,7,home_score,INTEGER,0,,0
8,8,away_score,INTEGER,0,,0
9,9,match_status,TEXT,0,,0


In [4]:
# Preview the first five rows of `events` (all columns).
pd.read_sql_query(sql="SELECT * FROM events LIMIT 5;", con=conn)

Unnamed: 0,fifty_fifty,bad_behaviour_card,ball_receipt_outcome,ball_recovery_offensive,ball_recovery_recovery_failure,block_deflection,block_offensive,block_save_block,carry_end_location,clearance_aerial_won,...,substitution_outcome,substitution_outcome_id,substitution_replacement,substitution_replacement_id,tactics,team,team_id,timestamp,type,under_pressure
0,,,,,,,,,,,...,,,,,"{""formation"": 4141.0, ""lineup"": [{""jersey_numb...",Turkey,909,00:00:00.000,Starting XI,
1,,,,,,,,,,,...,,,,,"{""formation"": 433.0, ""lineup"": [{""jersey_numbe...",Italy,914,00:00:00.000,Starting XI,
2,,,,,,,,,,,...,,,,,,Turkey,909,00:00:00.000,Half Start,
3,,,,,,,,,,,...,,,,,,Italy,914,00:00:00.000,Half Start,
4,,,,,,,,,,,...,,,,,,Turkey,909,00:00:00.000,Half Start,


In [5]:
# List the distinct values of the `type` column in `events`.
pd.read_sql_query(sql="SELECT DISTINCT type FROM events;", con=conn)

Unnamed: 0,type
0,50/50
1,Bad Behaviour
2,Ball Receipt*
3,Ball Recovery
4,Block
5,Carry
6,Clearance
7,Dispossessed
8,Dribble
9,Dribbled Past


In [6]:
# --- PPDA per team per match, plus per-team averages ------------------------
# For every (match, team) row:
#     PPDA = opponent's pass count / the team's own defensive-event count
# Results are written to ppda_per_match.csv and ppda_team_average.csv, and the
# ten teams with the lowest average PPDA are printed.
#
# NOTE(review): every 'Pass' event is counted and 'Pressure', 'Duel' and
# 'Interception' are the defensive events, with no pitch-zone filter. Commonly
# published PPDA figures use a narrower definition -- confirm this is the
# metric you intend to report before comparing against external numbers.

# Event types counted as defensive actions in the PPDA denominator.
DEFENSIVE_ACTION_TYPES = ['Pressure', 'Duel', 'Interception']
# Columns that identify one team's row within one match.
TEAM_KEYS = ['match_id', 'team_id', 'team']

events = pd.read_sql_query("SELECT match_id, team_id, team, type FROM events;", conn)
matches = pd.read_sql_query("SELECT match_id, home_team, away_team FROM matches;", conn)

passes = events[events['type'] == 'Pass']
def_actions = events[events['type'].isin(DEFENSIVE_ACTION_TYPES)]

passes_count = passes.groupby(TEAM_KEYS).size().reset_index(name='passes')
def_count = def_actions.groupby(TEAM_KEYS).size().reset_index(name='def_actions')

# Outer merge keeps teams that have only passes or only defensive events;
# the missing count is then treated as zero.
team_stats = pd.merge(passes_count, def_count, on=TEAM_KEYS, how='outer').fillna(0)
team_stats = pd.merge(team_stats, matches[['match_id', 'home_team', 'away_team']], on='match_id', how='left')

# Pair each team row with the rows of the same match, then keep only the rows
# where the partner is a different team. This replaces a per-row Python loop.
opponent_passes = team_stats[['match_id', 'team_id', 'passes']].rename(
    columns={'team_id': 'opp_team_id', 'passes': 'passes_opponent'}
)
paired = team_stats.merge(opponent_passes, on='match_id', how='inner')
paired = paired[paired['team_id'] != paired['opp_team_id']]
# If a match somehow lists more than two teams, keep the first opponent found,
# in team_stats row order.
paired = paired.drop_duplicates(subset=TEAM_KEYS, keep='first')

ppda_df = paired[['match_id', 'team', 'team_id', 'passes_opponent', 'def_actions']].reset_index(drop=True)

# A team with zero defensive events has an undefined PPDA: mask the
# denominator so the result is NaN rather than a division by zero. A PPDA of
# exactly 0.0 (opponent recorded no passes) is a real value and is kept.
valid_def_actions = ppda_df['def_actions'].where(ppda_df['def_actions'] > 0)
ppda_df['PPDA'] = (ppda_df['passes_opponent'] / valid_def_actions).round(2)

ppda_df.to_csv("ppda_per_match.csv", index=False, encoding="utf-8-sig")
print("saved as ppda_per_match.csv")

# Mean PPDA per team across its matches (NaN rows are skipped by mean()),
# sorted ascending so the lowest-PPDA teams come first.
avg_ppda = (
    ppda_df.groupby('team', as_index=False)['PPDA']
    .mean()
    .sort_values(by='PPDA', ascending=True)
)

avg_ppda.to_csv("ppda_team_average.csv", index=False, encoding="utf-8-sig")
print("saved as ppda_team_average.csv")

print("\ntop 10 PPDA teams")
print(avg_ppda.head(10))


saved as ppda_per_match.csv
saved as ppda_team_average.csv

top 10 PPDA teams
              team      PPDA
18           Spain  1.935000
21          Turkey  2.036667
13          Poland  2.036667
0          Austria  2.162500
11     Netherlands  2.175000
16        Scotland  2.256667
2          Croatia  2.272500
17        Slovakia  2.380000
3   Czech Republic  2.416000
8          Germany  2.445000
