In [21]:
import requests as r
import pandas as pd

In [52]:
headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; \
Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1'}
           
all_stats = """attempts,attempts_on_target,attempts_off_target,
attempts_blocked,matches_appearance,
passes_accuracy,passes_attempted,passes_completed,
ball_possession,cross_accuracy,
cross_attempted,cross_completed,free_kick,
attacks,assists,corners,offsides,
recovered_ball,tackles,clearance_attempted,
saves,goals_conceded,own_goal_conceded,
saves_on_penalty,clean_sheet,
fouls_committed,fouls_suffered,
yellow_cards,red_cards
"""

phase = 'QUALIFYING'
year = '2024'

URL = f"""
https://compstats.uefa.com/v1/team-ranking?competitionId=3
&limit=60&offset=0&optionalFields=PLAYER,TEAM
&order=DESC&phase={phase}
&seasonYear={year}&stats={all_stats}"""

page = r.get(URL, headers=headers)

In [43]:
stats = page.json()
stats[0]

{'statistics': [{'name': 'attempts',
   'translations': {'name': {'EN': 'Total attempts',
     'FR': 'Tirs',
     'DE': 'Abschlüsse gesamt',
     'ES': 'Disparos totales',
     'PT': 'Total de remates',
     'IT': 'Tiri totali',
     'RU': 'Всего ударов',
     'ZH': 'attempts',
     'TR': 'Total attempts',
     'HU': 'Total attempts',
     'CS': 'Total attempts',
     'NL': 'Total attempts'}},
   'value': '193'},
  {'name': 'attempts_on_target',
   'translations': {'name': {'EN': 'Attempts on target',
     'FR': 'Tirs cadrés',
     'DE': 'Abschlüsse aufs Tor',
     'ES': 'Disparos a puerta',
     'PT': 'Remates enquadrados',
     'IT': 'Tiri nello specchio',
     'RU': 'В створ',
     'ZH': 'attempts_on_target',
     'TR': 'Attempts on target',
     'HU': 'On target',
     'CS': 'Attempts on target',
     'NL': 'On target'}},
   'value': '81'},
  {'name': 'attempts_off_target',
   'translations': {'name': {'EN': 'Attempts off target',
     'FR': 'Tirs non cadrés',
     'DE': 'Abschlüss

In [25]:
# extract all data from the json response
def extract_full_data(stats):
    
    full_data = []
    stat_names = []
    for stat in stats:
        team = stat['team']
        all_stats = stat['statistics']    
        data = []
        data.append(team['internationalName'])
        for single_stat in all_stats:
            data.append(float(single_stat['value']))
        full_data.append(data)
    
    stat_names = [all_stats['name'] \
                  for all_stats in stats[0]['statistics']]
    df = pd.DataFrame(full_data, columns=['team_name']+stat_names)
    
    return df 

In [23]:
df = extract_full_data(stats)
df

Unnamed: 0,team_name,attempts,attempts_on_target,attempts_off_target,attempts_blocked,matches_appearance,passes_accuracy,passes_attempted,passes_completed,ball_possession,...,clearance_attempted,saves,goals_conceded,own_goal_conceded,saves_on_penalty,clean_sheet,fouls_committed,fouls_suffered,yellow_cards,red_cards
0,Portugal,193.0,81.0,69.0,43.0,10.0,89.6,6834.0,6136.0,63.3,...,101.0,16.0,2.0,0.0,0.0,9.0,92.0,98.0,11.0,0.0
1,France,166.0,63.0,60.0,43.0,8.0,90.0,5341.0,4824.0,60.13,...,85.0,15.0,3.0,0.0,1.0,6.0,79.0,76.0,12.0,0.0
2,Switzerland,163.0,59.0,73.0,31.0,10.0,89.4,6709.0,6005.0,66.41,...,110.0,14.0,11.0,0.0,0.0,3.0,92.0,86.0,19.0,1.0
3,Poland,160.0,47.0,62.0,51.0,10.0,84.3,5933.0,5023.0,61.8,...,162.0,15.0,11.0,0.0,0.0,4.0,120.0,128.0,20.0,0.0
4,Israel,158.0,52.0,68.0,38.0,11.0,83.37,5837.0,4896.0,57.73,...,151.0,42.0,15.0,1.0,0.0,2.0,97.0,115.0,25.0,2.0
5,Denmark,156.0,54.0,64.0,38.0,10.0,87.1,6889.0,6019.0,64.2,...,102.0,11.0,10.0,0.0,0.0,3.0,85.0,107.0,14.0,0.0
6,Croatia,146.0,59.0,53.0,34.0,8.0,88.75,5510.0,4907.0,62.5,...,102.0,19.0,4.0,0.0,0.0,5.0,66.0,69.0,11.0,0.0
7,Romania,145.0,48.0,58.0,39.0,10.0,77.8,4389.0,3527.0,52.0,...,183.0,30.0,5.0,0.0,0.0,6.0,107.0,104.0,23.0,1.0
8,Greece,143.0,45.0,53.0,45.0,10.0,86.0,5185.0,4489.0,52.4,...,175.0,29.0,8.0,0.0,1.0,5.0,112.0,100.0,28.0,2.0
9,Spain,142.0,54.0,56.0,32.0,8.0,90.75,5815.0,5280.0,67.63,...,74.0,11.0,5.0,0.0,0.0,4.0,78.0,75.0,16.0,0.0


In [55]:
def extract_full_data_norm(stats):

    # normalize by the number of matches played
    full_norm_data = []

    for stat in stats:
        team = stat['team']
        all_stats = stat['statistics']    
        data = []
        data.append(team['internationalName'])
        # find out how many matches played
        for single_stat in all_stats:
            if single_stat['name'] == 'matches_appearance':
                matches_played = float(single_stat['value'])
                # no need to continue this loop
                break

        for single_stat in all_stats:
            data.append(float(single_stat['value'])/matches_played)

        full_norm_data.append(data)
    
    stat_names = [all_stats['name'] \
                  for all_stats in stats[0]['statistics']]
    df = pd.DataFrame(full_norm_data, columns=['team_name']+stat_names)
    
    return df 

In [58]:
df_norm = extract_full_data_norm(stats)
df_norm

Unnamed: 0,team_name,attempts,attempts_on_target,attempts_off_target,attempts_blocked,matches_appearance,passes_accuracy,passes_attempted,passes_completed,ball_possession,...,clearance_attempted,saves,goals_conceded,own_goal_conceded,saves_on_penalty,clean_sheet,fouls_committed,fouls_suffered,yellow_cards,red_cards
0,Portugal,19.3,8.1,6.9,4.3,1.0,8.96,683.4,613.6,6.33,...,10.1,1.6,0.2,0.0,0.0,0.9,9.2,9.8,1.1,0.0
1,France,20.75,7.875,7.5,5.375,1.0,11.25,667.625,603.0,7.51625,...,10.625,1.875,0.375,0.0,0.125,0.75,9.875,9.5,1.5,0.0
2,Switzerland,16.3,5.9,7.3,3.1,1.0,8.94,670.9,600.5,6.641,...,11.0,1.4,1.1,0.0,0.0,0.3,9.2,8.6,1.9,0.1
3,Poland,16.0,4.7,6.2,5.1,1.0,8.43,593.3,502.3,6.18,...,16.2,1.5,1.1,0.0,0.0,0.4,12.0,12.8,2.0,0.0
4,Israel,14.363636,4.727273,6.181818,3.454545,1.0,7.579091,530.636364,445.090909,5.248182,...,13.727273,3.818182,1.363636,0.090909,0.0,0.181818,8.818182,10.454545,2.272727,0.181818
5,Denmark,15.6,5.4,6.4,3.8,1.0,8.71,688.9,601.9,6.42,...,10.2,1.1,1.0,0.0,0.0,0.3,8.5,10.7,1.4,0.0
6,Croatia,18.25,7.375,6.625,4.25,1.0,11.09375,688.75,613.375,7.8125,...,12.75,2.375,0.5,0.0,0.0,0.625,8.25,8.625,1.375,0.0
7,Romania,14.5,4.8,5.8,3.9,1.0,7.78,438.9,352.7,5.2,...,18.3,3.0,0.5,0.0,0.0,0.6,10.7,10.4,2.3,0.1
8,Greece,14.3,4.5,5.3,4.5,1.0,8.6,518.5,448.9,5.24,...,17.5,2.9,0.8,0.0,0.1,0.5,11.2,10.0,2.8,0.2
9,Spain,17.75,6.75,7.0,4.0,1.0,11.34375,726.875,660.0,8.45375,...,9.25,1.375,0.625,0.0,0.0,0.5,9.75,9.375,2.0,0.0


In [10]:
# save the data (qualifier 2024)
df_full.to_csv('../data/euro24_qualifiers.csv')

In [11]:
# normalized results
df_full_norm = pd.DataFrame(full_norm_data, columns=['team_name']+all_columns)
df_full_norm.head(10)

Unnamed: 0,team_name,attempts,attempts_on_target,attempts_off_target,attempts_blocked,matches_appearance,passes_accuracy,passes_attempted,passes_completed,ball_possession,...,clearance_attempted,saves,goals_conceded,own_goal_conceded,saves_on_penalty,clean_sheet,fouls_committed,fouls_suffered,yellow_cards,red_cards
0,Portugal,19.3,8.1,6.9,4.3,1.0,8.96,683.4,613.6,6.33,...,10.1,1.6,0.2,0.0,0.0,0.9,9.2,9.8,1.1,0.0
1,France,20.75,7.875,7.5,5.375,1.0,11.25,667.625,603.0,7.51625,...,10.625,1.875,0.375,0.0,0.125,0.75,9.875,9.5,1.5,0.0
2,Switzerland,16.3,5.9,7.3,3.1,1.0,8.94,670.9,600.5,6.641,...,11.0,1.4,1.1,0.0,0.0,0.3,9.2,8.6,1.9,0.1
3,Poland,16.0,4.7,6.2,5.1,1.0,8.43,593.3,502.3,6.18,...,16.2,1.5,1.1,0.0,0.0,0.4,12.0,12.8,2.0,0.0
4,Denmark,15.6,5.4,6.4,3.8,1.0,8.71,688.9,601.9,6.42,...,10.2,1.1,1.0,0.0,0.0,0.3,8.5,10.7,1.4,0.0
5,Croatia,18.25,7.375,6.625,4.25,1.0,11.09375,688.75,613.375,7.8125,...,12.75,2.375,0.5,0.0,0.0,0.625,8.25,8.625,1.375,0.0
6,Romania,14.5,4.8,5.8,3.9,1.0,7.78,438.9,352.7,5.2,...,18.3,3.0,0.5,0.0,0.0,0.6,10.7,10.4,2.3,0.1
7,Spain,17.75,6.75,7.0,4.0,1.0,11.34375,726.875,660.0,8.45375,...,9.25,1.375,0.625,0.0,0.0,0.5,9.75,9.375,2.0,0.0
8,Netherlands,17.75,6.375,6.25,5.125,1.0,11.01625,589.0,521.875,7.1725,...,12.375,1.75,0.875,0.0,0.0,0.625,12.25,9.625,1.5,0.0
9,Czechia,16.75,5.125,7.375,4.25,1.0,9.86,485.625,388.375,6.735,...,15.875,1.625,0.75,0.0,0.0,0.5,12.25,11.5,1.875,0.125


In [63]:
# download data for Euro 2020 qualifiers too
phase = 'QUALIFYING'
year = '2020'

URL = f"""
https://compstats.uefa.com/v1/team-ranking?competitionId=3
&limit=60&offset=0&optionalFields=PLAYER,TEAM
&order=DESC&phase={phase}
&seasonYear={year}&stats={all_stats}"""

page = r.get(URL, headers=headers)
stats_q2020 = page.json()

df_qual2020 = extract_full_data(stats_q2020)
df_qual2020_norm = extract_full_data_norm(stats_q2020)
df_qual2020_norm

Unnamed: 0,team_name,attempts,attempts_on_target,attempts_off_target,attempts_blocked,matches_appearance,passes_accuracy,passes_attempted,passes_completed,ball_possession,...,clearance_attempted,saves,goals_conceded,own_goal_conceded,saves_on_penalty,clean_sheet,fouls_committed,fouls_suffered,yellow_cards,red_cards
0,Spain,22.7,8.3,8.0,6.4,1.0,9.08,788.8,717.2,7.041,...,12.6,1.8,0.5,0.0,0.0,0.5,8.6,11.1,1.8,0.1
1,Belgium,20.7,9.2,7.8,3.7,1.0,8.9,711.5,634.4,6.23,...,10.6,1.6,0.3,0.0,0.0,0.7,11.0,8.6,0.6,0.0
2,Russia,20.7,7.6,8.7,4.3,1.0,8.54,610.8,524.0,6.0,...,14.4,1.4,0.8,0.0,0.0,0.7,11.8,8.9,1.1,0.1
3,Italy,20.5,8.1,7.5,4.9,1.0,8.95,680.9,609.9,6.53,...,11.4,2.1,0.4,0.0,0.0,0.6,10.4,10.6,1.2,0.0
4,Bosnia and Herzegovina,17.909091,6.272727,6.636364,5.0,1.0,7.851818,539.727273,470.363636,4.918182,...,17.545455,3.545455,1.636364,0.181818,0.0,0.181818,12.090909,10.909091,1.818182,0.090909
5,Austria,19.1,6.5,7.7,4.9,1.0,8.46,560.3,475.6,5.91,...,16.9,2.2,0.9,0.1,0.0,0.4,15.0,12.0,1.9,0.0
6,Portugal,22.625,8.625,7.875,6.125,1.0,11.01625,658.125,582.0,7.625,...,9.75,1.75,0.75,0.0,0.0,0.5,10.125,11.125,0.75,0.0
7,Greece,18.0,5.6,7.1,5.3,1.0,8.63,569.2,498.2,5.64,...,18.2,1.9,1.4,0.0,0.0,0.2,11.2,11.8,2.5,0.0
8,Switzerland,22.0,7.125,8.875,6.0,1.0,10.7975,573.125,496.75,7.6725,...,14.625,1.875,0.75,0.0,0.0,0.5,11.125,11.875,1.75,0.0
9,France,16.9,6.7,7.8,2.4,1.0,8.95,736.8,661.6,6.791,...,9.9,1.0,0.6,0.0,0.0,0.5,11.2,9.3,1.4,0.0


In [64]:
# download data for Euro 2020 tournament too
phase = 'TOURNAMENT'
year = '2020'

URL = f"""
https://compstats.uefa.com/v1/team-ranking?competitionId=3
&limit=60&offset=0&optionalFields=PLAYER,TEAM
&order=DESC&phase={phase}
&seasonYear={year}&stats={all_stats}"""

page = r.get(URL, headers=headers)
stats_2020 = page.json()

df_2020 = extract_full_data(stats_2020)
df_2020_norm = extract_full_data_norm(stats_2020)
df_2020_norm

Unnamed: 0,team_name,attempts,attempts_on_target,attempts_off_target,attempts_blocked,matches_appearance,passes_accuracy,passes_attempted,passes_completed,ball_possession,...,clearance_attempted,saves,goals_conceded,own_goal_conceded,saves_on_penalty,clean_sheet,fouls_committed,fouls_suffered,yellow_cards,red_cards
0,Italy,18.285714,5.142857,8.428571,4.714286,1.0,12.428571,611.714286,535.857143,7.654286,...,15.285714,1.285714,0.571429,0.0,0.0,0.428571,13.285714,14.285714,1.714286,0.0
1,Spain,18.5,7.166667,7.166667,4.166667,1.0,14.89,873.166667,781.333333,11.14,...,10.5,1.5,1.0,0.166667,0.0,0.333333,12.0,16.666667,1.0,0.0
2,Denmark,16.0,6.666667,5.0,4.333333,1.0,13.75,509.333333,422.5,8.695,...,18.833333,3.0,1.166667,0.166667,0.166667,0.166667,12.5,11.166667,0.833333,0.0
3,Switzerland,13.8,4.4,6.2,3.2,1.0,16.92,503.0,429.2,9.72,...,22.6,4.2,1.8,0.2,0.0,0.0,12.6,12.4,2.2,0.2
4,England,9.142857,3.714286,4.142857,1.285714,1.0,12.327143,546.428571,474.857143,7.428571,...,20.714286,2.285714,0.285714,0.0,0.0,0.714286,10.142857,12.571429,0.857143,0.0
5,Czechia,11.4,3.8,4.8,2.8,1.0,15.482,426.2,330.2,9.52,...,24.4,2.8,0.8,0.0,0.0,0.4,10.2,7.4,1.4,0.0
6,France,14.25,4.5,7.5,2.25,1.0,22.25,597.5,533.25,12.9375,...,15.25,2.0,1.5,0.0,0.25,0.25,12.75,11.5,2.0,0.0
7,Portugal,13.25,4.75,6.5,2.0,1.0,22.125,581.5,513.75,13.375,...,11.5,2.75,1.75,0.5,0.0,0.25,9.5,13.0,1.0,0.0
8,Netherlands,13.25,4.0,5.75,3.5,1.0,20.625,554.0,462.25,13.6875,...,12.75,2.0,1.0,0.0,0.0,0.5,10.0,9.75,0.75,0.25
9,Austria,13.25,3.75,6.0,3.5,1.0,20.375,576.5,470.5,13.125,...,19.25,2.0,1.25,0.0,0.0,0.25,13.75,12.0,1.5,0.0


In [17]:
# save the data (qualifier 2020, tournament 2020, qualifier 2024)
df_full.to_csv('../data/euro20_qualifiers.csv')

In [20]:
# save the data (qualifier 2020)
df_full.to_csv('../data/euro20_finalstage.csv')