In [102]:
import requests

response = requests.post('https://fbrapi.com/generate_api_key')
api_key = response.json()['api_key']

In [105]:
# This function creates a tidy dataframe with 3 columns (Team, Penalty Kick Attempts, Penalty Kick Scored) from the FBref.com website
# the inputs 'league' and 'year' determine the API paramaters to allow us to create dataframes based on season and league.
# The endpoints being used are /team-season-stats - https://fbrapi.com/documentation#team-season-stats

import time
import pandas as pd

def create_df(league: int, year: str):
  # we will be running this multiple times - FBReference has a limit of 6 seconds between requests.
  t = 6.1
  time.sleep(t)

  url = "https://fbrapi.com/team-season-stats"
  params = {
    "league_id": league,
    "season_id": year,
  }
  headers = {"X-API-Key": api_key}

  response = requests.get(url=url, params=params, headers=headers)
  response.raise_for_status()
  data = response.json()

  print(f'this is what the original json data looks like:\n {data}')

  for meta in data:
    data_simplified = data[meta]

  df = pd.DataFrame(data_simplified)

  pk_made = []
  pk_attempts = []
  team_names = []

  for goal in df['stats']:
    pk_made.append(goal['stats']['ttl_pk_made'])

  for attempt in df['stats']:
    pk_attempts.append(attempt['stats']['ttl_pk_att'])

  for team in df['meta_data']:
    team_names.append(team['team_name'])

  dict = {'team': team_names, 'pk attempts': pk_attempts, 'pk made': pk_made}

  return pd.DataFrame(dict).set_index('team')

df_pl2122 = create_df(9, "2021-2022")   #the English Premier League code is 9

print(f'\nPremier League 2021-2022 Season:\n {df_pl2122}')


this is what the original json data looks like:
 {'data': [{'meta_data': {'team_id': '18bb7c10', 'team_name': 'Arsenal'}, 'stats': {'stats': {'roster_size': 27, 'matches_played': 38, 'ttl_gls': 60, 'ttl_ast': 41, 'ttl_non_pen_gls': 55, 'ttl_xg': 60.5, 'ttl_non_pen_xg': 55.1, 'ttl_xag': 40.2, 'ttl_pk_made': 5, 'ttl_pk_att': 8, 'ttl_yellow_cards': 67, 'ttl_red_cards': 4, 'ttl_carries_prog': 734, 'ttl_passes_prog': 1655, 'avg_gls': 1.58, 'avg_ast': 1.08, 'avg_non_pen_gls': 1.45, 'avg_xg': 1.59, 'avg_xag': 1.06, 'avg_non_pen_xg': 1.45}, 'keepers': {'ttl_gls_ag': 48, 'avg_gls_ag': 1.26, 'sot_ag': 147, 'ttl_saves': 100, 'save_pct': 70.7, 'clean_sheets': 13, 'clean_sheet_pct': 34.2, 'pk_att_ag': 6, 'pk_made_ag': 5, 'pk_saved': 0, 'pk_miss_ag': 1, 'pk_save_pct': 0.0}, 'keepersadv': {'ttl_pk_att_ag': 5, 'ttl_fk_gls_ag': 0, 'ttl_ck_gls_ag': 3, 'ttl_og_ag': 1, 'ttl_psxg': 47.6, 'psxg_per_sot': 0.29, 'ttl_launched_pass_cmp': 151, 'ttl_launched_pass_att': 533, 'pct_launched_pass_cmp': 28.3, 'ttl_pa

In [None]:
# To get multiple dataframes lets run the create_df a few more times
df_pl2021 = create_df(9, "2020-2021")
df_pl2223 = create_df(9, "2022-2023")
df_pl2324 = create_df(9, "2023-2024")
df_pl2425 = create_df(9, "2024-2025")


In [None]:
# Let's sure that the pk attempts and pk made are integers so that we can sum them (multiple season totals).

df_pl2223.dtypes

Unnamed: 0,0
pk attempts,int64
pk made,int64


In [None]:
# merging dataframes to sum attempts and goals while adding our conversion ratio of goals to attempts

dataframes = [df_pl2021, df_pl2122, df_pl2223, df_pl2324, df_pl2425]

merged_df = pd.concat(dataframes, join='outer').groupby('team').sum()

merged_df['conversion ratio'] = (merged_df['pk made']/merged_df['pk attempts']).round(2)
print(merged_df)


                 pk attempts  pk made  conversion ratio
team                                                   
Arsenal                   30       26              0.87
Aston Villa               23       18              0.78
Bournemouth               10        8              0.80
Brentford                 23       21              0.91
Brighton                  35       28              0.80
Burnley                    8        7              0.88
Chelsea                   39       34              0.87
Crystal Palace            23       17              0.74
Everton                   20       17              0.85
Fulham                    21       13              0.62
Ipswich Town               2        2              1.00
Leeds United              12       10              0.83
Leicester City            23       17              0.74
Liverpool                 36       30              0.83
Luton Town                 5        5              1.00
Manchester City           42       33           

In [None]:
# average conversion over the last 5 Premier League seasons.

attempts = merged_df['pk attempts'].sum()
goals = merged_df['pk made'].sum()

print(goals/attempts)

0.8220502901353965


In [None]:
merged_df['conversion ratio'].median()

0.85

In [None]:
# Let's see if there are strong underperformers/overperformers. The xG of a PK is 0.79
print(merged_df.sort_values(by='conversion ratio', ascending=False))

                 pk attempts  pk made  conversion ratio
team                                                   
Ipswich Town               2        2              1.00
Norwich City               3        3              1.00
Luton Town                 5        5              1.00
Wolves                    12       12              1.00
West Brom                  4        4              1.00
Brentford                 23       21              0.91
Tottenham                 22       20              0.91
Newcastle Utd             31       28              0.90
Sheffield Utd              9        8              0.89
Burnley                    8        7              0.88
Arsenal                   30       26              0.87
Chelsea                   39       34              0.87
Manchester Utd            30       26              0.87
Everton                   20       17              0.85
Liverpool                 36       30              0.83
Leeds United              12       10           

In [None]:
# Let's remove the entries that 5 or less attempts as these represent teams that have only spent 1 year in the league.

filter_by_attempts = merged_df.loc[merged_df['pk attempts'] > 5]
print(filter_by_attempts.sort_values(by='conversion ratio', ascending=False))

                 pk attempts  pk made  conversion ratio
team                                                   
Wolves                    12       12              1.00
Brentford                 23       21              0.91
Tottenham                 22       20              0.91
Newcastle Utd             31       28              0.90
Sheffield Utd              9        8              0.89
Burnley                    8        7              0.88
Chelsea                   39       34              0.87
Manchester Utd            30       26              0.87
Arsenal                   30       26              0.87
Everton                   20       17              0.85
Leeds United              12       10              0.83
Liverpool                 36       30              0.83
Bournemouth               10        8              0.80
Brighton                  35       28              0.80
Manchester City           42       33              0.79
Aston Villa               23       18           

In [None]:
filter_by_attempts['conversion ratio'].median()

0.83

In [None]:
# Despite Fulham massively underperforming at penalty kicks, looking at the table it seems that a lot of teams are doing a lot better than the 0.79 probability of a PK goal.
import statistics

sd_list = []

for i in filter_by_attempts['conversion ratio']:
  sd_list.append(i)

print(sd_list)
print(f'standard deviation: {statistics.stdev(sd_list)}, mean: {statistics.mean(sd_list)}')



[0.87, 0.78, 0.8, 0.91, 0.8, 0.88, 0.87, 0.74, 0.85, 0.62, 0.83, 0.74, 0.83, 0.79, 0.87, 0.9, 0.7, 0.89, 0.69, 0.91, 0.73, 1.0]
standard deviation: 0.08931679112848367, mean: 0.8181818181818182


In [None]:
#

In [None]:
# Checking if this function works for other top leagues.

df_sa2021 = create_df(league=11, year='2020-2021')

print(df_sa2021)

               pk attempts  pk made
team                               
Atalanta                 7        6
Benevento                7        5
Bologna                  4        3
Cagliari                 5        4
Crotone                  9        9
Fiorentina               6        6
Genoa                    4        4
Hellas Verona            3        3
Inter                    9        8
Juventus                10        8
Lazio                   10        6
Milan                   20       15
Napoli                   8        7
Parma                    5        5
Roma                     8        7
Sampdoria                6        5
Sassuolo                14       13
Spezia                   5        4
Torino                   6        5
Udinese                  4        4


In [None]:
# TO DO:
# Make the same data for 4 other leagues, make a pivot table where we compare total pk conversion in those leagues.