## **Expected Points**

xPts | Serie A 2024/25

$$\text{Expected Points} = (3 \times P_{\text{win}}) + (1 \times P_{\text{draw}}) + (0 \times P_{\text{loss}})$$

In [1]:
#imports
import pandas as pd
import numpy as np
from scipy.stats import poisson

##### **Data Retrieval**

In [2]:
# FBref "Scores and Fixtures" page for Serie A; its first HTML table is loaded
# into a DataFrame with pd.read_html further down in the notebook.
url_schedule = 'https://fbref.com/en/comps/11/schedule/Serie-A-Scores-and-Fixtures'

In [3]:
def calculate_points(home_goals, away_goals):
    """Return the league points earned by each side for a final score.

    Parameters
    ----------
    home_goals, away_goals : int or str
        Goals scored by the home and the away team. Numeric strings (such as
        the two halves of a split ``'2–1'`` score cell) are accepted and
        converted to ``int`` first; comparing the raw strings would be
        lexicographic and would rank ``'10'`` below ``'9'``.

    Returns
    -------
    tuple of int
        ``(home_points, away_points)``: ``(3, 0)`` for a home win, ``(1, 1)``
        for a draw and ``(0, 3)`` for an away win.

    Raises
    ------
    ValueError, TypeError
        If either value cannot be converted to an integer (e.g. NaN or None).
    """
    # Normalise to integers so the comparisons below are always numeric.
    home_goals, away_goals = int(home_goals), int(away_goals)

    if home_goals > away_goals:
        return 3, 0  # home wins
    if home_goals == away_goals:
        return 1, 1  # draw
    return 0, 3  # away wins

In [4]:
def calculate_xp(xg_home, xg_away, max_goals=10):
    """Expected points of both teams, modelling goals as independent Poisson counts.

    Each side's goal tally is treated as a Poisson variable whose mean is that
    side's xG. Every scoreline with ``0 .. max_goals - 1`` goals per team is
    weighted by its joint probability, and the resulting win/draw
    probabilities are converted to points as ``3 * P(win) + 1 * P(draw)``.

    Parameters
    ----------
    xg_home, xg_away : float
        Expected goals of the home and the away team.
    max_goals : int, default 10
        Number of goal counts evaluated per team. Scorelines outside this
        grid are ignored, so the three probabilities add up to slightly less
        than 1.

    Returns
    -------
    list of float
        ``[home_xp, away_xp]``, each rounded to two decimals. NaN inputs
        propagate to NaN outputs.
    """
    goals = np.arange(max_goals)

    # joint[i, j] = P(home scores i) * P(away scores j). Probabilities are kept
    # at full precision; only the final expected points are rounded, so no
    # rounding error accumulates over the grid.
    joint = np.outer(poisson.pmf(goals, xg_home), poisson.pmf(goals, xg_away))

    # True where the row index (home goals) exceeds the column index (away goals).
    home_ahead = goals[:, None] > goals[None, :]

    prob_home_win = joint[home_ahead].sum()
    # Transposing swaps the two teams, so the same mask picks out the away wins.
    prob_away_win = joint.T[home_ahead].sum()
    # Equal scores sit on the diagonal.
    prob_draw = np.trace(joint)

    home_xp = 3 * prob_home_win + prob_draw
    away_xp = 3 * prob_away_win + prob_draw

    return [round(float(home_xp), 2), round(float(away_xp), 2)]

In [5]:
# read_html returns a list with one DataFrame per table found on the page;
# the first table is the one used here.
schedule_tables = pd.read_html(url_schedule)
df_SerieA = schedule_tables[0]
df_SerieA.head(n=10)

Unnamed: 0,Wk,Day,Date,Time,Home,xG,Score,xG.1,Away,Attendance,Venue,Referee,Match Report,Notes
0,1.0,Sat,2024-08-17,18:30,Genoa,2.1,2–2,1.9,Inter,,Stadio Comunale Luigi Ferraris,,Match Report,
1,1.0,Sat,2024-08-17,18:30,Parma,1.7,1–1,0.7,Fiorentina,,Stadio Ennio Tardini,,Match Report,
2,1.0,Sat,2024-08-17,20:45,Milan,2.1,2–2,1.0,Torino,,Stadio Giuseppe Meazza,,Match Report,
3,1.0,Sat,2024-08-17,20:45,Empoli,0.5,0–0,0.3,Monza,,Stadio Carlo Castellani - Computer Gross...,,Match Report,
4,1.0,Sun,2024-08-18,18:30,Hellas Verona,1.7,3–0,1.0,Napoli,,Stadio Marc'Antonio Bentegodi,,Match Report,
5,1.0,Sun,2024-08-18,18:30,Bologna,3.2,1–1,0.9,Udinese,,Stadio Renato Dall'Ara,,Match Report,
6,1.0,Sun,2024-08-18,20:45,Cagliari,1.0,0–0,0.9,Roma,,Unipol Domus,,Match Report,
7,1.0,Sun,2024-08-18,20:45,Lazio,2.4,3–1,0.6,Venezia,,Stadio Olimpico,,Match Report,
8,1.0,Mon,2024-08-19,18:30,Lecce,1.5,0–4,1.7,Atalanta,,Stadio Comunale Ettore Giardiniero,,Match Report,
9,1.0,Mon,2024-08-19,20:45,Juventus,1.2,3–0,0.2,Como,,Allianz Stadium,,Match Report,


In [6]:
# Columns that are not needed for the expected-points calculation
columns_to_drop = ['Day', 'Date', 'Time', 'Attendance', 'Venue', 'Referee', 'Notes']

# Keep only the matches that have been played and drop the unused columns.
# Filter and drop are chained so a brand-new frame is produced: calling
# drop(inplace=True) or assigning columns on a filtered slice triggers
# pandas' SettingWithCopyWarning.
is_played = df_SerieA['Match Report'] == 'Match Report'
df_SerieA = df_SerieA.loc[is_played].drop(columns=columns_to_drop + ['Match Report'])

# Split the score column on the en dash into home and away goals and cast them
# to integers. Left as strings, the goals would be compared lexicographically
# later on ('10' < '9'). A malformed score makes the cast fail loudly.
goals = df_SerieA['Score'].str.split('–', n=1, expand=True).astype(int)

df_SerieA = (
    df_SerieA
    .assign(home_goals=goals[0], away_goals=goals[1])
    .drop(columns=['Score'])
    # rename by name, not by position, so a change in the source column order
    # cannot silently mislabel the data
    .rename(columns={
        'Wk': 'week',
        'Home': 'home',
        'xG': 'xG_home',
        'xG.1': 'xG_away',
        'Away': 'away',
    })
)

df_SerieA.head(10)

Unnamed: 0,week,home,xG_home,xG_away,away,home_goals,away_goals
0,1.0,Genoa,2.1,1.9,Inter,2,2
1,1.0,Parma,1.7,0.7,Fiorentina,1,1
2,1.0,Milan,2.1,1.0,Torino,2,2
3,1.0,Empoli,0.5,0.3,Monza,0,0
4,1.0,Hellas Verona,1.7,1.0,Napoli,3,0
5,1.0,Bologna,3.2,0.9,Udinese,1,1
6,1.0,Cagliari,1.0,0.9,Roma,0,0
7,1.0,Lazio,2.4,0.6,Venezia,3,1
8,1.0,Lecce,1.5,1.7,Atalanta,0,4
9,1.0,Juventus,1.2,0.2,Como,3,0


Iterate over each match to calculate the expected points (xPts) and the actual points of each team, and add four columns to the dataframe: `xPts_home`, `xPts_away`, `points_home`, `points_away`.

In [7]:
def compute_xp_and_points(row):
    """Compute expected and actual points of both teams for one match row.

    ``row`` must provide ``xG_home``, ``xG_away``, ``home_goals`` and
    ``away_goals``. Returns a ``pd.Series`` labelled ``xPts_home``,
    ``xPts_away``, ``points_home`` and ``points_away``.
    """
    labels = ['xPts_home', 'xPts_away', 'points_home', 'points_away']

    (home_xpts, away_xpts), (home_pts, away_pts) = (
        calculate_xp(row['xG_home'], row['xG_away']),
        calculate_points(row['home_goals'], row['away_goals']),
    )

    return pd.Series([home_xpts, away_xpts, home_pts, away_pts], index=labels)

# Apply the function to each match. Every row comes back as a single Series
# that mixes float xPts with integer points, so pandas stores all four values
# as floats (points show up as 1.0 / 3.0); cast the points back to integers.
match_metrics = (
    df_SerieA
    .apply(compute_xp_and_points, axis=1)
    .astype({'points_home': int, 'points_away': int})
)

# assign() returns a new frame instead of writing columns into a frame that
# may be a filtered slice, and re-running the cell simply overwrites them.
df_SerieA = df_SerieA.assign(
    **{column: match_metrics[column] for column in match_metrics.columns}
)

df_SerieA.head(10)

Unnamed: 0,week,home,xG_home,xG_away,away,home_goals,away_goals,xPts_home,xPts_away,points_home,points_away
0,1.0,Genoa,2.1,1.9,Inter,2,2,1.51,1.28,1.0,1.0
1,1.0,Parma,1.7,0.7,Fiorentina,1,1,2.08,0.69,1.0,1.0
2,1.0,Milan,2.1,1.0,Torino,2,2,2.08,0.72,1.0,1.0
3,1.0,Empoli,0.5,0.3,Monza,0,0,1.46,1.03,1.0,1.0
4,1.0,Hellas Verona,1.7,1.0,Napoli,3,0,1.85,0.91,3.0,0.0
5,1.0,Bologna,3.2,0.9,Udinese,1,1,2.56,0.32,1.0,1.0
6,1.0,Cagliari,1.0,0.9,Roma,0,0,1.42,1.26,1.0,1.0
7,1.0,Lazio,2.4,0.6,Venezia,3,1,2.47,0.37,3.0,0.0
8,1.0,Lecce,1.5,1.7,Atalanta,0,4,1.26,1.51,0.0,3.0
9,1.0,Juventus,1.2,0.2,Como,3,0,2.2,0.49,3.0,0.0


In [8]:
# Expected and actual points collected in home matches, summed per team
df_home = df_SerieA.groupby('home')[['xPts_home', 'points_home']].sum()
df_home

Unnamed: 0_level_0,xPts_home,points_home
home,Unnamed: 1_level_1,Unnamed: 2_level_1
Atalanta,3.11,3.0
Bologna,4.33,3.0
Cagliari,5.35,2.0
Como,3.43,4.0
Empoli,3.41,3.0
Fiorentina,6.45,5.0
Genoa,3.92,2.0
Hellas Verona,2.99,3.0
Inter,4.64,6.0
Juventus,4.86,5.0


In [9]:
# Expected and actual points collected in away matches, summed per team
df_away = df_SerieA.groupby('away')[['xPts_away', 'points_away']].sum()
df_away

Unnamed: 0_level_0,xPts_away,points_away
away,Unnamed: 1_level_1,Unnamed: 2_level_1
Atalanta,7.32,4.0
Bologna,3.2,4.0
Cagliari,2.86,3.0
Como,5.19,4.0
Empoli,3.41,7.0
Fiorentina,3.66,2.0
Genoa,2.12,3.0
Hellas Verona,3.16,3.0
Inter,6.01,5.0
Juventus,6.24,7.0


Perform an inner join between df_home and df_away, calculate new columns points, xPts, and diff, and select only these columns.

In [10]:
# Inner-join the home and away totals on the team index, then derive the
# season totals and keep only those derived columns.
df_result = (
    pd.concat([df_home, df_away], axis=1, join='inner')
    .round(2)
    .assign(
        points=lambda totals: totals['points_home'] + totals['points_away'],
        xPts=lambda totals: totals['xPts_home'] + totals['xPts_away'],
        # positive diff: more points won than the xG model expected
        diff=lambda totals: totals['points'] - totals['xPts'],
    )
    .loc[:, ['points', 'xPts', 'diff']]
)
df_result

Unnamed: 0,points,xPts,diff
Atalanta,7.0,10.43,-3.43
Bologna,7.0,7.53,-0.53
Cagliari,5.0,8.21,-3.21
Como,8.0,8.62,-0.62
Empoli,10.0,6.82,3.18
Fiorentina,7.0,10.11,-3.11
Genoa,5.0,6.04,-1.04
Hellas Verona,6.0,6.15,-0.15
Inter,11.0,10.65,0.35
Juventus,12.0,11.1,0.9


Sort by `points` to obtain the actual standings.

In [11]:
# highest actual points first
df_result.sort_values('points', ascending=False)

Unnamed: 0,points,xPts,diff
Napoli,13.0,10.31,2.69
Juventus,12.0,11.1,0.9
Milan,11.0,11.35,-0.35
Torino,11.0,6.86,4.14
Inter,11.0,10.65,0.35
Lazio,10.0,10.49,-0.49
Udinese,10.0,5.8,4.2
Empoli,10.0,6.82,3.18
Roma,9.0,8.96,0.04
Como,8.0,8.62,-0.62


Sort by `xPts` to get the expected points standings.

In [12]:
# highest expected points first
df_result.sort_values('xPts', ascending=False)

Unnamed: 0,points,xPts,diff
Milan,11.0,11.35,-0.35
Juventus,12.0,11.1,0.9
Inter,11.0,10.65,0.35
Lazio,10.0,10.49,-0.49
Atalanta,7.0,10.43,-3.43
Napoli,13.0,10.31,2.69
Fiorentina,7.0,10.11,-3.11
Roma,9.0,8.96,0.04
Como,8.0,8.62,-0.62
Cagliari,5.0,8.21,-3.21


Sort by `diff` (actual points minus expected points) to rank the teams from the biggest over-performers to the biggest under-performers.

In [13]:
# largest over-performance (points above xPts) first
df_result.sort_values('diff', ascending=False)

Unnamed: 0,points,xPts,diff
Udinese,10.0,5.8,4.2
Torino,11.0,6.86,4.14
Empoli,10.0,6.82,3.18
Napoli,13.0,10.31,2.69
Juventus,12.0,11.1,0.9
Inter,11.0,10.65,0.35
Roma,9.0,8.96,0.04
Hellas Verona,6.0,6.15,-0.15
Milan,11.0,11.35,-0.35
Lazio,10.0,10.49,-0.49
