In [1]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
from livescore_scraper import scrape

# Pull match data for the requested date window; both bounds are handed to
# scrape() unchanged.
# NOTE(review): '2019-08-9' is not zero-padded -- confirm scrape() accepts it.
df = scrape('2019-08-9', '2021-04-22')

# Remove every fixture in which one of these sides appears, whether as the
# home team or as the away team.
excluded_teams = ['Norrby IF U19', 'Malmo FF U19']
involves_excluded = (df['home_team'].isin(excluded_teams)
                     | df['away_team'].isin(excluded_teams))
index_names = df[involves_excluded].index

# The drop is done in place, so `df` itself no longer holds those rows.
df.drop(index_names, inplace = True)

from soccer_preprocessing import preprocess
# Per-team statistics derived from the filtered matches; the functions below
# read a `team` column plus strength / average-goal columns from this frame.
df_team_stats = preprocess(df)

In [3]:
def compute_goal_expectancy(df_team_stats, home_team, away_team):
    """Return the goal expectancy of both sides of a fixture.

    Parameters
    ----------
    df_team_stats : pandas.DataFrame
        Must expose a ``team`` column plus ``offensive_strength_home``,
        ``offensive_strength_away``, ``defensive_strength_home``,
        ``defensive_strength_away``, ``avg_goals_home`` and
        ``avg_goals_away``.
    home_team, away_team : str
        Values to look up in the ``team`` column. When a name matches several
        rows, the first match is used.

    Returns
    -------
    tuple
        ``(home_expectancy, away_expectancy)``, each rounded to 2 decimals.

    Raises
    ------
    IndexError
        If ``home_team`` or ``away_team`` has no row in ``df_team_stats``.
    """
    def _team_row(team):
        rows = df_team_stats[df_team_stats.team == team]
        if rows.empty:
            # Same exception type that .iloc[0] raises on an empty frame, but
            # with a message that says which team is missing.
            raise IndexError(f"team {team!r} not found in df_team_stats")
        return rows.iloc[0]

    home = _team_row(home_team)
    away = _team_row(away_team)

    # Home side: its home attack, scaled by the visitor's away defence and by
    # its own home scoring average. The away side mirrors this.
    home_expectancy = (home.offensive_strength_home
                       * away.defensive_strength_away
                       * home.avg_goals_home)
    away_expectancy = (away.offensive_strength_away
                       * home.defensive_strength_home
                       * away.avg_goals_away)
    return np.round(home_expectancy, 2), np.round(away_expectancy, 2)

In [12]:
def poisson(mu, k):
    """Poisson probability mass function: ``mu**k * exp(-mu) / k!``.

    Parameters
    ----------
    mu : float
        Rate (expected number of events).
    k : int
        Non-negative event count; ``math.factorial`` rejects negative values.

    Returns
    -------
    numpy.float64
        Probability of observing exactly ``k`` events.
    """
    # `np.math` was only an alias of the stdlib `math` module and no longer
    # exists in NumPy 2.0, so call `math` directly.
    return (math.pow(mu, k) * np.exp(-mu)) / math.factorial(k)

def compute_poisson_matrix(df, ht, at, max_goals=10):
    """Score-line probability table under independent Poisson goal counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Team statistics, forwarded to ``compute_goal_expectancy``.
    ht, at : str
        Home and away team names.
    max_goals : int, default 10
        Largest goal count tabulated for each side (inclusive).

    Returns
    -------
    pandas.DataFrame
        ``(max_goals + 1)`` x ``(max_goals + 1)`` frame in which row ``i`` /
        column ``j`` holds the probability of the home side scoring ``i`` and
        the away side scoring ``j``, rounded to 4 decimals.
    """
    geh, gea = compute_goal_expectancy(df, ht, at)
    goal_counts = range(max_goals + 1)

    # Each marginal depends on one side's goal count only, so evaluate it once
    # per count instead of once per cell of the table.
    home_pmf = [poisson(geh, goals) for goals in goal_counts]
    away_pmf = [poisson(gea, goals) for goals in goal_counts]

    matrix = [[np.round(p_home * p_away, 4) for p_away in away_pmf]
              for p_home in home_pmf]
    return pd.DataFrame(matrix)

def compute_tau(x, y, mu, nu, rho):
    """Low-score adjustment factor applied to the score line ``(x, y)``.

    Only the four score lines 0-0, 1-0, 0-1 and 1-1 are adjusted; every other
    score line gets a factor of 1.

    Parameters
    ----------
    x, y : int
        Goals of the first and second side.
    mu, nu : float
        Goal expectancy of the first and second side.
    rho : float
        Strength of the adjustment.
    """
    score = (x, y)
    if score == (0, 0):
        return 1 - mu * nu * rho
    if score == (1, 0):
        return 1 + nu * rho
    if score == (0, 1):
        return 1 + mu * rho
    if score == (1, 1):
        return 1 - rho
    # Any score line with a side above one goal is left untouched.
    return 1

def compute_dixon_coles_matrix(df, ht, at, max_goals=10, rho=0.7):
    """Score-line probability table with the ``compute_tau`` low-score adjustment.

    Each cell is the product of the two independent Poisson probabilities
    multiplied by ``compute_tau(i, j, geh, gea, rho)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Team statistics, forwarded to ``compute_goal_expectancy``.
    ht, at : str
        Home and away team names.
    max_goals : int, default 10
        Largest goal count tabulated for each side (inclusive).
    rho : float, default 0.7
        Adjustment strength handed to ``compute_tau``.

    Returns
    -------
    pandas.DataFrame
        ``(max_goals + 1)`` x ``(max_goals + 1)`` frame; row ``i`` / column
        ``j`` is the adjusted probability of a ``i``-``j`` score (home goals
        first), rounded to 4 decimals.

    Notes
    -----
    NOTE(review): the 0-0 factor is ``1 - geh * gea * rho``, which turns
    negative (and so does the 0-0 cell) once ``geh * gea * rho > 1``. With the
    default ``rho=0.7`` that happens when ``geh * gea`` exceeds about 1.43.
    Confirm the default is intended.
    """
    geh, gea = compute_goal_expectancy(df, ht, at)
    goal_counts = range(max_goals + 1)

    # Each marginal depends on one side's goal count only, so evaluate it once
    # per count instead of once per cell of the table.
    home_pmf = [poisson(geh, goals) for goals in goal_counts]
    away_pmf = [poisson(gea, goals) for goals in goal_counts]

    matrix = [
        [np.round(home_pmf[i] * away_pmf[j] * compute_tau(i, j, geh, gea, rho), 4)
         for j in goal_counts]
        for i in goal_counts
    ]
    return pd.DataFrame(matrix)

def _sum_outcome_probabilities(matrix):
    """Collapse a score matrix into ``(home win, draw, away win)`` totals.

    Row labels are read as home goals and column labels as away goals, which
    is how ``compute_poisson_matrix`` and ``compute_dixon_coles_matrix`` lay
    out their tables.
    """
    phw, pdr, paw = 0, 0, 0
    # Column-major traversal keeps the floating-point accumulation order
    # stable, so printed totals are reproducible run to run.
    for away_goals in matrix.columns.values:
        for home_goals in matrix.index.values:
            cell = matrix.loc[home_goals, away_goals]
            if home_goals > away_goals:
                phw += cell
            elif home_goals == away_goals:
                pdr += cell
            else:
                paw += cell
    return phw, pdr, paw


def win_probabilities_poisson(df, ht, at, max_goals=10):
    """Outcome probabilities from the independent-Poisson score matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Team statistics, forwarded to ``compute_poisson_matrix``.
    ht, at : str
        Home and away team names.
    max_goals : int, default 10
        Largest goal count per side included in the sums.

    Returns
    -------
    tuple
        ``(home win, draw, away win)``. Because the matrix stops at
        ``max_goals`` and its cells are rounded to 4 decimals, the three
        values need not add up to exactly 1.
    """
    matrix = compute_poisson_matrix(df, ht, at, max_goals=max_goals)
    return _sum_outcome_probabilities(matrix)


def win_probabilities_dixon_coles(df, ht, at, max_goals=10, rho=0.7):
    """Outcome probabilities from the Dixon-Coles-adjusted score matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Team statistics, forwarded to ``compute_dixon_coles_matrix``.
    ht, at : str
        Home and away team names.
    max_goals : int, default 10
        Largest goal count per side included in the sums.
    rho : float, default 0.7
        Adjustment strength forwarded to ``compute_dixon_coles_matrix``.

    Returns
    -------
    tuple
        ``(home win, draw, away win)``. Because the matrix stops at
        ``max_goals`` and its cells are rounded to 4 decimals, the three
        values need not add up to exactly 1.
    """
    matrix = compute_dixon_coles_matrix(df, ht, at, max_goals=max_goals, rho=rho)
    return _sum_outcome_probabilities(matrix)

In [13]:
print(win_probabilities_poisson(df_team_stats, 'Reims', 'Marseille'))
print(win_probabilities_dixon_coles(df_team_stats, 'Reims', 'Marseille'))

(0.14459999999999998, 0.4303, 0.4247999999999999)
(0.2022, 0.3151, 0.4823999999999999)


In [14]:
compute_poisson_matrix(df_team_stats, 'Reims', 'Marseille', max_goals=5)

Unnamed: 0,0,1,2,3,4,5
0,0.343,0.2573,0.0965,0.0241,0.0045,0.0007
1,0.1098,0.0823,0.0309,0.0077,0.0014,0.0002
2,0.0176,0.0132,0.0049,0.0012,0.0002,0.0
3,0.0019,0.0014,0.0005,0.0001,0.0,0.0
4,0.0001,0.0001,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
compute_dixon_coles_matrix(df_team_stats, 'Reims', 'Marseille', max_goals=5)

Unnamed: 0,0,1,2,3,4,5
0,0.2854,0.3149,0.0965,0.0241,0.0045,0.0007
1,0.1674,0.0247,0.0309,0.0077,0.0014,0.0002
2,0.0176,0.0132,0.0049,0.0012,0.0002,0.0
3,0.0019,0.0014,0.0005,0.0001,0.0,0.0
4,0.0001,0.0001,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0
