In [1]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
from livescore_scraper import scrape

# Teams whose fixtures are removed from the scraped data.
excluded_teams = ['Norrby IF U19', 'Malmo FF U19']

df = scrape('2020-08-21', '2021-04-23')

# Index labels of every fixture where an excluded team plays at home or away.
involves_excluded = df['home_team'].isin(excluded_teams) | df['away_team'].isin(excluded_teams)
index_names = df.loc[involves_excluded].index

df.drop(index=index_names, inplace=True)

In [19]:
# NOTE(review): df_team_stats is assigned by `preprocess(df)` in a later cell of this
# notebook; on a fresh kernel this cell raises NameError until that cell has run.
df_team_stats

Unnamed: 0,team,nb_played,nb_played_home,nb_played_away,nb_goals,nb_taken,avg_goals,avg_taken,nb_goals_home,nb_goals_away,...,nb_lose_away,avg_wins_away,avg_even_away,avg_lose_away,offensive_strength,defensive_strength,offensive_strength_home,defensive_strength_home,offensive_strength_away,defensive_strength_away
0,Strasbourg,33,16,17,42,51,1.27,1.55,17,25,...,8,0.35,0.18,0.47,0.94,0.48,0.78,0.52,1.11,0.44
1,Paris Saint-Germain,33,17,16,74,25,2.24,0.76,38,36,...,3,0.69,0.12,0.19,1.66,0.98,1.64,0.99,1.7,0.97
2,Angers,33,16,17,34,50,1.03,1.52,16,18,...,6,0.35,0.29,0.35,0.77,0.49,0.73,0.5,0.8,0.48
3,Monaco,33,17,16,70,38,2.12,1.15,39,31,...,6,0.62,0.0,0.38,1.58,0.65,1.68,0.76,1.47,0.56
4,Bordeaux,33,17,16,35,48,1.06,1.45,15,20,...,9,0.31,0.12,0.56,0.79,0.51,0.64,0.61,0.94,0.43
5,Metz,33,16,17,36,38,1.09,1.15,17,19,...,5,0.35,0.35,0.29,0.81,0.65,0.78,0.63,0.85,0.65
6,Marseille,34,17,17,46,41,1.35,1.21,28,18,...,5,0.29,0.41,0.29,1.0,0.61,1.21,0.64,0.8,0.59
7,Lorient,32,15,17,38,58,1.19,1.81,21,17,...,10,0.06,0.35,0.59,0.88,0.41,1.02,0.47,0.76,0.36
8,Dijon,33,17,16,22,56,0.67,1.7,7,15,...,11,0.12,0.19,0.69,0.5,0.44,0.3,0.59,0.71,0.34
9,Rennes,33,16,17,43,35,1.3,1.06,18,25,...,4,0.41,0.35,0.24,0.97,0.7,0.82,0.63,1.11,0.78


In [41]:
def compute_goal_expectancy(df_team_stats, home_team, away_team):
    home = df_team_stats[df_team_stats.team == home_team].iloc[0]
    away = df_team_stats[df_team_stats.team == away_team].iloc[0]
    return (np.round(home.offensive_strength_home * away.defensive_strength_away * home.avg_goals_home, 2),
            np.round(away.offensive_strength_away * home.defensive_strength_home * away.avg_goals_away, 2))

In [42]:
from soccer_preprocessing import preprocess

# Build the team statistics table that the goal-expectancy functions read from.
df_team_stats = preprocess(df)

# Example: expected goals for Paris Saint-Germain (home) against Angers (away).
psg_angers_expectancy = compute_goal_expectancy(df_team_stats, 'Paris Saint-Germain', 'Angers')
print(psg_angers_expectancy)

(1.76, 0.84)


In [140]:
def poisson(mu, k):
    """Poisson probability mass P(X = k) for a rate of ``mu``.

    Parameters
    ----------
    mu : float
        Expected number of events (here: a side's goal expectancy).
    k : int
        Non-negative event count to evaluate.

    Returns
    -------
    numpy.float64
        ``mu**k * exp(-mu) / k!``.

    Raises
    ------
    ValueError
        If ``k`` is negative (raised by ``math.factorial``).
    """
    # ``np.math`` was an undocumented alias of the stdlib ``math`` module that
    # current NumPy releases no longer expose, so call ``math`` directly.
    return (math.pow(mu, k) * np.exp(-mu)) / math.factorial(k)

def compute_poisson_matrix(df, ht, at, max_goals=10):
    """Build the grid of exact-score probabilities for a fixture.

    Parameters
    ----------
    df : pandas.DataFrame
        Team statistics table, forwarded to ``compute_goal_expectancy``.
    ht, at : str
        Home and away team names.
    max_goals : int, default 10
        Highest goal count considered for each side (inclusive).

    Returns
    -------
    pandas.DataFrame
        Cell ``[i, j]`` is the probability, rounded to 4 decimals, that the home
        side scores ``i`` goals and the away side scores ``j``. The two counts
        are multiplied as independent Poisson variables.
    """
    geh, gea = compute_goal_expectancy(df, ht, at)

    goal_counts = range(max_goals + 1)
    # Each side's marginal distribution only needs to be evaluated once.
    home_probs = [poisson(geh, goals) for goals in goal_counts]
    away_probs = [poisson(gea, goals) for goals in goal_counts]

    grid = [[np.round(p_home * p_away, 4) for p_away in away_probs]
            for p_home in home_probs]
    return pd.DataFrame(grid)

def win_probabilities(df, ht, at):
    """Sum the score grid into home-win, draw and away-win probabilities.

    Parameters
    ----------
    df : pandas.DataFrame
        Team statistics table, forwarded to ``compute_poisson_matrix``.
    ht, at : str
        Home and away team names.

    Returns
    -------
    tuple
        ``(home_win, draw, away_win)``, in that order.
    """
    score_grid = compute_poisson_matrix(df, ht, at)

    totals = {'home': 0, 'draw': 0, 'away': 0}
    # Row labels are home goals, column labels are away goals.
    for away_goals in score_grid.columns.values:
        for home_goals in score_grid.index.values:
            if home_goals > away_goals:
                outcome = 'home'
            elif home_goals == away_goals:
                outcome = 'draw'
            else:
                outcome = 'away'
            totals[outcome] += score_grid.loc[home_goals, away_goals]

    return totals['home'], totals['draw'], totals['away']

In [141]:
# Home-win, draw and away-win probabilities for Paris Saint-Germain (home) vs Dijon (away).
win_probabilities(df_team_stats, 'Paris Saint-Germain', 'Dijon')

(0.5057, 0.2979, 0.19640000000000002)

In [95]:
# Exact-score probability grid for Paris Saint-Germain (home) vs Dijon (away):
# rows are home goals, columns are away goals.
compute_poisson_matrix(df_team_stats, 'Paris Saint-Germain', 'Dijon')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.1481,0.0977,0.0323,0.0071,0.0012,0.0002,0.0,0.0,0.0,0.0,0.0
1,0.1851,0.1222,0.0403,0.0089,0.0015,0.0002,0.0,0.0,0.0,0.0,0.0
2,0.1157,0.0764,0.0252,0.0055,0.0009,0.0001,0.0,0.0,0.0,0.0,0.0
3,0.0482,0.0318,0.0105,0.0023,0.0004,0.0001,0.0,0.0,0.0,0.0,0.0
4,0.0151,0.0099,0.0033,0.0007,0.0001,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0038,0.0025,0.0008,0.0002,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0008,0.0005,0.0002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0001,0.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
def analyze_win_lose(team, matches=None):
    """Print a win/draw/loss summary for ``team`` and return its record per opponent.

    Parameters
    ----------
    team : str
        Team name as it appears in the ``home_team`` / ``away_team`` columns.
    matches : pandas.DataFrame, optional
        Fixtures with ``home_team``, ``away_team``, ``home_score``,
        ``away_score`` and ``played`` columns. Defaults to the notebook-level
        ``df`` so existing one-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per opponent faced (sorted by name) with the columns
        ``opponent``, ``home_wins``, ``away_wins``, ``total_wins``,
        ``home_even``, ``away_even``, ``total_even``, ``home_lose``,
        ``away_lose`` and ``total_lose``. ``home``/``away`` refer to where
        ``team`` played. Empty if ``team`` has no played fixture.
    """
    if matches is None:
        # Fall back to the notebook-level fixtures frame.
        matches = df

    involved = (matches.home_team == team) | (matches.away_team == team)
    # Kept as an explicit comparison so a non-boolean ``played`` column still
    # yields a boolean mask.
    dft = matches[involved & (matches.played == True)]

    at_home = dft.home_team == team
    at_away = dft.away_team == team
    home_side_won = dft.home_score > dft.away_score
    away_side_won = dft.home_score < dft.away_score
    level = dft.home_score == dft.away_score

    # One boolean mask per (venue, outcome), seen from ``team``'s side.
    masks = {
        'home_wins': at_home & home_side_won,
        'away_wins': at_away & away_side_won,
        'home_even': at_home & level,
        'away_even': at_away & level,
        'home_lose': at_home & away_side_won,
        'away_lose': at_away & home_side_won,
    }

    def tally(subset=None):
        # Count fixtures per mask, optionally restricted to ``subset``, and add totals.
        counts = {}
        for key, mask in masks.items():
            hits = mask if subset is None else mask & subset
            counts[key] = int(hits.sum())
        for outcome in ('wins', 'even', 'lose'):
            counts[f'total_{outcome}'] = counts[f'home_{outcome}'] + counts[f'away_{outcome}']
        return counts

    overall = tally()

    # The other side of each fixture; ``team`` itself is never its own opponent.
    opponent = dft.away_team.where(at_home, dft.home_team)
    columns = ['opponent',
               'home_wins', 'away_wins', 'total_wins',
               'home_even', 'away_even', 'total_even',
               'home_lose', 'away_lose', 'total_lose']
    rows = [{'opponent': name, **tally(opponent == name)}
            for name in sorted(opponent.dropna().unique())
            if name != team]
    df_wl_team = pd.DataFrame(rows, columns=columns)

    print(f"Total wins: {overall['total_wins']}, home: {overall['home_wins']}, away: {overall['away_wins']}\n"
          + f"Total even: {overall['total_even']}, home: {overall['home_even']}, away: {overall['away_even']}\n"
          + f"Total lose: {overall['total_lose']}, home: {overall['home_lose']}, away: {overall['away_lose']}")

    return df_wl_team

    

In [None]:
# Run the win/draw/loss analysis for Paris Saint-Germain.
analyze_win_lose('Paris Saint-Germain')