In [65]:
from cgi import print_form
from os import chmod

import numpy as np
import pandas as pd
from exceptiongroup import print_exc
from scipy.stats import truncnorm, norm
from six import print_


# Conditional skills
def conditional_skills(mu_s1, mu_s2, sigma_s1, sigma_s2, t, outcome_var):
    mean_s = np.array([mu_s1, mu_s2])
    cov_s = np.array([[sigma_s1, 0],
                      [0, sigma_s2]])  # Covariance matrix with S_12 and S_21
    outcome_mean = mu_s1 - mu_s2
    outcome_var = sigma_s1 + sigma_s2 + outcome_var # Outcome variance

    # Conditional mean update
    mean_cond = mean_s + (t - outcome_mean) / outcome_var * np.array([sigma_s1, -sigma_s2])

    # Conditional covariance update
    cov_cond = cov_s - np.outer([sigma_s1, -sigma_s2], [sigma_s1, -sigma_s2]) / outcome_var
  
    return mean_cond, cov_cond


# Conditional outcome (truncated Gaussian)
def conditional_outcome(mu_s1, mu_s2, result, outcome_var): #changed so that it only depends on mu_s1 and mu_s2
    mean_diff = mu_s1 - mu_s2
    std_diff = outcome_var

    # Set truncation limits based on match outcome
    if result == 1:
        a, b = 0, np.inf  # Truncate for team 1 winning
    else:
        a, b = -np.inf, 0  # Truncate for team 2 winning

    # Create truncated Gaussian distribution
    trunc_gauss_dist = truncnorm(a=(a - mean_diff) / std_diff, b=(b - mean_diff) / std_diff, loc=mean_diff, scale=std_diff)
    
    return a,b



# Marginal probability that team 1 wins
def marginal_p_y1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t):
    mu_diff = mu_s1 - mu_s2
    sigma_diff = np.sqrt(sigma_s1 + sigma_s2 + sigma_t)
    p_y1 = 1 - norm.cdf(0, loc=mu_diff, scale=sigma_diff)
    return p_y1

n_iter = 100 
mu_s1 = 25 # Initial mean for player 1
mu_s2 = 25 # Initial mean for player 2
sigma_s1 = 8.33 # Variance for player 1
sigma_s2 = 8.33 # Variance for player 2
sigma_t = 25/6 # Outcome variance

In [66]:
def gibbs_sampler_result(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t, n_iter, result):
    s1_samples = np.zeros(n_iter)
    s2_samples = np.zeros(n_iter)
    t_samples = np.zeros(n_iter)
    y_samples = np.zeros(n_iter)
    s1, s2 = mu_s1, mu_s2

    #initializing the truncation limits
    a, b = conditional_outcome(s1, s2, result, sigma_t)
    t = truncnorm.rvs(a, b, loc=s1 - s2, scale= sigma_t)

    for i in range(n_iter):
        y = result 
        mean_cond, cov_cond = conditional_skills(mu_s1, mu_s2,sigma_s1, sigma_s2, t, sigma_t) # P(s1, s2 | t)
        s1, s2 = np.random.multivariate_normal(mean_cond, cov_cond)
        s1_samples[i] = s1
        s2_samples[i] = s2
        a, b = conditional_outcome(s1, s2, y, sigma_t)
        t = truncnorm.rvs(a, b, loc=s1 - s2, scale= sigma_t) #P(t | s1, s2 ,y)
        t_samples[i] = t
        y_samples[i] = y

    return s1_samples, s2_samples, t_samples

In [67]:
import pandas as pd
data = 'SerieA.csv'
df = pd.read_csv(data)
df_filtered = df[df['score1'] != df['score2']].copy()  # Use .copy() to avoid the warning
df_filtered.loc[:, 'win'] = df_filtered.apply(lambda row: 1 if row['score1'] > row['score2'] else -1, axis=1)

In [68]:
print(df_filtered)

     yyyy-mm-dd  HH:MM     team1     team2  score1  score2  win
0    2018-08-18  18:00    Chievo  Juventus       2       3   -1
1    2018-08-18  20:30     Lazio    Napoli       1       2   -1
2    2018-08-19  18:00    Torino      Roma       0       1   -1
3    2018-08-19  20:30  Sassuolo     Inter       1       0    1
5    2018-08-19  20:30    Empoli  Cagliari       2       0    1
..          ...    ...       ...       ...     ...     ...  ...
374  2019-05-26  20:30      Spal     Milan       2       3   -1
375  2019-05-26  20:30      Roma     Parma       2       1    1
376  2019-05-26  20:30     Inter    Empoli       2       1    1
378  2019-05-26  20:30  Cagliari   Udinese       1       2   -1
379  2019-05-26  20:30  Atalanta  Sassuolo       3       1    1

[272 rows x 7 columns]


In [69]:
#Initialize skills for every team
skills = {}
for team in df_filtered['team1'].unique():
    skills[team] = (25, 8.33)
for team in df_filtered['team2'].unique():
    skills[team] = (25, 8.33) 

In [70]:
print(skills)

{'Chievo': (25, 8.33), 'Lazio': (25, 8.33), 'Torino': (25, 8.33), 'Sassuolo': (25, 8.33), 'Empoli': (25, 8.33), 'Bologna': (25, 8.33), 'Atalanta': (25, 8.33), 'Juventus': (25, 8.33), 'Napoli': (25, 8.33), 'Spal': (25, 8.33), 'Udinese': (25, 8.33), 'Genoa': (25, 8.33), 'Fiorentina': (25, 8.33), 'Milan': (25, 8.33), 'Parma': (25, 8.33), 'Sampdoria': (25, 8.33), 'Inter': (25, 8.33), 'Frosinone': (25, 8.33), 'Roma': (25, 8.33), 'Cagliari': (25, 8.33)}


In [None]:
#run the Gibbs sampler for every match
n_iter = 100 
sigma_t = 25/6

for index, row in df_filtered.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    result = row['win']
    mu_s1, sigma_s1 = skills[team1]
    mu_s2, sigma_s2 = skills[team2]
    s1_samples, s2_samples, t_samples = gibbs_sampler_result(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t, n_iter, result)
    skills[team1] = (np.mean(s1_samples), np.var(s1_samples, ddof=1))
    skills[team2] = (np.mean(s2_samples), np.var(s2_samples, ddof=1))
    
    


In [43]:
# Sorting the dictionary by the first value in the tuple (in ascending order)
sorted_data = sorted(skills.items(), key=lambda x: x[1][0])


# Print the sorted result
for team, values in sorted_data:
    print(f"{team}: {values}")

Genoa: (np.float64(-886.8557243489212), np.float64(4191.51283424325))
Parma: (np.float64(-886.1263384872487), np.float64(6663.8022137942))
Empoli: (np.float64(-788.9283087905463), np.float64(2952.647629430265))
Frosinone: (np.float64(-683.5572679899904), np.float64(3575.961094752926))
Spal: (np.float64(-564.7740955465595), np.float64(4538.449604828137))
Udinese: (np.float64(-348.9675018788974), np.float64(5442.61316928823))
Sassuolo: (np.float64(-262.37175488379967), np.float64(5545.553818412744))
Bologna: (np.float64(-253.41579325717805), np.float64(5719.59850623399))
Fiorentina: (np.float64(-230.55191412311854), np.float64(2543.8940717092128))
Cagliari: (np.float64(-190.90907480463983), np.float64(6334.6861166543285))
Chievo: (np.float64(-66.72418365350975), np.float64(138.36550410151293))
Sampdoria: (np.float64(-37.42337529602524), np.float64(5459.91768257253))
Lazio: (np.float64(29.187817297851637), np.float64(806.6090987804874))
Inter: (np.float64(494.82266115023316), np.float64(6

In [44]:
data = 'SerieA.csv'
df_random = pd.read_csv(data)
df_random = df_random[df_random['score1'] != df_random['score2']].copy()  # Use .copy() to avoid the warning
df_random.loc[:, 'win'] = df_random.apply(lambda row: 1 if row['score1'] > row['score2'] else -1, axis=1)
df_shuffled = df_random.sample(frac=1).reset_index(drop=True)

In [45]:
print(df_shuffled)

     yyyy-mm-dd  HH:MM      team1       team2  score1  score2  win
0    2018-12-01  20:30  Sampdoria     Bologna       4       1    1
1    2018-12-29  15:00    Udinese    Cagliari       2       0    1
2    2019-05-19  18:00      Milan   Frosinone       2       0    1
3    2018-11-04  20:30    Udinese       Milan       0       1   -1
4    2018-08-25  18:00   Juventus       Lazio       2       0    1
..          ...    ...        ...         ...     ...     ...  ...
267  2018-09-15  18:00     Napoli  Fiorentina       1       0    1
268  2018-12-15  18:00      Inter     Udinese       1       0    1
269  2018-09-27  19:00       Spal    Sassuolo       0       2   -1
270  2019-01-19  18:00    Udinese       Parma       1       2   -1
271  2019-03-17  15:00      Lazio       Parma       4       1    1

[272 rows x 7 columns]


In [46]:
skills = {}
for team in df_shuffled['team1'].unique():
    skills[team] = (25, 8.33)
for team in df_shuffled['team2'].unique():
    skills[team] = (25, 8.33)

print(skills)

{'Sampdoria': (25, 8.33), 'Udinese': (25, 8.33), 'Milan': (25, 8.33), 'Juventus': (25, 8.33), 'Inter': (25, 8.33), 'Cagliari': (25, 8.33), 'Napoli': (25, 8.33), 'Frosinone': (25, 8.33), 'Lazio': (25, 8.33), 'Roma': (25, 8.33), 'Atalanta': (25, 8.33), 'Empoli': (25, 8.33), 'Fiorentina': (25, 8.33), 'Bologna': (25, 8.33), 'Parma': (25, 8.33), 'Spal': (25, 8.33), 'Chievo': (25, 8.33), 'Sassuolo': (25, 8.33), 'Torino': (25, 8.33), 'Genoa': (25, 8.33)}


In [47]:
n_iter = 10 
sigma_t = 25/6
for index, row in df_shuffled.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    result = row['win']
    mu_s1, sigma_s1 = skills[team1]
    mu_s2, sigma_s2 = skills[team2]
    s1_samples, s2_samples, t_samples = gibbs_sampler_result(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t, n_iter, result)
    skills[team1] = (np.mean(s1_samples), np.var(s1_samples, ddof=1))
    skills[team2] = (np.mean(s2_samples), np.var(s2_samples, ddof=1))

In [48]:
sorted_data = sorted(skills.items(), key=lambda x: x[1][0])
for team, values in sorted_data:
    print(f"{team}: {values}")

Spal: (np.float64(-26.91549475712178), np.float64(79.19838786785277))
Frosinone: (np.float64(-3.7175388498455524), np.float64(0.024760300838359353))
Chievo: (np.float64(-1.3157023197393802), np.float64(0.6428189768241306))
Sassuolo: (np.float64(11.086133614107487), np.float64(4.1468182278836245))
Cagliari: (np.float64(13.70017215701821), np.float64(0.553867409725009))
Parma: (np.float64(20.45992380562432), np.float64(6.541697095215325))
Fiorentina: (np.float64(20.73440277879855), np.float64(0.6393217672923578))
Genoa: (np.float64(21.739678831722287), np.float64(0.006849326023960999))
Udinese: (np.float64(26.281329149559618), np.float64(0.009641906543119218))
Empoli: (np.float64(29.814321717278972), np.float64(0.006822917103627497))
Bologna: (np.float64(30.48008902343895), np.float64(20.27899911149764))
Sampdoria: (np.float64(32.9025558788923), np.float64(0.00022354200900542382))
Torino: (np.float64(33.17994146805493), np.float64(0.3877082196572903))
Roma: (np.float64(40.836674332940866

In [54]:
def win_probability_player1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t):
    mu_diff = mu_s1 - mu_s2
    sigma_diff = np.sqrt(sigma_s1 + sigma_s2 + sigma_t)
    p_y1 = 1 - norm.cdf(0, loc=mu_diff, scale=sigma_diff)
    return p_y1

In [63]:
mu_s1, sigma_s1 = skills['Lazio']
mu_s2, sigma_s2 = skills['Inter']
sigma_t = 25/6

In [64]:
print("Skills of Napoli is:," , mu_s1, sigma_s1)
print("Skills of Inter is:," , mu_s2, sigma_s2)

print("Win probability for team1:", win_probability_player1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t))

Skills of Napoli is:, 65.87526463127435 3.068844450070628
Skills of Inter is:, 62.226582465471914 34.957618337095795
Win probability for team1: 0.7128445161584157
