In [1]:
import numpy as np
import pandas as pd
from scipy.stats import truncnorm, norm


def conditional_outcome(s1, s2, result, sigma_t):
    """Return truncation bounds and location/scale for the outcome variable.

    The location is ``s1 - s2`` and the scale is ``sigma_t``. The support is
    ``(0, inf)`` when ``result == 1`` and ``(-inf, 0)`` for any other value.
    The bounds are returned in standardized form, ``(bound - loc) / scale``.

    Returns
    -------
    tuple
        ``(lower_std, upper_std, loc, scale)``.
    """
    loc = s1 - s2
    scale = sigma_t

    # Only an exact result of 1 selects the positive half-line.
    lower, upper = (0, np.inf) if result == 1 else (-np.inf, 0)

    lower_std = (lower - loc) / scale
    upper_std = (upper - loc) / scale
    return lower_std, upper_std, loc, scale

def gibbs_sampler_result(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t, n_iter, result):
    """Gibbs-sample two skills conditioned on one observed match result.

    Each sweep draws the outcome variable ``t`` from a truncated normal
    centred on ``s1 - s2`` and then draws ``(s1, s2)`` jointly from the
    Gaussian conditional given ``t``.

    Parameters
    ----------
    mu_s1, mu_s2 : float
        Prior means of the two skills; also used as the chain's start point.
    sigma_s1, sigma_s2 : float
        Prior spread of each skill. They are placed directly on the diagonal
        of the prior covariance matrix, i.e. they are used as variances.
    sigma_t : float
        Scale of ``t`` given the skills; squared wherever a variance is
        needed, i.e. it is used as a standard deviation.
    n_iter : int
        Number of sweeps. Every sweep is kept; no burn-in is discarded.
    result : int
        ``1`` restricts ``t`` to be positive; any other value restricts it
        to be negative (see ``conditional_outcome``).

    Returns
    -------
    tuple
        ``(mean_s1, var_s1, mean_s2, var_s2)``: sample means and sample
        variances (``ddof=1``) of the drawn skills.
    """
    s1_samples = np.zeros(n_iter)
    s2_samples = np.zeros(n_iter)

    s1_current = mu_s1
    s2_current = mu_s2

    mu_prior = np.array([mu_s1, mu_s2]).reshape((2, 1))
    sigma_prior = np.array([[sigma_s1, 0], [0, sigma_s2]])
    sigma_prior_inv = np.linalg.inv(sigma_prior)

    # t has mean A @ [s1, s2]^T = s1 - s2.
    A = np.array([1, -1]).reshape((1, 2))
    A_T = A.T

    # Neither the conditional covariance of (s1, s2) nor the prior part of
    # its mean depends on the current t, so both are computed once up front
    # instead of once per sweep.
    sigma_post = np.linalg.inv(sigma_prior_inv + (A_T @ A) / sigma_t**2)
    prior_term = sigma_prior_inv @ mu_prior

    for i in range(n_iter):
        # Draw t | s1, s2, result from the truncated normal.
        a_std, b_std, mean_diff, std_diff = conditional_outcome(s1_current, s2_current, result, sigma_t)
        t_current = truncnorm.rvs(a=a_std, b=b_std, loc=mean_diff, scale=std_diff)

        # Draw (s1, s2) | t from the bivariate normal conditional.
        mu_post = sigma_post @ (prior_term + (A_T * t_current) / sigma_t**2)
        s1_current, s2_current = np.random.multivariate_normal(mu_post.flatten(), sigma_post)

        s1_samples[i] = s1_current
        s2_samples[i] = s2_current

    mean_s1 = np.mean(s1_samples)
    var_s1 = np.var(s1_samples, ddof=1)
    mean_s2 = np.mean(s2_samples)
    var_s2 = np.var(s2_samples, ddof=1)

    return mean_s1, var_s1, mean_s2, var_s2

def win_probability_player1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t):
    """Return P(t > 0) for t ~ Normal(mu_s1 - mu_s2, total variance).

    The total variance is ``sigma_s1 + sigma_s2 + sigma_t**2``: the two skill
    arguments are added as-is (used as variances) while ``sigma_t`` is
    squared (used as a standard deviation).

    Parameters
    ----------
    mu_s1, mu_s2 : float
        Mean skills of player 1 and player 2.
    sigma_s1, sigma_s2 : float
        Skill variances of player 1 and player 2.
    sigma_t : float
        Standard deviation of the outcome noise.

    Returns
    -------
    float
        Probability that the outcome variable is positive.
    """
    mu_diff = mu_s1 - mu_s2
    sigma_diff = np.sqrt(sigma_s1 + sigma_s2 + sigma_t**2)
    # Survival function instead of 1 - cdf: the subtraction rounds to exactly
    # 0.0 once cdf is within machine epsilon of 1, losing the lower tail.
    return norm.sf(0, loc=mu_diff, scale=sigma_diff)




In [2]:
# --- Configuration ---------------------------------------------------------
data_file = 'SerieA.csv'
initial_mu = 25
# NOTE(review): this value is stored in the same slot that is later
# overwritten with a sample variance (var_s1 / var_s2 below), so it is used
# as a variance. 8.33 looks like a standard deviation (25 / 3) -- confirm
# whether the intended initial variance is 8.33 or 8.33 ** 2.
initial_sigma = 8.33
sigma_t = 25 / 6
n_iter = 100
# Only matches whose row label in the loaded CSV is below this cutoff are
# scored. NOTE(review): 272 is carried over unchanged; its rationale is not
# visible in this notebook.
prediction_index_cutoff = 272

# The sampler and the random baseline both draw from NumPy's global random
# state; seed it so a fresh "Run All" reproduces the same numbers.
np.random.seed(0)

# --- Load data -------------------------------------------------------------
df = pd.read_csv(data_file)
# Matches with equal scores are dropped; 'win' is +1 when team1 scored more
# and -1 otherwise.
df_filtered = df[df['score1'] != df['score2']].copy()
df_filtered['win'] = np.where(df_filtered['score1'] > df_filtered['score2'], 1, -1)

teams = set(df_filtered['team1']).union(set(df_filtered['team2']))
skills = {team: (initial_mu, initial_sigma) for team in teams}

# --- Sequential skill updates with one-step-ahead predictions --------------
predictions = []
random_predictions = []
actual_results = []  # true outcome of every match that was predicted

for index, row in df_filtered.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    result = row['win']

    mu_s1, sigma_s1 = skills[team1]
    mu_s2, sigma_s2 = skills[team2]

    # Predict from the pre-match skills, before this result is used to
    # update them. The true outcome is recorded next to each prediction so
    # that scoring does not depend on row labels and positions lining up.
    if index < prediction_index_cutoff:
        p_y1 = win_probability_player1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t)
        predictions.append(1 if p_y1 > 0.5 else -1)
        random_predictions.append(1 if np.random.rand() > 0.5 else -1)
        actual_results.append(result)

    mean_s1, var_s1, mean_s2, var_s2 = gibbs_sampler_result(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t, n_iter, result)

    skills[team1] = (mean_s1, var_s1)
    skills[team2] = (mean_s2, var_s2)

# --- Accuracy --------------------------------------------------------------
n_predictions = len(predictions)
correct = sum(int(pred == actual) for pred, actual in zip(predictions, actual_results))
correct_random = sum(int(pred == actual) for pred, actual in zip(random_predictions, actual_results))

if n_predictions:
    print(f"Accuracy of the model: {correct / n_predictions}")
    print(f"Accuracy of the random model: {correct_random / n_predictions}")
else:
    # Avoid a ZeroDivisionError when no match falls below the cutoff.
    print("No matches fell below the prediction cutoff; accuracy is undefined.")

Accuracy of the model: 0.6597938144329897
Accuracy of the random model: 0.4948453608247423


In [3]:
# Rank teams by mean skill, highest first. Each entry of `skills` maps a team
# to a (mean, variance) pair, so the sort key is the first element of the value.
sorted_skills = sorted(
    skills.items(),
    key=lambda entry: entry[1][0],
    reverse=True,
)

print("\nUpdated Skills:")
for team, skill_params in sorted_skills:
    # The second element is printed under the "Variance" label.
    mu, sigma = skill_params
    print(f"{team}: Mean Skill = {mu:.2f}, Variance = {sigma:.2f}")




Updated Skills:
Napoli: Mean Skill = 29.13, Variance = 2.04
Juventus: Mean Skill = 29.08, Variance = 1.02
Milan: Mean Skill = 28.93, Variance = 0.66
Atalanta: Mean Skill = 27.90, Variance = 0.91
Inter: Mean Skill = 27.89, Variance = 1.22
Torino: Mean Skill = 27.35, Variance = 1.82
Roma: Mean Skill = 26.59, Variance = 0.93
Lazio: Mean Skill = 26.26, Variance = 1.45
Bologna: Mean Skill = 25.64, Variance = 2.55
Sampdoria: Mean Skill = 25.08, Variance = 1.19
Spal: Mean Skill = 24.59, Variance = 0.79
Sassuolo: Mean Skill = 24.48, Variance = 2.20
Fiorentina: Mean Skill = 24.26, Variance = 0.71
Cagliari: Mean Skill = 24.21, Variance = 0.52
Udinese: Mean Skill = 24.16, Variance = 1.92
Empoli: Mean Skill = 24.07, Variance = 0.53
Parma: Mean Skill = 23.65, Variance = 2.51
Genoa: Mean Skill = 23.33, Variance = 1.65
Frosinone: Mean Skill = 22.14, Variance = 0.72
Chievo: Mean Skill = 20.31, Variance = 1.37
