In [6]:
import numpy as np
import pandas as pd
from scipy.stats import truncnorm, norm
import matplotlib.pyplot as plt
import seaborn as sns
from six import print_, moves


# Conditional Outcome Function
def conditional_outcome(s1, s2, result, sigma_t):
    mean_diff = s1 - s2
    std_diff = sigma_t

    if result == 1:
        a, b = 0, np.inf  
    else:
        a, b = -np.inf, 0  

    a_std = (a - mean_diff) / std_diff
    b_std = (b - mean_diff) / std_diff

    return a_std, b_std, mean_diff, std_diff

def gibbs_sampler_result(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t, n_iter, result):
    s1_samples = np.zeros(n_iter)
    s2_samples = np.zeros(n_iter)
    t_samples = np.zeros(n_iter)
    y_samples = np.zeros(n_iter)
    
    s1_current = mu_s1
    s2_current = mu_s2

    mu_prior = np.array([mu_s1, mu_s2]).reshape((2, 1))
    sigma_prior = np.array([[sigma_s1, 0], [0, sigma_s2]])
    sigma_prior_inv = np.linalg.inv(sigma_prior)

    A = np.array([1, -1]).reshape((1, 2))
    A_T = A.T

    for i in range(n_iter):
        a_std, b_std, mean_diff, std_diff = conditional_outcome(s1_current, s2_current, result, sigma_t)
        t_current = truncnorm.rvs(a=a_std, b=b_std, loc=mean_diff, scale=std_diff)
        sigma_post = np.linalg.inv(sigma_prior_inv + (A_T @ A) / sigma_t**2)
        mu_post = sigma_post @ (sigma_prior_inv @ mu_prior + (A_T * t_current) / sigma_t**2)
        s1_current, s2_current = np.random.multivariate_normal(mu_post.flatten(), sigma_post)
        s1_samples[i] = s1_current
        s2_samples[i] = s2_current
        t_samples[i] = t_current
        y_samples[i] = result

    mean_s1 = np.mean(s1_samples)
    var_s1 = np.var(s1_samples, ddof=1)
    mean_s2 = np.mean(s2_samples)
    var_s2 = np.var(s2_samples, ddof=1)

    return mean_s1, var_s1, mean_s2, var_s2

def win_probability_player1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t):
    mu_diff = mu_s1 - mu_s2
    sigma_diff = np.sqrt(sigma_s1 + sigma_s2 + sigma_t**2)
    p_y1 = 1 - norm.cdf(0, loc=mu_diff, scale=sigma_diff)
    return p_y1




In [11]:
data_file = 'SerieA.csv'
df = pd.read_csv(data_file)
df_filtered = df[df['score1'] != df['score2']].copy()
df_filtered['win'] = df_filtered.apply(lambda row: 1 if row['score1'] > row['score2'] else -1, axis=1)
initial_mu = 25
initial_sigma = 8.33 ** 2
sigma_t = 25 / 6
teams = set(df_filtered['team1']).union(set(df_filtered['team2']))
skills = {team: (initial_mu, initial_sigma) for team in teams}
n_iter = 100 
predictions = []
random_predictions = []



for index, row in df_filtered.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    result = row['win']

    # Get prior means and variances
    mu_s1, sigma_s1 = skills[team1]
    mu_s2, sigma_s2 = skills[team2]

    # Run Gibbs sampler
    mean_s1, var_s1, mean_s2, var_s2 = gibbs_sampler_result(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t, n_iter, result)

    # Update skills: Update both mean and variance
    skills[team1] = (mean_s1, var_s1)
    skills[team2] = (mean_s2, var_s2)
    
    if index < 272:
        p_y1 = win_probability_player1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t)
        predictions.append(1 if p_y1 > 0.5 else -1)
        random_predictions.append(1 if np.random.rand() > 0.5 else -1)

correct = 0
correct_random = 0

for i in range(len(predictions)):
    if predictions[i] == df_filtered.iloc[i]['win']:
        correct += 1
    if random_predictions[i] == df_filtered.iloc[i]['win']:
        correct_random += 1

print(f"Accuracy of the model: {correct / len(predictions)}")
print(f"Accuracy of the random model: {correct_random / len(predictions)}")

Accuracy of the model: 0.6494845360824743
Accuracy of the random model: 0.4742268041237113


In [12]:
# Sort and Display the Updated Skills
# Sort teams by their updated mean skills in descending order
sorted_skills = sorted(skills.items(), key=lambda x: x[1][0], reverse=True)

print("\nUpdated Skills:")
for team, (mu, sigma) in sorted_skills:
    print(f"{team}: Mean Skill = {mu:.2f}, Variance = {sigma:.2f}")




Updated Skills:
Juventus: Mean Skill = 32.57, Variance = 1.32
Milan: Mean Skill = 28.33, Variance = 1.51
Inter: Mean Skill = 28.21, Variance = 1.84
Atalanta: Mean Skill = 27.28, Variance = 1.20
Napoli: Mean Skill = 27.19, Variance = 3.01
Roma: Mean Skill = 26.90, Variance = 1.45
Torino: Mean Skill = 26.35, Variance = 1.74
Lazio: Mean Skill = 25.70, Variance = 0.78
Sampdoria: Mean Skill = 24.22, Variance = 0.99
Spal: Mean Skill = 23.91, Variance = 1.95
Bologna: Mean Skill = 23.89, Variance = 1.64
Cagliari: Mean Skill = 23.30, Variance = 2.36
Parma: Mean Skill = 23.23, Variance = 1.13
Genoa: Mean Skill = 23.14, Variance = 2.65
Empoli: Mean Skill = 23.10, Variance = 1.75
Fiorentina: Mean Skill = 22.94, Variance = 1.10
Udinese: Mean Skill = 22.79, Variance = 2.42
Sassuolo: Mean Skill = 22.40, Variance = 1.30
Frosinone: Mean Skill = 19.64, Variance = 1.69
Chievo: Mean Skill = 18.72, Variance = 3.51


In [14]:
n = 1000
mu1, sigma1 = 25, 8.33 ** 2
mu2, sigma2 = 25, 8.33 ** 2
sigma_t = (25 / 6) 
result = 1
s1_mean, s1_variance, s2_mean, s2_variance = gibbs_sampler_result(mu1, mu2, sigma1, sigma2, sigma_t, n, result)

print(f"Mean of s1: {s1_mean}")
print(f"Variance of s1: {s1_variance}")
print(f"Mean of s2: {s2_mean}")
print(f"Variance of s2: {s2_variance}")


Mean of s1: 29.015796190129848
Variance of s1: 49.32923285444963
Mean of s2: 21.02977340456836
Variance of s2: 48.1958703393146


In [50]:
import numpy as np
from scipy.stats import truncnorm
from six import print_


def multiplyGauss(m1, s1, m2, s2):
    s = 1 / (1 / s1 + 1 / s2)
    m = (m1 / s1 + m2 / s2) * s
    return m, s

def divideGauss(m1, s1, m2, s2):
    m,s =  multiplyGauss(m1, s1, m2, -s2)
    return m, s

def truncGaussMM(a,b,m0,s0):
    a_scaled, b_scaled = (a - m0) / np.sqrt(s0), (b - m0) / np.sqrt(s0)
    m = truncnorm.mean(a_scaled,b_scaled, loc=m0, scale=np.sqrt(s0))
    s = truncnorm.var(a_scaled, b_scaled, loc=m0, scale=np.sqrt(s0))
    return m,s 

In [88]:
m1, s1 =  25, (25/3) ** 2
m2, s2 = 25, (25/3) ** 2
sv = (25/6) ** 2
y0 = 1
mu_diff_m = m1 - m2
mu_diff_s = s1 + s2 + sv

mu3_m = m1
mu3_s = s1

mu4_m = mu3_m
mu4_s = mu3_s

mu5_m = mu_diff_m 
mu5_s = mu_diff_s 

if y0 == 1:
    a,b = 0, np.inf
else:
    a,b = -np.inf, 0


pt_m, pt_s = truncGaussMM(a, b, mu5_m, mu5_s)
# Compute the message from t to f_3
mu6_m, mu6_s = divideGauss(pt_m, pt_s, mu5_m, mu5_s)
# Compute the message from f_3 to s1
mu7_m = mu6_m 
mu7_s = mu6_s 
# Compute the marginal of x
px_m, px_s = multiplyGauss(mu3_m, mu3_s, mu7_m, mu7_s)
print(f"Mean of x: {px_m}")
print(f"Variance of x: {px_s}")


#compute the message from f_3 to s2
mu_8m = mu6_m 
mu_8s = mu6_s 
px_m, px_s = multiplyGauss(mu3_m, mu3_s, mu_8m, mu_8s)
print(f"Mean of x: {px_m}")
print(f"Variance of x: {px_s}")


Mean of x: 20.91401871338585
Variance of x: 39.04357954105098
Mean of x: 20.91401871338585
Variance of x: 39.04357954105098
