In [2]:
import numpy as np
import pandas as pd
from scipy.stats import truncnorm, norm
import matplotlib.pyplot as plt
import seaborn as sns

# Conditional Outcome Function
def conditional_outcome(s1, s2, result, sigma_t):
    """Return standardised truncation bounds for the performance difference.

    The performance difference is modelled as normal with mean ``s1 - s2``
    and standard deviation ``sigma_t``.  The observed result restricts it to
    the positive half-line when ``result == 1`` and to the negative half-line
    for any other value of ``result``.

    Returns:
        Tuple ``(a_std, b_std, mean_diff, std_diff)`` where ``a_std`` and
        ``b_std`` are the lower/upper bounds expressed in standard-deviation
        units around ``mean_diff`` (the form ``scipy.stats.truncnorm``
        expects), and ``mean_diff`` / ``std_diff`` are the location and scale
        of the untruncated normal.
    """
    gap = s1 - s2

    # Raw (unstandardised) support implied by the match outcome.
    lower, upper = (0, np.inf) if result == 1 else (-np.inf, 0)

    # Shift by the mean and rescale by the standard deviation.
    return (lower - gap) / sigma_t, (upper - gap) / sigma_t, gap, sigma_t

def gibbs_sampler_result(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t, n_iter, result, *, rng=None):
    """Estimate both players' posterior skill moments after one match.

    Runs a two-block Gibbs sampler that alternates between
    (1) drawing the performance difference ``t`` from a normal centred on
    ``s1 - s2`` with standard deviation ``sigma_t``, truncated to the side
    of zero implied by ``result``, and
    (2) drawing ``(s1, s2)`` jointly from their Gaussian conditional given
    ``t``.

    Parameters:
        mu_s1, mu_s2: prior means of the two skills.
        sigma_s1, sigma_s2: prior *variances* of the two skills (they are
            placed directly on the diagonal of the prior covariance).
        sigma_t: standard deviation of the performance noise (it is squared
            wherever a variance is needed).
        n_iter: number of Gibbs iterations.  Every draw is used in the
            returned moments; no burn-in is discarded.  At least two draws
            are needed for the ``ddof=1`` variance to be defined.
        result: ``1`` if player 1 won; any other value is treated as a win
            for player 2.
        rng: optional ``numpy.random.Generator`` / ``RandomState`` used for
            all draws.  When ``None`` (default) the global NumPy random
            state is used, exactly as before.

    Returns:
        ``(mean_s1, var_s1, mean_s2, var_s2)`` - sample means and unbiased
        sample variances of the skill draws.
    """
    s1_samples = np.zeros(n_iter)
    s2_samples = np.zeros(n_iter)

    # Start the chain at the prior means.
    s1_current = mu_s1
    s2_current = mu_s2

    # Prior mean (column vector) and diagonal prior covariance.
    mu_prior = np.array([mu_s1, mu_s2]).reshape((2, 1))
    sigma_prior = np.array([[sigma_s1, 0], [0, sigma_s2]])
    sigma_prior_inv = np.linalg.inv(sigma_prior)

    # t = A @ [s1, s2]^T + noise, i.e. t is centred on s1 - s2.
    A = np.array([1, -1]).reshape((1, 2))
    A_T = A.T
    obs_var = sigma_t**2

    # Neither the posterior covariance nor the prior contribution to the
    # posterior mean depends on the sampled t, so compute them once instead
    # of inverting a matrix on every iteration.
    sigma_post = np.linalg.inv(sigma_prior_inv + (A_T @ A) / obs_var)
    prior_term = sigma_prior_inv @ mu_prior

    # Source of the joint skill draw: the global state unless an rng is given.
    draw_skills = (np.random if rng is None else rng).multivariate_normal

    for i in range(n_iter):
        # Sample t | s1, s2, result from the truncated normal.
        a_std, b_std, mean_diff, std_diff = conditional_outcome(s1_current, s2_current, result, sigma_t)
        t_current = truncnorm.rvs(a=a_std, b=b_std, loc=mean_diff, scale=std_diff, random_state=rng)

        # Sample (s1, s2) | t from the bivariate Gaussian conditional.
        mu_post = sigma_post @ (prior_term + (A_T * t_current) / obs_var)
        s1_current, s2_current = draw_skills(mu_post.flatten(), sigma_post)

        s1_samples[i] = s1_current
        s2_samples[i] = s2_current

    # Summarise the chain by its first two moments.
    mean_s1 = np.mean(s1_samples)
    var_s1 = np.var(s1_samples, ddof=1)
    mean_s2 = np.mean(s2_samples)
    var_s2 = np.var(s2_samples, ddof=1)

    return mean_s1, var_s1, mean_s2, var_s2

def win_probability_player1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t):
    """Return the predicted probability that player 1 beats player 2.

    The performance difference is treated as normal with mean
    ``mu_s1 - mu_s2`` and variance ``sigma_s1 + sigma_s2 + sigma_t**2``;
    the result is the probability that this difference is positive.

    Parameters:
        mu_s1, mu_s2: skill means of the two players.
        sigma_s1, sigma_s2: skill *variances* (added unsquared).
        sigma_t: standard deviation of the performance noise (squared here).
    """
    mu_diff = mu_s1 - mu_s2
    sigma_diff = np.sqrt(sigma_s1 + sigma_s2 + sigma_t**2)
    # Use the survival function rather than 1 - cdf: the subtraction cancels
    # to exactly 0.0 for a heavy underdog, while sf keeps the tail value.
    return norm.sf(0, loc=mu_diff, scale=sigma_diff)


# Load the match list and keep only decided matches (draws are dropped).
data_file = 'SerieA.csv'
df = pd.read_csv(data_file)
df_filtered = df[df['score1'] != df['score2']].copy()
# Outcome from team1's point of view: 1 = team1 won, -1 = team2 won.
# Vectorised instead of a row-wise apply; also works on an empty frame.
df_filtered['win'] = np.where(df_filtered['score1'] > df_filtered['score2'], 1, -1)

# Initialize Skills for Each Team
# Each skill is stored as a (mean, variance) tuple: the second element is fed
# to gibbs_sampler_result as a prior variance and is overwritten below with
# the sample variance it returns.
initial_mu = 25
# NOTE(review): 8.33 looks like the standard deviation 25/3, yet it is used
# as a variance here - confirm whether (25/3)**2 was intended.
initial_sigma = 8.33
sigma_t = 25 / 6  # standard deviation of the performance noise

teams = set(df_filtered['team1']) | set(df_filtered['team2'])
skills = dict.fromkeys(teams, (initial_mu, initial_sigma))

n_iter = 1000  # Number of iterations for the Gibbs sampler


# Process the matches in file order; each posterior becomes the prior for
# that team's next match.
for team1, team2, result in zip(df_filtered['team1'], df_filtered['team2'], df_filtered['win']):
    # Get prior means and variances
    mu_s1, sigma_s1 = skills[team1]
    mu_s2, sigma_s2 = skills[team2]

    # Run Gibbs sampler
    mean_s1, var_s1, mean_s2, var_s2 = gibbs_sampler_result(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t, n_iter, result)

    # Update skills: Update both mean and variance
    skills[team1] = (mean_s1, var_s1)
    skills[team2] = (mean_s2, var_s2)

# Sort and Display the Updated Skills
# Sort teams by their updated mean skills in descending order
sorted_skills = sorted(skills.items(), key=lambda x: x[1][0], reverse=True)

print("\nUpdated Skills:")
for team, (mu, sigma) in sorted_skills:
    print(f"{team}: Mean Skill = {mu:.2f}, Variance = {sigma:.2f}")


# Calculate win probability for Lazio vs. Inter
team1 = 'Lazio'
team2 = 'Inter'

mu_s1, sigma_s1 = skills[team1]
mu_s2, sigma_s2 = skills[team2]

prob_win = win_probability_player1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t)

print(f"\nWin Probability for {team1} vs. {team2}:")
print(f"Skills of {team1}: Mean = {mu_s1:.2f}, Variance = {sigma_s1:.2f}")
print(f"Skills of {team2}: Mean = {mu_s2:.2f}, Variance = {sigma_s2:.2f}")
print(f"Probability that {team1} wins: {prob_win:.4f}")



Updated Skills:
Juventus: Mean Skill = 29.74, Variance = 1.09
Napoli: Mean Skill = 28.13, Variance = 1.42
Milan: Mean Skill = 27.50, Variance = 1.12
Torino: Mean Skill = 27.18, Variance = 1.37
Inter: Mean Skill = 26.85, Variance = 1.00
Roma: Mean Skill = 26.80, Variance = 1.01
Atalanta: Mean Skill = 26.68, Variance = 0.97
Lazio: Mean Skill = 25.84, Variance = 0.66
Sampdoria: Mean Skill = 25.14, Variance = 0.97
Spal: Mean Skill = 24.32, Variance = 1.17
Bologna: Mean Skill = 24.02, Variance = 1.00
Udinese: Mean Skill = 23.93, Variance = 1.17
Parma: Mean Skill = 23.90, Variance = 0.98
Sassuolo: Mean Skill = 23.83, Variance = 1.43
Empoli: Mean Skill = 23.71, Variance = 1.13
Fiorentina: Mean Skill = 23.55, Variance = 1.63
Cagliari: Mean Skill = 23.42, Variance = 1.03
Genoa: Mean Skill = 23.41, Variance = 1.50
Frosinone: Mean Skill = 21.59, Variance = 1.34
Chievo: Mean Skill = 19.98, Variance = 1.40

Win Probability for Lazio vs. Inter:
Skills of Lazio: Mean = 25.84, Variance = 0.66
Skills 