In [3]:
import numpy as np
from scipy.stats import truncnorm
from scipy.stats import norm

#Q3.1
def conditional_skills(mu_s1, mu_s2, var_s1, var_s2, var_3, t):
    # Given values
    mean_s = np.array([mu_s1, mu_s2])
    cov_s = np.diag([var_s1, var_s2])  # Using variances directly in the diagonal covariance matrix
    var_t_given_s = var_3
    
    # Define A matrix for the affine transformation
    A = np.array([1, -1]).reshape(1, -1)  # A is [1, -1] as a row vector

    # Calculate the precision matrix (inverse of covariance) for s1 and s2
    cov_s_inv = np.linalg.inv(cov_s)
    
    # Calculate Σ_{s1, s2 | t} using the given formula
    # Σ_{s1,s2 | t} = (Σ^{-1}_{s1,s2} + A^T Σ^{-1}_{t|s1,s2} A)^{-1}
    A_t = A.T  # Transpose of A
    cov_s_inv_update = cov_s_inv + (A_t @ A) / var_t_given_s
    cov_s_given_t = np.linalg.inv(cov_s_inv_update)
    
    # Calculate the term: Σ^{-1}_{s1, s2} μ_{s1, s2} + A^T Σ^{-1}_{t | s1, s2} t
    precision_term = cov_s_inv @ mean_s + (A.flatten() / var_t_given_s) * t

    # Calculate conditional mean μ_{s1, s2 | t}
    mean_s_given_t = cov_s_given_t @ precision_term

    return mean_s_given_t, cov_s_given_t

# quiz
mean_cond, cov_cond = conditional_skills(mu_s1=1, mu_s2=-1, var_s1=1, var_s2=4, var_3=5, t=3)
print("Conditional Mean:", mean_cond)
print("Conditional Covariance Matrix:\n", cov_cond)

# Outcome function based on s1, s2, var_t, and y
def conditional_outcome(s1, s2, var_s1, var_s2, y):
    # Mean and variance for the truncated normal distribution
    mean_diff = s1 - s2
    var_t = var_s1 + var_s2 
    std_diff = np.sqrt(var_t)  # Standard deviation for the truncnorm distribution

    # Setting bounds for truncation based on the outcome y
    if y == 1:
        a, b = 0, np.inf
    elif y == -1:
        a, b = -np.inf, 0
    else:
        raise ValueError("y should be either 1 (Player 1 wins) or -1 (Player 2 wins)")

    # Create the truncated normal distribution with mean_diff and variance var_t
    trunc_gauss_dist = truncnorm(a=(a - mean_diff) / std_diff, b=(b - mean_diff) / std_diff, loc=mean_diff, scale=std_diff)

    # Return mean and variance of the truncated distribution
    mean_t = trunc_gauss_dist.mean()
    var_t_truncated = trunc_gauss_dist.var()

    return mean_t, var_t_truncated #change by s2-s1, var_t, a,b to check quiz

# Quiz
print(conditional_outcome(1,2,1,4,1))

# marginal p(y = 1)
def marginal_p_y1(mu_s1, mu_s2, var_s1, var_s2, var_3):
    # Calculate mean difference
    mu_diff = mu_s1 - mu_s2
    
    # Calculate total variance (sum of variances)
    var_diff = var_s1 + var_s2 + var_3
    
    # Standard deviation for the normal CDF
    sigma_diff = np.sqrt(var_diff)
    
    # Calculate P(Y = 1)
    p_y1 = 1 - norm.cdf(0, loc=mu_diff, scale=sigma_diff)
    
    return p_y1

# Quiz
print("marginal_y =", marginal_p_y1(1,-1, 1, 4,5))





# Load the dataset
file_path = 'SerieA.csv'
df = pd.read_csv(file_path)

# Initialize team skills
team_skills = {team: {'mean': 25, 'std': 8.33} for team in pd.concat([df['team1'], df['team2']]).unique()}

# Set noise variance
sigma_epsilon = 2  # Experiment with different values

# Learning rate for updating skills
alpha = 0.5

# List for results
results = []


# Loop through each match

"""
for idx, row in df.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    score1 = row['score1']
    score2 = row['score2']

    # Get team skill means and stddevs
    mu_s1 = team_skills[team1]['mean']
    mu_s2 = team_skills[team2]['mean']
    sigma_s1 = team_skills[team1]['std']
    sigma_s2 = team_skills[team2]['std']

    # Determine result
    result = 1 if score1 > score2 else 0
    
    # Compute conditional skills
    mean_cond, cov_cond = conditional_skills(mu_s1, mu_s2, sigma_s1, sigma_s2, result)

    # Compute conditional outcome distribution
    mean_trunc, var_trunc = conditional_outcome(mu_s1, mu_s2, sigma_s1, sigma_s2, result)

    # Compute marginal probability that team 1 wins
    p_team1_wins = marginal_p_y1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_epsilon)

    # Update team skills
    team_skills[team1]['mean'] += alpha * (mean_cond[0] - mu_s1)
    team_skills[team2]['mean'] += alpha * (mean_cond[1] - mu_s2)

    # Append results
    results.append({
        'team1': team1,
        'team2': team2,
        'score1': score1,
        'score2': score2,
        'p_team1_wins': p_team1_wins
    })

# Convert to DataFrame and print results
results_df = pd.DataFrame(results)
# print(results_df.head())
# 
# """

# Loop through each match
for idx, row in df.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    score1 = row['score1']
    score2 = row['score2']

    # Get team skill means and stddevs
    mu_s1 = team_skills[team1]['mean']
    mu_s2 = team_skills[team2]['mean']
    sigma_s1 = team_skills[team1]['std']
    sigma_s2 = team_skills[team2]['std']

    # Determine result
    result = 1 if score1 > score2 else 0
    
    # Compute conditional skills
    mean_cond, cov_cond = conditional_skills(mu_s1, mu_s2, sigma_s1, sigma_s2, result)

    """
    print(f"Iteration {idx}")
    print(f"Team 1: {team1}, Team 2: {team2}")
    print(f"Old means: mu_s1 = {mu_s1}, mu_s2 = {mu_s2}")
    print(f"Conditional means: {mean_cond}")
    print(f"Conditional covariance: {cov_cond}")
    """

    # Compute conditional outcome distribution
    mean_trunc, var_trunc = conditional_outcome(mu_s1, mu_s2, sigma_s1, sigma_s2, result)
    # print(f"Truncated Gaussian mean: {mean_trunc}, variance: {var_trunc}")
    

    # Compute marginal probability that team 1 wins
    p_team1_wins = marginal_p_y1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_epsilon)
    # print(f"Marginal probability team 1 wins: {p_team1_wins}")

    # Update team skills
    team_skills[team1]['mean'] += alpha * (mean_cond[0] - mu_s1)
    team_skills[team2]['mean'] += alpha * (mean_cond[1] - mu_s2)

    # Append results
    results.append({
        'team1': team1,
        'team2': team2,
        'score1': score1,
        'score2': score2,
        'p_team1_wins': p_team1_wins
    })

# Convert to DataFrame and print results
results_df = pd.DataFrame(results)
# print(results_df.head())


(array([ 1.04545455, -1.72727273]), array([[0.95454545, 0.72727273],
       [0.72727273, 4.36363636]]))
std_diff 19.000000000000004
marginal_y = 0.6813240558830315


TypeError: conditional_skills() missing 1 required positional argument: 't'