In [1]:
import numpy as np
import pandas as pd
from scipy.stats import truncnorm, norm

# Conditional skills
def conditional_skills(mu_s1, mu_s2, sigma_s1, sigma_s2, t, outcome_var):
    mean_s = np.array([mu_s1, mu_s2])
    cov_s = np.array([[sigma_s1, 0],
                      [0, sigma_s2]])  # Covariance matrix with S_12 and S_21
    outcome_mean = mu_s1 - mu_s2
    outcome_var = sigma_s1 + sigma_s2 + outcome_var # Outcome variance

    # Conditional mean update
    mean_cond = mean_s + (t - outcome_mean) / outcome_var * np.array([sigma_s1, -sigma_s2])

    # Conditional covariance update
    cov_cond = cov_s - np.outer([sigma_s1, -sigma_s2], [sigma_s1, -sigma_s2]) / outcome_var
  
    return mean_cond, cov_cond

print("conditional_skills:",conditional_skills(1, -1, 1, 4, 3, 5))


# Conditional outcome (truncated Gaussian)
def conditional_outcome(mu_s1, mu_s2, sigma_s1, sigma_s2, result, outcome_var):
    mean_diff = mu_s1 - mu_s2
    outcome_var = sigma_s1 + sigma_s2 + outcome_var
    std_diff = np.sqrt(outcome_var)

    # Set truncation limits based on match outcome
    if result == 1:
        a, b = 0, np.inf  # Truncate for team 1 winning
    else:
        a, b = -np.inf, 0  # Truncate for team 2 winning

    # Create truncated Gaussian distribution
    trunc_gauss_dist = truncnorm(a=(a - mean_diff) / std_diff, b=(b - mean_diff) / std_diff, loc=mean_diff, scale=std_diff)
    
    return a,b,#trunc_gauss_dist.mean(), trunc_gauss_dist.var()  # Return mean and variance of truncated Gaussian

print("std_diff", conditional_outcome(2, 1, -1, 4, 1,5))

# Marginal probability that team 1 wins
def marginal_p_y1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_epsilon):
    mu_diff = mu_s1 - mu_s2
    sigma_diff = np.sqrt(sigma_s1 + sigma_s2 + sigma_epsilon)
    p_y1 = 1 - norm.cdf(0, loc=mu_diff, scale=sigma_diff)
    return p_y1

print("marginal_y =", marginal_p_y1(1, -1, 1, 4,5))



# Load the dataset
file_path = 'SerieA.csv'  # Adjust the file path as necessary
df = pd.read_csv(file_path)

# Initialize team skills
team_skills = {team: {'mean': 25, 'std': 8.33} for team in pd.concat([df['team1'], df['team2']]).unique()}

# Set noise variance
sigma_epsilon = 2  # Experiment with different values

# Learning rate for updating skills
alpha = 0.5

# List for results
results = []

# Loop through each match
for idx, row in df.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    score1 = row['score1']
    score2 = row['score2']

    # Get team skill means and stddevs
    mu_s1 = team_skills[team1]['mean']
    mu_s2 = team_skills[team2]['mean']
    sigma_s1 = team_skills[team1]['std']
    sigma_s2 = team_skills[team2]['std']

    # Determine result
    result = 1 if score1 > score2 else 0
    
    # Compute conditional skills
    mean_cond, cov_cond = conditional_skills(mu_s1, mu_s2, sigma_s1, sigma_s2, result)

    # Compute conditional outcome distribution
    mean_trunc, var_trunc = conditional_outcome(mu_s1, mu_s2, sigma_s1, sigma_s2, result)
    
    # Compute marginal probability that team 1 wins
    p_team1_wins = marginal_p_y1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_epsilon)

    # Update team skills
    team_skills[team1]['mean'] += alpha * (mean_cond[0] - mu_s1)
    team_skills[team2]['mean'] += alpha * (mean_cond[1] - mu_s2)

    # Append results
    results.append({
        'team1': team1,
        'team2': team2,
        'score1': score1,
        'score2': score2,
        'p_team1_wins': p_team1_wins
    })

# Convert to DataFrame and print results
results_df = pd.DataFrame(results)
print(results_df.head())


conditional_skills: (array([ 1.1, -1.4]), array([[0.9, 0.4],
       [0.4, 2.4]]))
std_diff (0, inf)
marginal_y = 0.736455371567231


TypeError: conditional_skills() missing 1 required positional argument: 'outcome_var'