In [2]:
import numpy as np
import pandas as pd
from scipy.stats import norm

# Function to update the skills based on the match outcome
def update_skills(team1, team2, result, team_skills, outcome_var=1):
    """
    Updates the skill means of two teams after a match using truncated Gaussian distribution.

    team1, team2: Names or indexes of teams in team_skills.
    result: 1 if team1 wins, -1 if team2 wins.
    team_skills: Dictionary of teams with their skill means and standard deviations.
    outcome_var: Variance of the match outcome (default is 1).
    """
    
    skill1 = team_skills[team1]
    skill2 = team_skills[team2]
    
    mu_diff = skill1['mean'] - skill2['mean']
    sigma_diff = np.sqrt(skill1['std']**2 + skill2['std']**2 + outcome_var)
    
    if result == 1:
        # Team 1 wins (truncate at t > 0)
        mu_trunc = mu_diff + (norm.pdf(-mu_diff / sigma_diff) / norm.cdf(mu_diff / sigma_diff)) * sigma_diff
    else:
        # Team 2 wins (truncate at t < 0)
        mu_trunc = mu_diff - (norm.pdf(-mu_diff / sigma_diff) / (1 - norm.cdf(mu_diff / sigma_diff))) * sigma_diff
    
    # Skill updates for team1 and team2 based on truncated outcome
    skill1_update = skill1['mean'] + (mu_trunc * (skill1['std']**2) / (skill1['std']**2 + skill2['std']**2))
    skill2_update = skill2['mean'] - (mu_trunc * (skill2['std']**2) / (skill1['std']**2 + skill2['std']**2))
    
    team_skills[team1]['mean'] = skill1_update
    team_skills[team2]['mean'] = skill2_update
    
    return team_skills

# Load the SerieA dataset
file_path = 'SerieA.csv'
data = pd.read_csv(file_path)

# Extract unique team names from the dataset
teams = pd.concat([data['team1'], data['team2']]).unique()

# Initialize the skills for all teams
initial_mean = 25
initial_std = 25 / 3
team_skills = {team: {'mean': initial_mean, 'std': initial_std} for team in teams}

# Iterate over each match in the dataset
for index, row in data.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    result = row['score1'] - row['score2']  # Positive result means team1 wins, negative means team2 wins
    
    if result > 0:
        match_result = 1  # Team 1 wins
    else:
        match_result = -1  # Team 2 wins
    
    # Update skills based on match result
    team_skills = update_skills(team1, team2, match_result, team_skills)

# Print out the updated skills for each team
for team, skill in team_skills.items():
    print(f"Team: {team}, Updated Skill Mean: {skill['mean']:.2f}, Skill StdDev: {skill['std']:.2f}")


Team: Chievo, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Lazio, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Torino, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Sassuolo, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Parma, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Empoli, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Bologna, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Atalanta, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Juventus, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Napoli, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Spal, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Udinese, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Inter, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Genoa, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Frosinone, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Fiorentina, Updated Skill Mean: nan, Skill StdDev: 8.33
Team: Cagliari, Updated Skill Mean: nan, Skill StdDev: 8.33
Tea

  mu_trunc = mu_diff - (norm.pdf(-mu_diff / sigma_diff) / (1 - norm.cdf(mu_diff / sigma_diff))) * sigma_diff
  mu_trunc = mu_diff - (norm.pdf(-mu_diff / sigma_diff) / (1 - norm.cdf(mu_diff / sigma_diff))) * sigma_diff


In [4]:
import numpy as np
import pandas as pd
from scipy.stats import norm

# Function to update the skills based on the match outcome
def update_skills(team1, team2, result, team_skills, outcome_var=1, epsilon=1e-5):
    """
    Updates the skill means of two teams after a match using truncated Gaussian distribution.

    team1, team2: Names or indexes of teams in team_skills.
    result: 1 if team1 wins, -1 if team2 wins.
    team_skills: Dictionary of teams with their skill means and standard deviations.
    outcome_var: Variance of the match outcome (default is 1).
    epsilon: A small value to prevent division by zero or undefined operations.
    """
    
    skill1 = team_skills[team1]
    skill2 = team_skills[team2]
    
    mu_diff = skill1['mean'] - skill2['mean']
    sigma_diff = np.sqrt(skill1['std']**2 + skill2['std']**2 + outcome_var)
    
    # Ensure the standard deviation isn't too small
    if sigma_diff < epsilon:
        sigma_diff = epsilon
    
    if result == 1:
        # Team 1 wins (truncate at t > 0)
        cdf_value = norm.cdf(mu_diff / sigma_diff)
        if cdf_value == 0:
            cdf_value = epsilon  # Avoid division by zero
        mu_trunc = mu_diff + (norm.pdf(-mu_diff / sigma_diff) / cdf_value) * sigma_diff
    else:
        # Team 2 wins (truncate at t < 0)
        cdf_value = norm.cdf(mu_diff / sigma_diff)
        if cdf_value == 1:
            cdf_value = 1 - epsilon  # Avoid division by zero
        mu_trunc = mu_diff - (norm.pdf(-mu_diff / sigma_diff) / (1 - cdf_value)) * sigma_diff
    
    # Skill updates for team1 and team2 based on truncated outcome
    skill1_update = skill1['mean'] + (mu_trunc * (skill1['std']**2) / (skill1['std']**2 + skill2['std']**2))
    skill2_update = skill2['mean'] - (mu_trunc * (skill2['std']**2) / (skill1['std']**2 + skill2['std']**2))
    
    team_skills[team1]['mean'] = skill1_update
    team_skills[team2]['mean'] = skill2_update
    
    return team_skills

# Load the SerieA dataset
file_path = 'SerieA.csv'
data = pd.read_csv(file_path)

# Extract unique team names from the dataset
teams = pd.concat([data['team1'], data['team2']]).unique()

# Initialize the skills for all teams
initial_mean = 25
initial_std = 25 / 3
team_skills = {team: {'mean': initial_mean, 'std': initial_std} for team in teams}

# Iterate over each match in the dataset
for index, row in data.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    result = row['score1'] - row['score2']  # Positive result means team1 wins, negative means team2 wins
    
    if result > 0:
        match_result = 1  # Team 1 wins
    else:
        match_result = -1  # Team 2 wins
    
    # Update skills based on match result
    team_skills = update_skills(team1, team2, match_result, team_skills)

# Print out the updated skills for each team
for team, skill in team_skills.items():
    print(f"Team: {team}, Updated Skill Mean: {skill['mean']:.2f}, Skill StdDev: {skill['std']:.2f}")


Team: Chievo, Updated Skill Mean: -5266165.81, Skill StdDev: 8.33
Team: Lazio, Updated Skill Mean: 3811735.89, Skill StdDev: 8.33
Team: Torino, Updated Skill Mean: 3002380.69, Skill StdDev: 8.33
Team: Sassuolo, Updated Skill Mean: -286358.59, Skill StdDev: 8.33
Team: Parma, Updated Skill Mean: 3149782.89, Skill StdDev: 8.33
Team: Empoli, Updated Skill Mean: -1637854.05, Skill StdDev: 8.33
Team: Bologna, Updated Skill Mean: -4662732.59, Skill StdDev: 8.33
Team: Atalanta, Updated Skill Mean: 1690268.15, Skill StdDev: 8.33
Team: Juventus, Updated Skill Mean: -181020.61, Skill StdDev: 8.33
Team: Napoli, Updated Skill Mean: 5741822.61, Skill StdDev: 8.33
Team: Spal, Updated Skill Mean: -940261.53, Skill StdDev: 8.33
Team: Udinese, Updated Skill Mean: 2948128.47, Skill StdDev: 8.33
Team: Inter, Updated Skill Mean: 5169727.13, Skill StdDev: 8.33
Team: Genoa, Updated Skill Mean: -5895233.70, Skill StdDev: 8.33
Team: Frosinone, Updated Skill Mean: -975521.54, Skill StdDev: 8.33
Team: Fiorentina