In [None]:
import numpy as np
from scipy.stats import norm

def compute_p_y_1(mu1, mu2, sigma1, sigma2, sigma_epsilon):
    """
    Compute the probability that Player 1 wins, p(y = 1), given their means and variances.
    
    Parameters:
    mu1: float - mean of Player 1's skill
    mu2: float - mean of Player 2's skill
    sigma1: float - standard deviation of Player 1's skill
    sigma2: float - standard deviation of Player 2's skill
    sigma_epsilon: float - standard deviation of the noise in the game outcome
    
    Returns:
    p_y_1: float - probability that Player 1 wins
    """
    # Compute the mean and variance difference
    mu_diff = mu1 - mu2
    sigma_diff = np.sqrt(sigma1**2 + sigma2**2 + sigma_epsilon**2)
    
    # Compute the probability using the cumulative distribution function (CDF)
    p_y_1 = 1 - norm.cdf(-mu_diff / sigma_diff)
    
    return p_y_1

# Example usage
mu1 = 25  # mean skill of player 1
mu2 = 23  # mean skill of player 2
sigma1 = 25 / 3  # standard deviation of player 1's skill
sigma2 = 25 / 3  # standard deviation of player 2's skill
sigma_epsilon = 25 / 6  # noise in the game outcome

# Compute p(y = 1)
p_y_1 = compute_p_y_1(mu1, mu2, sigma1, sigma2, sigma_epsilon)
print(f"Probability that Player 1 wins: {p_y_1:.4f}")


In [3]:
import pandas as pd
import numpy as np
from scipy.stats import norm

def compute_p_y_1(mu1, mu2, sigma1, sigma2, sigma_epsilon):
    """
    Compute the probability that Player 1 wins, p(y = 1), given their means and variances.
    
    Parameters:
    mu1: float - mean of Player 1's skill
    mu2: float - mean of Player 2's skill
    sigma1: float - standard deviation of Player 1's skill
    sigma2: float - standard deviation of Player 2's skill
    sigma_epsilon: float - standard deviation of the noise in the game outcome
    
    Returns:
    p_y_1: float - probability that Player 1 wins
    """
    mu_diff = mu1 - mu2
    sigma_diff = np.sqrt(sigma1**2 + sigma2**2 + sigma_epsilon**2)
    
    # Compute the probability using the cumulative distribution function (CDF)
    p_y_1 = 1 - norm.cdf(-mu_diff / sigma_diff)
    
    return p_y_1

def update_skills(player1, player2, result, player_skills, sigma_epsilon):
    """
    Update the skills of two players after a match result.
    
    Parameters:
    player1: str - name of player 1
    player2: str - name of player 2
    result: int - 1 if player 1 wins, 0 if player 2 wins
    player_skills: dict - dictionary of player skills (means and stds)
    sigma_epsilon: float - standard deviation of the noise in the game outcome
    
    Returns:
    player_skills: dict - updated player skills
    """
    skill1 = player_skills[player1]
    skill2 = player_skills[player2]
    
    mean_diff = skill1['mean'] - skill2['mean']
    std_diff = np.sqrt(skill1['std']**2 + skill2['std']**2 + sigma_epsilon**2)
    
    if result == 1:
        # Player 1 wins
        outcome_mean = mean_diff + (norm.pdf(-mean_diff / std_diff) / (1 - norm.cdf(mean_diff / std_diff))) * std_diff
    else:
        # Player 2 wins
        outcome_mean = mean_diff - (norm.pdf(-mean_diff / std_diff) / (norm.cdf(mean_diff / std_diff))) * std_diff
    
    # Update player means based on the result
    player_skills[player1]['mean'] += (outcome_mean * skill1['std']**2) / (skill1['std']**2 + skill2['std']**2)
    player_skills[player2]['mean'] -= (outcome_mean * skill2['std']**2) / (skill1['std']**2 + skill2['std']**2)

    return player_skills

# Load the dataset
file_path = 'SerieA.csv'
df = pd.read_csv(file_path)

# Example skill initialization (assuming all teams start with mean 25 and variance (25/3)^2)
initial_mean = 25
initial_sigma = 25 / 3

# Dictionary to store team skills (mean and stddev)
team_skills = {}

# Initialize all team skills with the same mean and standard deviation
teams = pd.concat([df['team1'], df['team2']]).unique()
for team in teams:
    team_skills[team] = {'mean': initial_mean, 'std': initial_sigma}

# Example sigma_epsilon value (you can change it based on your assumption)
sigma_epsilon = 25 / 6

# Iterate over all matches in the dataset, compute win probabilities, and update skills
results = []
for idx, row in df.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    score1 = row['score1']  # Goals scored by team1
    score2 = row['score2']  # Goals scored by team2
    
    # Determine the result: 1 if team1 wins, 0 if team2 wins
    if score1 > score2:
        result = 1
    elif score1 < score2:
        result = 0
    else:
        # If it's a draw, you might handle it differently. For now, we skip draws.
        continue
    
    # Get skill means and standard deviations for both teams
    mu1 = team_skills[team1]['mean']
    sigma1 = team_skills[team1]['std']
    
    mu2 = team_skills[team2]['mean']
    sigma2 = team_skills[team2]['std']
    
    # Compute the probability that team1 wins against team2
    p_y_1 = compute_p_y_1(mu1, mu2, sigma1, sigma2, sigma_epsilon)
    
    # Update the skills based on the match result
    team_skills = update_skills(team1, team2, result, team_skills, sigma_epsilon)
    
    # Store the result
    results.append({
        'team1': team1,
        'team2': team2,
        'p_team1_wins': p_y_1,
        'score1': score1,
        'score2': score2,
        'result': result,
        'team1_updated_mean': team_skills[team1]['mean'],
        'team2_updated_mean': team_skills[team2]['mean']
    })

# Convert the results to a DataFrame
results_df = pd.DataFrame(results)

# Display the results
print(results_df)


        team1     team2  p_team1_wins  score1  score2  result  \
0      Chievo  Juventus           0.5       2       3       0   
1       Lazio    Napoli           0.5       1       2       0   
2      Torino      Roma           0.5       0       1       0   
3    Sassuolo     Inter           0.5       1       0       1   
4      Empoli  Cagliari           0.5       2       0       1   
..        ...       ...           ...     ...     ...     ...   
267      Spal     Milan           NaN       2       3       0   
268      Roma     Parma           NaN       2       1       1   
269     Inter    Empoli           NaN       2       1       1   
270  Cagliari   Udinese           NaN       1       2       0   
271  Atalanta  Sassuolo           NaN       3       1       1   

     team1_updated_mean  team2_updated_mean  
0             20.013221           29.986779  
1             20.013221           29.986779  
2             20.013221           29.986779  
3             29.986779           2

  outcome_mean = mean_diff + (norm.pdf(-mean_diff / std_diff) / (1 - norm.cdf(mean_diff / std_diff))) * std_diff
  mu_diff = mu1 - mu2
  mean_diff = skill1['mean'] - skill2['mean']
  outcome_mean = mean_diff - (norm.pdf(-mean_diff / std_diff) / (norm.cdf(mean_diff / std_diff))) * std_diff


In [4]:
import pandas as pd
import numpy as np
from scipy.stats import norm

def compute_p_y_1(mu1, mu2, sigma1, sigma2, sigma_epsilon):
    """
    Compute the probability that Team 1 wins, p(y = 1), given their means and variances.
    
    Parameters:
    mu1: float - mean of Team 1's skill
    mu2: float - mean of Team 2's skill
    sigma1: float - standard deviation of Team 1's skill
    sigma2: float - standard deviation of Team 2's skill
    sigma_epsilon: float - standard deviation of the noise in the game outcome
    
    Returns:
    p_y_1: float - probability that Team 1 wins
    """
    mu_diff = mu1 - mu2
    sigma_diff = np.sqrt(sigma1**2 + sigma2**2 + sigma_epsilon**2)
    
    # Compute the probability that Team 1 wins using the CDF of the normal distribution
    p_y_1 = 1 - norm.cdf(-mu_diff / sigma_diff)
    
    return p_y_1

# Load the dataset
file_path = 'SerieA.csv'
df = pd.read_csv(file_path)

# Example skill initialization (assuming all teams start with mean 25 and variance (25/3)^2)
initial_mean = 25
initial_sigma = 25 / 3

# Dictionary to store team skills (means and stddevs)
team_skills = {}

# Initialize all team skills with the same mean and standard deviation
teams = pd.concat([df['team1'], df['team2']]).unique()
for team in teams:
    team_skills[team] = {'mean': initial_mean, 'std': initial_sigma}

# Example sigma_epsilon value (you can change it based on your assumption)
sigma_epsilon = 25 / 6

# Iterate over all matches in the dataset and compute p(y=1)
results = []
for idx, row in df.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    score1 = row['score1']  # Goals scored by team1
    score2 = row['score2']  # Goals scored by team2
    
    # Get skill means and standard deviations for both teams
    mu1 = team_skills[team1]['mean']
    sigma1 = team_skills[team1]['std']
    
    mu2 = team_skills[team2]['mean']
    sigma2 = team_skills[team2]['std']
    
    # Compute the probability that team1 wins against team2
    p_team1_wins = compute_p_y_1(mu1, mu2, sigma1, sigma2, sigma_epsilon)
    
    # Store the result
    results.append({
        'team1': team1,
        'team2': team2,
        'p_team1_wins': p_team1_wins,
        'score1': score1,
        'score2': score2
    })

# Convert the results to a DataFrame
results_df = pd.DataFrame(results)

# Display the results
print(results_df)


          team1     team2  p_team1_wins  score1  score2
0        Chievo  Juventus           0.5       2       3
1         Lazio    Napoli           0.5       1       2
2        Torino      Roma           0.5       0       1
3      Sassuolo     Inter           0.5       1       0
4         Parma   Udinese           0.5       2       2
..          ...       ...           ...     ...     ...
375        Roma     Parma           0.5       2       1
376       Inter    Empoli           0.5       2       1
377  Fiorentina     Genoa           0.5       0       0
378    Cagliari   Udinese           0.5       1       2
379    Atalanta  Sassuolo           0.5       3       1

[380 rows x 5 columns]
