In [1]:
# Let's first load and preview the data from the uploaded SerieA.csv file
import pandas as pd
import numpy as np
from collections import defaultdict
from scipy.stats import truncnorm
from scipy.stats import norm

# Path to the uploaded file
file_path = 'SerieA.csv'

# Load the data
serie_a_data = pd.read_csv(file_path)

# Preview the first few rows of the dataset
serie_a_data.head()


Unnamed: 0,yyyy-mm-dd,HH:MM,team1,team2,score1,score2
0,2018-08-18,18:00,Chievo,Juventus,2,3
1,2018-08-18,20:30,Lazio,Napoli,1,2
2,2018-08-19,18:00,Torino,Roma,0,1
3,2018-08-19,20:30,Sassuolo,Inter,1,0
4,2018-08-19,20:30,Parma,Udinese,2,2


In [3]:

# Initialize player skills as Gaussian (mean = 25, std = 8)
initial_mean = 25
initial_std = 8

# Define a dictionary to hold player skills
player_skills = defaultdict(lambda: np.random.normal(initial_mean, initial_std))

# Function to update player skills based on match result
def update_skills(player1, player2, score1, score2):
    skill1 = player_skills[player1]
    skill2 = player_skills[player2]
    
    # Compute outcome difference as Gaussian random variable
    outcome = np.random.normal(skill1 - skill2, 1)  # Outcome difference with some noise
    
    # Determine match result based on the outcome
    if score1 > score2:
        result = 1  # Player 1 wins
    elif score1 < score2:
        result = -1  # Player 2 wins
    else:
        result = 0  # Draw
    
    # Update skills using a simple adjustment (we can expand this later for full Bayesian update)
    k = 0.1  # Learning rate for skill adjustment
    if result == 1:
        player_skills[player1] += k * (skill2 - skill1)
        player_skills[player2] -= k * (skill2 - skill1)
    elif result == -1:
        player_skills[player2] += k * (skill1 - skill2)
        player_skills[player1] -= k * (skill1 - skill2)

# Apply skill updates based on all matches in the dataset
for idx, row in serie_a_data.iterrows():
    team1, team2 = row['team1'], row['team2']
    score1, score2 = row['score1'], row['score2']
    update_skills(team1, team2, score1, score2)

# Display a few player skills after the update
updated_skills = {player: skill for player, skill in list(player_skills.items())[:10]}
updated_skills


{'Chievo': 26.84560877370682,
 'Juventus': 25.871860198072792,
 'Lazio': 25.653454424420996,
 'Napoli': 25.90114666043628,
 'Torino': 26.377327391988484,
 'Roma': 25.272950347010422,
 'Sassuolo': 25.2042760526244,
 'Inter': 25.559633397798223,
 'Parma': 25.31929244482015,
 'Udinese': 25.83246006095596}

In [4]:

# Function to compute truncated Gaussian distribution p(t | s1, s2, y)
def truncated_gaussian(skill1, skill2, result):
    mean_diff = skill1 - skill2
    std_dev = 1  # Assumed standard deviation for the outcome variable

    # If Player 1 wins (y = 1), we truncate the Gaussian to values greater than 0
    if result == 1:
        a, b = 0, np.inf  # Truncate to (0, ∞)
    elif result == -1:
        a, b = -np.inf, 0  # Truncate to (-∞, 0)
    else:
        a, b = -np.inf, np.inf  # No truncation for a draw

    # Truncated Gaussian distribution
    truncated_gaussian_dist = truncnorm(a=(a - mean_diff) / std_dev, b=(b - mean_diff) / std_dev, loc=mean_diff, scale=std_dev)
    return truncated_gaussian_dist

# Example: Let's compute the truncated Gaussian for a match between Chievo and Juventus where Chievo lost
team1 = 'Chievo'
team2 = 'Juventus'
score1 = 2
score2 = 3
result = 1 if score1 > score2 else -1

# Get the current skills of the teams
skill1 = player_skills[team1]
skill2 = player_skills[team2]

# Compute the truncated Gaussian distribution for this match
trunc_gauss_dist = truncated_gaussian(skill1, skill2, result)

# Compute mean and variance of the truncated distribution
truncated_mean = trunc_gauss_dist.mean()
truncated_variance = trunc_gauss_dist.var()

truncated_mean, truncated_variance


(np.float64(-0.5304024039828856), np.float64(0.2021947044579756))

In [5]:


# Function to compute the marginal probability that Player 1 wins (p(y = 1))
def prob_player1_wins(skill1, skill2):
    mean_diff = skill1 - skill2
    std_dev = 1  # Standard deviation for outcome variable

    # The probability that Player 1 wins is the probability that t > 0
    # Where t ~ N(s1 - s2, 1), hence we calculate P(t > 0)
    prob_win = 1 - norm.cdf(0, loc=mean_diff, scale=std_dev)
    return prob_win

# Compute the probability that Chievo wins against Juventus
prob_chievo_wins = prob_player1_wins(skill1, skill2)
prob_chievo_wins


np.float64(0.8349093039972402)

In [6]:
import pandas as pd
import numpy as np
from scipy.stats import truncnorm, norm

# Load the dataset
data = pd.read_csv('SerieA.csv')

# Initializing player skills, assuming all players start with a mean skill of 25 and standard deviation of 8
players = pd.concat([data['team1'], data['team2']]).unique()
player_skills = {player: {'mean': 25, 'std': 8} for player in players}

# Data processing to simplify the game outcomes
data['result'] = np.where(data['score1'] > data['score2'], 1, -1)  # Player 1 wins if score1 > score2, else Player 2 wins


In [7]:
def update_skills(player1, player2, result, player_skills, outcome_var=1):
    skill1 = player_skills[player1]
    skill2 = player_skills[player2]
    
    mean_diff = skill1['mean'] - skill2['mean']
    std_diff = np.sqrt(skill1['std']**2 + skill2['std']**2 + outcome_var)

    # If player 1 wins, we truncate to t > 0, otherwise to t < 0
    if result == 1:
        a, b = 0, np.inf  # Truncate the distribution for player 1 winning
    else:
        a, b = -np.inf, 0  # Truncate the distribution for player 2 winning
    
    # Truncated Gaussian distribution for the match outcome
    trunc_gauss_dist = truncnorm(a=(a - mean_diff) / std_diff, b=(b - mean_diff) / std_diff, loc=mean_diff, scale=std_diff)
    
    # Update player skills based on the match outcome
    outcome_mean = trunc_gauss_dist.mean()
    skill1_update = skill1['mean'] + outcome_mean * (skill1['std']**2) / (skill1['std']**2 + skill2['std']**2)
    skill2_update = skill2['mean'] - outcome_mean * (skill2['std']**2) / (skill1['std']**2 + skill2['std']**2)
    
    player_skills[player1]['mean'] = skill1_update
    player_skills[player2]['mean'] = skill2_update

    return player_skills


In [8]:
def prob_player1_wins(skill1, skill2):
    mean_diff = skill1['mean'] - skill2['mean']
    std_diff = np.sqrt(skill1['std']**2 + skill2['std']**2)
    
    # The probability of Player 1 winning is P(t > 0)
    return 1 - norm.cdf(0, loc=mean_diff, scale=std_diff)

# Example usage for a specific match
team1 = 'Chievo'
team2 = 'Juventus'

# Compute the probability of Chievo winning
prob_chievo_wins = prob_player1_wins(player_skills[team1], player_skills[team2])
print(f"Probability of {team1} winning against {team2}: {prob_chievo_wins:.4f}")


Probability of Chievo winning against Juventus: 0.5000


In [9]:
for _, row in data.iterrows():
    team1 = row['team1']
    team2 = row['team2']
    result = row['result']
    
    # Update player skills based on the result of the match
    player_skills = update_skills(team1, team2, result, player_skills)
    
    # Optionally, print the updated skills after each match
    print(f"Updated skills: {team1} = {player_skills[team1]['mean']:.2f}, {team2} = {player_skills[team2]['mean']:.2f}")


Updated skills: Chievo = 20.47, Juventus = 29.53
Updated skills: Lazio = 20.47, Napoli = 29.53
Updated skills: Torino = 20.47, Roma = 29.53
Updated skills: Sassuolo = 29.53, Inter = 20.47
Updated skills: Parma = 20.47, Udinese = 29.53
Updated skills: Empoli = 29.53, Cagliari = 20.47
Updated skills: Bologna = 20.47, Spal = 29.53
Updated skills: Atalanta = 29.53, Frosinone = 20.47
Updated skills: Juventus = 36.15, Lazio = 13.85
Updated skills: Napoli = 34.99, Milan = 19.54
Updated skills: Spal = 36.15, Parma = 13.85
Updated skills: Udinese = 34.99, Sampdoria = 19.54
Updated skills: Inter = 15.94, Torino = 25.00
Updated skills: Genoa = 28.80, Empoli = 25.73
Updated skills: Frosinone = 15.94, Bologna = 25.00
Updated skills: Fiorentina = 30.46, Chievo = 15.01
Updated skills: Cagliari = 13.85, Sassuolo = 36.15
Updated skills: Roma = 25.00, Atalanta = 34.06
Updated skills: Milan = 23.21, Roma = 21.33
Updated skills: Bologna = 21.78, Inter = 19.16
Updated skills: Parma = 2.35, Juventus = 47.65

  g1 = mu3 / np.power(mu2, 1.5)


Updated skills: Juventus = 7619.77, Parma = -148.09
Updated skills: Spal = -3630.42, Torino = 2733.40
Updated skills: Udinese = -825.98, Fiorentina = 2352.01
Updated skills: Genoa = -3472.48, Sassuolo = 4095.93
Updated skills: Inter = -376.04, Bologna = -2581.90
Updated skills: Roma = -1641.19, Milan = 4185.18
Updated skills: Frosinone = -5432.57, Lazio = -1392.27
Updated skills: Cagliari = -5038.77, Atalanta = 3468.05
Updated skills: Lazio = -1392.24, Empoli = 1003.89
Updated skills: Chievo = -4285.89, Roma = -759.62
Updated skills: Fiorentina = 647.95, Napoli = 7464.19
Updated skills: Parma = -148.37, Inter = -375.76
Updated skills: Bologna = -2581.97, Genoa = -3472.40
Updated skills: Torino = 4513.09, Udinese = -2605.67
Updated skills: Sampdoria = -2774.37, Frosinone = -5432.54
Updated skills: Atalanta = 7017.28, Spal = -7179.66
Updated skills: Sassuolo = 2334.01, Juventus = 9381.69
Updated skills: Milan = 8797.16, Cagliari = -9650.74
Updated skills: Juventus = 16788.81, Frosinone =