In [6]:
import numpy as np
import pandas as pd

# Load data
df_results = pd.read_csv('../data/results.csv')
df_stats = pd.read_csv('../data/stats.csv')

# Function to initialize ratings
def initialize_ratings(df_results, df_stats):
    ratings = {}

    # Iterate over teams in the results data
    teams = set(df_results['home_team']).union(set(df_results['away_team']))
    for team in teams:
        # Initialize background ratings for home and away
        ratings[team] = {
            'brH': 0.0,
            'brA': 0.0,
            'prH': 0.0,
            'prA': 0.0,
            'continuous_underperformances': 0,
            'continuous_overperformances': 0
        }

        # Initialize other required variables for each team
        # ...

    return ratings

# Function to update ratings based on results data
def update_ratings(df_results, ratings):
    learning_rate_lambda = 0.054
    diminishing_function_psi = lambda error: 3 * np.log10(1 + error)
    learning_rate_gamma = 0.79
    form_threshold_phi = 1
    rating_impact_mu = 0.01
    diminishing_factor_delta = 2.5

    # Iterate over each match in the results data
    for index, row in df_results.iterrows():
        home_team = row['home_team']
        away_team = row['away_team']
        observed_goal_difference = row['home_goals'] - row['away_goals']

        # Calculate expected goal difference based on ratings
        expected_goal_difference = ratings[home_team]['brH'] - ratings[away_team]['brA']

        # Calculate the error between observed and expected goal difference
        error = abs(observed_goal_difference - expected_goal_difference)

        # Update the home team ratings
        ratings[home_team]['brH'] += diminishing_function_psi(error) * learning_rate_lambda
        ratings[home_team]['brA'] += (ratings[home_team]['brH'] - ratings[home_team]['brA']) * learning_rate_gamma

        # Update the away team ratings
        ratings[away_team]['brA'] += diminishing_function_psi(error) * learning_rate_lambda
        ratings[away_team]['brH'] += (ratings[away_team]['brA'] - ratings[away_team]['brH']) * learning_rate_gamma

        # Update the continuous under/over-performance counters
        if observed_goal_difference < expected_goal_difference:
            ratings[home_team]['continuous_underperformances'] += 1
            ratings[away_team]['continuous_overperformances'] += 1
        elif observed_goal_difference > expected_goal_difference:
            ratings[home_team]['continuous_overperformances'] += 1
            ratings[away_team]['continuous_underperformances'] += 1
        else:
            ratings[home_team]['continuous_underperformances'] = 0
            ratings[away_team]['continuous_underperformances'] = 0

        # Update the provisional ratings based on form factor
        if ratings[home_team]['continuous_underperformances'] > form_threshold_phi:
            if form_threshold_phi > 1:
                form_impact = rating_impact_mu * (ratings[home_team]['continuous_underperformances'] - form_threshold_phi) / ((form_threshold_phi - 1) * diminishing_factor_delta)
            else:
                form_impact = rating_impact_mu * (ratings[home_team]['continuous_underperformances'] - form_threshold_phi)
            ratings[home_team]['prH'] = ratings[home_team]['brH'] - form_impact
        elif ratings[home_team]['continuous_overperformances'] > form_threshold_phi:
            if form_threshold_phi > 1:
                form_impact = rating_impact_mu * (ratings[home_team]['continuous_overperformances'] - form_threshold_phi) / ((form_threshold_phi - 1) * diminishing_factor_delta)
            else:
                form_impact = rating_impact_mu * (ratings[home_team]['continuous_overperformances'] - form_threshold_phi)
            ratings[home_team]['prH'] = ratings[home_team]['brH'] + form_impact
        else:
            ratings[home_team]['prH'] = ratings[home_team]['brH']

        if ratings[away_team]['continuous_underperformances'] > form_threshold_phi:
            if form_threshold_phi > 1:
                form_impact = rating_impact_mu * (ratings[away_team]['continuous_underperformances'] - form_threshold_phi) / ((form_threshold_phi - 1) * diminishing_factor_delta)
            else:
                form_impact = rating_impact_mu * (ratings[away_team]['continuous_underperformances'] - form_threshold_phi)
            ratings[away_team]['prA'] = ratings[away_team]['brA'] - form_impact
        elif ratings[away_team]['continuous_overperformances'] > form_threshold_phi:
            if form_threshold_phi > 1:
                form_impact = rating_impact_mu * (ratings[away_team]['continuous_overperformances'] - form_threshold_phi) / ((form_threshold_phi - 1) * diminishing_factor_delta)
            else:
                form_impact = rating_impact_mu * (ratings[away_team]['continuous_overperformances'] - form_threshold_phi)
            ratings[away_team]['prA'] = ratings[away_team]['brA'] + form_impact
        else:
            ratings[away_team]['prA'] = ratings[away_team]['brA']

    return ratings

# Function to calculate probabilities based on ratings
def calculate_probabilities(ratings):
    probabilities = {}

    # Iterate over teams and calculate probabilities
    for team in ratings:
        home_rating = ratings[team]['prH']
        away_rating = ratings[team]['prA']

        # Calculate rating difference
        rating_difference = home_rating - away_rating

        # Apply logistic function to estimate probabilities
        home_win_prob = logistic_function(rating_difference)
        draw_prob = logistic_function(0)
        away_win_prob = 1 - home_win_prob - draw_prob

        # Store probabilities for the team
        probabilities[team] = {
            'home_win': home_win_prob,
            'draw': draw_prob,
            'away_win': away_win_prob
        }

    return probabilities

# Logistic function
def logistic_function(x):
    return 1 / (1 + np.exp(-x))

# Main function
def main():
    # Initialize ratings
    ratings = initialize_ratings(df_results, df_stats)

    # Update ratings based on results data
    ratings = update_ratings(df_results, ratings)

    # Calculate probabilities based on ratings
    probabilities = calculate_probabilities(ratings)

    # Print or use the probabilities as needed
    print(probabilities)

# Call the main function
if __name__ == '__main__':
    main()



{'Blackpool': {'home_win': 0.4753151468452336, 'draw': 0.5, 'away_win': 0.024684853154766317}, 'Leicester City': {'home_win': 0.46780445789253133, 'draw': 0.5, 'away_win': 0.032195542107468667}, 'Tottenham Hotspur': {'home_win': 0.5639451247546362, 'draw': 0.5, 'away_win': -0.0639451247546362}, 'AFC Bournemouth': {'home_win': 0.4856877246248091, 'draw': 0.5, 'away_win': 0.014312275375190975}, 'Fulham': {'home_win': 0.5376525159355098, 'draw': 0.5, 'away_win': -0.03765251593550978}, 'Norwich City': {'home_win': 0.48135327230457603, 'draw': 0.5, 'away_win': 0.01864672769542397}, 'Middlesbrough': {'home_win': 0.45892705432900144, 'draw': 0.5, 'away_win': 0.04107294567099862}, 'West Bromwich Albion': {'home_win': 0.47291327933240607, 'draw': 0.5, 'away_win': 0.02708672066759399}, 'Huddersfield Town': {'home_win': 0.5442551156334205, 'draw': 0.5, 'away_win': -0.04425511563342055}, 'Swansea City': {'home_win': 0.5176875418456047, 'draw': 0.5, 'away_win': -0.017687541845604726}, 'Everton': {'