In [244]:
import numpy as np
import pandas as pd
import math
from scipy.stats import norm

In [245]:
# Function to initialize ratings
def initialize_ratings(df_results, df_fixtures):
    ratings = {}

    # Iterate over teams in the results data
    teams = set(df_results['home_team']).union(set(df_results['away_team'])).union(set(df_fixtures['home_team'])).union(set(df_fixtures['away_team']))
    for team in teams:
        # Initialize ratings for each team
        ratings[team] = {
            'brH': 0.0,
            'brA': 0.0,
            'continuous_overunderperformances': 0
        }
    print('Teams:', teams, end='\n\n')
    return ratings

In [246]:
# Function to update ratings based on results data
def update_ratings(df_results, ratings):

    #lambda: Determines to what extent the new match results influence the team ratings (could be improved to include temporal difference between matches)
    learning_rate_lambda = 0.054

    #psi: diminish the impact each additional goal difference error has on team ratings
    diminishing_function_psi = lambda error: 3 * np.log10(1 + error)

    #gamma: determines to what extent performances at the home grounds influence away team ratings and vice versa
    learning_rate_gamma = 0.79

    # Iterate over each match in the results data
    for index, row in df_results.iterrows():
        print("Game Nr:", index)

        if abs(ratings['Wigan Athletic']['brH']) > 1:
            print("Wigan Athletic has reached |brH| >1 at game:", index)

        if index == 2000:
            break

        home_team = row['home_team']
        away_team = row['away_team']
        #print("home_team:", home_team)
        #print("away_team:", away_team)

        if home_team == 'Wigan Athletic' or away_team == 'Wigan Athletic':
            continue


        observed_goal_difference = row['home_goals'] - row['away_goals']
        #print("observed_goal_difference:", observed_goal_difference)

        #Calculate expected goals for home team
        #expected_goal_x = round((10 ** (abs(ratings[home_team]['brH']) / 3)) - 1,5)
        expected_goal_x_temp = abs(ratings[home_team]['brH']) / 3
        expected_goal_x = np.power(10, expected_goal_x_temp) - 1
        #print("expected_goal_x:", expected_goal_x)

        # Calculate expected goals for away team
        #expected_goal_y = round((10 ** (abs(ratings[away_team]['brA']) / 3)) - 1,5)
        expected_goal_y_temp = abs(ratings[away_team]['brA']) / 3
        expected_goal_y = np.power(10, expected_goal_y_temp) - 1
        #print("expected_goal_y:", expected_goal_y)

        # Calculate expected goal difference based on ratings
        expected_goal_difference = expected_goal_x - expected_goal_y
        #print("expected_goal_difference:", expected_goal_difference)


        # Calculate the error between observed and expected goal difference
        error = abs(observed_goal_difference - expected_goal_difference)
        #print("error:", error)

        psi_temp = diminishing_function_psi(error)

        # Diminish the impact of the goal difference error for both teams x and y respectively
        if (expected_goal_difference < observed_goal_difference):
            diminishing_function_psi_x = psi_temp
            diminishing_function_psi_y = -psi_temp
        else:
            diminishing_function_psi_x = -psi_temp
            diminishing_function_psi_y = psi_temp
        #print("diminishing_function_psi_x:", diminishing_function_psi_x)
        #print("diminishing_function_psi_y:", diminishing_function_psi_y)

        # Update the home team ratings
        previous_home_rating_x = ratings[home_team]['brH']
        previous_away_rating_x = ratings[home_team]['brA']
        #print("previous_home_rating:", previous_home_rating_x)
        #print("previous_away_rating:", previous_away_rating_x)

        """
        temp_1 = previous_home_rating_x + diminishing_function_psi_x * learning_rate_lambda
        temp_2 = previous_away_rating_x + (ratings[home_team]['brH'] - previous_home_rating_x) * learning_rate_gamma
        """

        ratings[home_team]['brH'] = previous_home_rating_x + diminishing_function_psi_x * learning_rate_lambda
        ratings[home_team]['brA'] = previous_away_rating_x + (ratings[home_team]['brH'] - previous_home_rating_x) * learning_rate_gamma
        #print("ratings[home_team]['brH']:", ratings[home_team]['brH'])
        #print("ratings[home_team]['brA']:", ratings[home_team]['brA'])

        # Update the away team ratings
        previous_home_rating_y = ratings[away_team]['brH']
        previous_away_rating_y = ratings[away_team]['brA']
        #print("previous_home_rating:", previous_home_rating_y)
        #print("previous_away_rating:", previous_away_rating_y)

        ratings[away_team]['brA'] = previous_away_rating_y + diminishing_function_psi_y * learning_rate_lambda
        ratings[away_team]['brH'] = previous_home_rating_y + (ratings[away_team]['brA'] - previous_away_rating_y) * learning_rate_gamma
        #print("ratings[away_team]['brH']:", ratings[away_team]['brH'])
        #print("ratings[away_team]['brA']:", ratings[away_team]['brA'])

        #print("home team: previous overunderperformance:", ratings[home_team]['continuous_overunderperformances'])
        #print("away team: previous overunderperformance:", ratings[away_team]['continuous_overunderperformances'])

        # Update the continuous over/underperformances for the home team
        if (observed_goal_difference > expected_goal_difference):
            ratings[home_team]['continuous_overunderperformances'] = max(1, ratings[home_team]['continuous_overunderperformances'] + 1)
            ratings[away_team]['continuous_overunderperformances'] = min(-1, ratings[away_team]['continuous_overunderperformances'] - 1)
        elif (observed_goal_difference < expected_goal_difference):
            ratings[home_team]['continuous_overunderperformances'] = min(-1, ratings[home_team]['continuous_overunderperformances'] - 1)
            ratings[away_team]['continuous_overunderperformances'] = max(1, ratings[away_team]['continuous_overunderperformances'] + 1)

        #print("home team: updated overunderperformance:", ratings[home_team]['continuous_overunderperformances'])
        #print("away team: updated overunderperformance:", ratings[away_team]['continuous_overunderperformances'], end='\n\n')


        #What is supposed to happen in the rare case of observed goals = expected goals?

        """"
        #printing to check
        print("Home Team:", home_team)
        print("Away Team:", away_team)
        print("Observed Goal Difference:", observed_goal_difference)
        print("Expected Goal Difference:", expected_goal_difference)
        print("Error:", error)
        print("Background Rating x Home:", ratings[home_team]['brH'])
        print("Background Rating x Away:", ratings[home_team]['brA'])
        print("Background Rating y Home:", ratings[away_team]['brH'])
        print("Background Rating y Away:", ratings[away_team]['brA'])
        print("Continuous Over/Underperformances x:", ratings[home_team]['continuous_overunderperformances'])
        print("Continuous Over/Underperformances y:", ratings[away_team]['continuous_overunderperformances'])
        """


    return ratings



In [247]:
# Function to calculate the rating difference between two teams
def calculate_rating_difference(df_fixtures, ratings):

    rating_differences = []

     #phi: Represents the number of continuous performances, above or below expectations, which do not trigger the form factor
    form_threshold_phi = 1

    #mu: represents the rating difference used to establish provisional ratings from background ratings
    rating_impact_mu = 0.01

    #delta: the level by which rating impact μ diminishes with each additional continuous over/under-performance
    diminishing_factor_delta = 2.5

    # Iterate over each match in the results data
    for index, row in df_fixtures.iterrows():
        home_team = row['home_team']
        away_team = row['away_team']

        # Calculate performance factor for home team
        a = ratings[home_team]['continuous_overunderperformances'] - form_threshold_phi
        b = a ** diminishing_factor_delta
        if (a == 0):
            form_factor_home = 0
        else:
            form_factor_home = a / b

        # Calculate home team rating
        home_rating = ratings[home_team]['brH']
        if (ratings[home_team]['continuous_overunderperformances'] > form_threshold_phi):
            home_rating = ratings[home_team]['brH'] + rating_impact_mu * form_factor_home
        if (ratings[home_team]['continuous_overunderperformances'] < -form_threshold_phi):
            home_rating = ratings[home_team]['brH'] - rating_impact_mu * form_factor_home

        # Calculate performance factor for away team
        c = ratings[away_team]['continuous_overunderperformances'] - form_threshold_phi
        d = c ** diminishing_factor_delta
        if (c == 0):
            form_factor_away = 0
        else:
            form_factor_away = c / d

        # Calculate away team rating
        away_rating = ratings[away_team]['brA']
        if (ratings[away_team]['continuous_overunderperformances'] > form_threshold_phi):
            away_rating = ratings[away_team]['brA'] + rating_impact_mu * form_factor_away
        if (ratings[away_team]['continuous_overunderperformances'] < -form_threshold_phi):
            away_rating = ratings[away_team]['brA'] - rating_impact_mu * form_factor_away

        rating_difference = home_rating - away_rating

        rating_differences.append(rating_difference)

    print("Rating differences:", rating_differences, end='\n\n')


    return rating_differences

In [248]:
def calculate_probabilities(rating_difference):

    mean_rating_difference = 0.4  # Adjust this value based on your data
    standard_deviation = 2.0  # Adjust this value based on your data

    home_win_probability = norm.cdf(rating_difference, loc=mean_rating_difference, scale=standard_deviation)

    draw_probability = norm.cdf(0, loc=mean_rating_difference, scale=standard_deviation)

    away_win_probability = 1 - home_win_probability - draw_probability


    return home_win_probability, draw_probability, away_win_probability

In [249]:
def predict_outcomes(df_fixtures, ratings):
    for index, row in df_fixtures.iterrows():
        home_team = row['home_team']
        away_team = row['away_team']
        rating_difference = calculate_rating_difference(df_fixtures, ratings)

        home_win_prob, draw_prob, away_win_prob = calculate_probabilities(rating_difference)

        print(f"{home_team} - {away_team}: Outcome Predictions")
        print(f"Home Win: {home_win_prob}")
        print(f"Draw: {draw_prob}")
        print(f"Away Win: {away_win_prob}", end='\n\n')

        # Compare with the actual result from the dataset and output correct or false
        # ...

In [250]:
def rps(probs, outcome):
    cum_probs = np.cumsum(probs)
    cum_outcomes = np.cumsum(outcome)

    print(cum_outcomes)
    print(cum_probs)
    sum_rps = 0
    for i in range(len(outcome)):
        sum_rps+= (cum_probs[i] - cum_outcomes[i])**2

    return sum_rps/(len(outcome)-1)

In [251]:
# Main function
def main():
    # Load the results data file for seasons 2006-07 to 2016-17
    df_results = pd.read_csv('../data/results.csv')

    # Load the fixtures data file for the season 2017-18
    df_fixtures = pd.read_csv('../data/smallFixtures.csv')

    # Initialize ratings based on the results data
    ratings = initialize_ratings(df_results, df_fixtures)
    #print ('Ratings:', ratings, end='\n\n')

    """
    ratings['Leicester City']['brH'] = 0.463014
    ratings['Leicester City']['brA'] = 0.208624
    ratings['Leicester City']['continuous_overunderperformances'] = 3

    ratings['Stoke City']['brH'] = 0.537708
    ratings['Stoke City']['brA'] = 0.037819
    ratings['Stoke City']['continuous_overunderperformances'] = -1
    """


    # Calculate the rating difference between the two teams
    #predict_outcomes(df_fixtures, ratings)



    # Update ratings based on the results data
    ratings = update_ratings(df_results, ratings)

    # Call Ranked Probability Score function
    probs = [0.486, 0.261, 0.253]
    outcome = [1, 0, 0]
    rps_score = rps(probs, outcome)
    print("RPS Score:", rps_score, end='\n\n')

    for team, team_ratings in ratings.items():
        print(f"Team: {team}")
        print(f"Background Rating Home: {team_ratings['brH']}")
        print(f"Background Rating Away: {team_ratings['brA']}")
        print(f"Continuous Over/Underperformances: {team_ratings['continuous_overunderperformances']}")
        print()

    """
    # Calculate the mean and standard deviation of the rating differences
    rating_differences = calculate_rating_difference(df_results, ratings)
    mean = np.mean(rating_differences)
    std = np.std(rating_differences)
    print("Mean:", mean)
    print("Standard Deviation:", std, end='\n\n')
    """



In [252]:
if __name__ == '__main__':
    main()

Teams: {'Cardiff City', 'Bolton Wanderers', 'Blackpool', 'Queens Park Rangers', 'Leicester City', 'Sunderland', 'Crystal Palace', 'Portsmouth', 'Wolverhampton Wanderers', 'Derby County', 'AFC Bournemouth', 'Liverpool', 'Manchester United', 'Everton', 'Arsenal', 'Birmingham City', 'Hull City', 'Wigan Athletic', 'Norwich City', 'Reading', 'Swansea City', 'Watford', 'Newcastle United', 'Aston Villa', 'Chelsea', 'Stoke City', 'West Bromwich Albion', 'Blackburn Rovers', 'Tottenham Hotspur', 'Southampton', 'Manchester City', 'West Ham United', 'Burnley', 'Middlesbrough', 'Charlton Athletic', 'Fulham', 'Sheffield United'}

Game Nr: 0
Game Nr: 1
Game Nr: 2
Game Nr: 3
Game Nr: 4
Game Nr: 5
Game Nr: 6
Game Nr: 7
Game Nr: 8
Game Nr: 9
Game Nr: 10
Game Nr: 11
Game Nr: 12
Game Nr: 13
Game Nr: 14
Game Nr: 15
Game Nr: 16
Game Nr: 17
Game Nr: 18
Game Nr: 19
Game Nr: 20
Game Nr: 21
Game Nr: 22
Game Nr: 23
Game Nr: 24
Game Nr: 25
Game Nr: 26
Game Nr: 27
Game Nr: 28
Game Nr: 29
Game Nr: 30
Game Nr: 31
Ga