In [215]:
import numpy as np
import pandas as pd
from scipy.stats import norm

In [216]:
# Function to initialize ratings
def initialize_ratings(df_results):
    ratings = {}

    # Iterate over teams in the results data
    teams = set(df_results['home_team']).union(set(df_results['away_team']))
    for team in teams:
        # Initialize ratings for each team
        ratings[team] = {
            'brH': 0.0,
            'brA': 0.0,
            'continuous_overunderperformances': 0
        }
    print('Teams:', teams, end='\n\n')
    return ratings

In [217]:
# Function to update ratings based on results data
def update_ratings(df_results, ratings):

    #lambda: Determines to what extent the new match results influence the team ratings (could be improved to include temporal difference between matches)
    learning_rate_lambda = 0.054

    #psi: diminish the impact each additional goal difference error has on team ratings
    diminishing_function_psi = lambda error: 3 * np.log10(1 + error)

    #gamma: determines to what extent performances at the home grounds influence away team ratings and vice versa
    learning_rate_gamma = 0.79

    # Iterate over each match in the results data
    for index, row in df_results.iterrows():
        home_team = row['home_team']
        away_team = row['away_team']

        observed_goal_difference = row['home_goals'] - row['away_goals']

        #Calculate expected goals for home team
        expected_goal_x = 10 ** (abs(ratings[home_team]['brH']) / 3) - 1

        #Calculate expected goals for away team
        expected_goal_y = 10 ** (abs(ratings[away_team]['brA']) / 3) - 1

        # Calculate expected goal difference based on ratings
        expected_goal_difference = expected_goal_x - expected_goal_y

        # Calculate the error between observed and expected goal difference
        error = abs(observed_goal_difference - expected_goal_difference)

        #diminish the impact of the goal difference error for both teams x and y respectively
        if (expected_goal_difference < observed_goal_difference):
            diminishing_function_psi_x = diminishing_function_psi(error)
            diminishing_function_psi_y = -diminishing_function_psi(error)
        else:
            diminishing_function_psi_x = -diminishing_function_psi(error)
            diminishing_function_psi_y = diminishing_function_psi(error)

        # Update the home team ratings
        previous_home_rating = ratings[home_team]['brH']
        ratings[home_team]['brH'] = previous_home_rating + diminishing_function_psi_x * learning_rate_lambda
        ratings[home_team]['brA'] += (ratings[home_team]['brH'] - previous_home_rating) * learning_rate_gamma

        # Update the away team ratings
        previous_away_rating = ratings[away_team]['brA']
        ratings[away_team]['brA'] = previous_away_rating + diminishing_function_psi_y * learning_rate_lambda
        ratings[away_team]['brH'] += (ratings[away_team]['brA'] - previous_away_rating) * learning_rate_gamma

        # Update the continuous over/underperformances for the home team
        if (observed_goal_difference > expected_goal_difference):
            ratings[home_team]['continuous_overunderperformances'] = max(1, ratings[home_team]['continuous_overunderperformances'] + 1)
            ratings[away_team]['continuous_overunderperformances'] = min(-1, ratings[away_team]['continuous_overunderperformances'] - 1)
        elif (observed_goal_difference < expected_goal_difference):
            ratings[home_team]['continuous_overunderperformances'] = min(-1, ratings[home_team]['continuous_overunderperformances'] - 1)
            ratings[away_team]['continuous_overunderperformances'] = max(1, ratings[away_team]['continuous_overunderperformances'] + 1)

        #What is supposed to happen in the rare case of observed goals = expected goals?

        #printing to check
        print("Home Team:", home_team)
        print("Away Team:", away_team)
        print("Observed Goal Difference:", observed_goal_difference)
        print("Expected Goal Difference:", expected_goal_difference)
        print("Error:", error)
        print("Background Rating x Home:", ratings[home_team]['brH'])
        print("Background Rating x Away:", ratings[home_team]['brA'])
        print("Background Rating y Home:", ratings[away_team]['brH'])
        print("Background Rating y Away:", ratings[away_team]['brA'])
        print("Continuous Over/Underperformances x:", ratings[home_team]['continuous_overunderperformances'])
        print("Continuous Over/Underperformances y:", ratings[away_team]['continuous_overunderperformances'])


    return ratings

In [218]:
# Function to calculate probabilities based on ratings
def calculate_rating_difference(df_fixtures, ratings):

    rating_differences = []

     #phi: Represents the number of continuous performances, above or below expectations, which do not trigger the form factor
    form_threshold_phi = 1

    #mu: represents the rating difference used to establish provisional ratings from background ratings
    rating_impact_mu = 0.01

    #delta: the level by which rating impact μ diminishes with each additional continuous over/under-performance
    diminishing_factor_delta = 2.5

    # Iterate over each match in the results data
    for index, row in df_fixtures.iterrows():
        home_team = row['home_team']
        away_team = row['away_team']

        # Calculate performance factor for home team
        a = ratings[home_team]['continuous_overunderperformances'] - form_threshold_phi
        b = a ** diminishing_factor_delta
        if (a == 0):
            form_factor_home = 0
        else:
            form_factor_home = a / b

        # Calculate home team rating
        home_rating = ratings[home_team]['brH']
        if (ratings[home_team]['continuous_overunderperformances'] > form_threshold_phi):
            home_rating = ratings[home_team]['brH'] + rating_impact_mu * form_factor_home
        if (ratings[home_team]['continuous_overunderperformances'] < -form_threshold_phi):
            home_rating = ratings[home_team]['brH'] - rating_impact_mu * form_factor_home

        # Calculate performance factor for away team
        c = ratings[away_team]['continuous_overunderperformances'] - form_threshold_phi
        d = c ** diminishing_factor_delta
        if (c == 0):
            form_factor_away = 0
        else:
            form_factor_away = c / d

        # Calculate away team rating
        away_rating = ratings[away_team]['brA']
        if (ratings[away_team]['continuous_overunderperformances'] > form_threshold_phi):
            away_rating = ratings[away_team]['brA'] + rating_impact_mu * form_factor_away
        if (ratings[away_team]['continuous_overunderperformances'] < -form_threshold_phi):
            away_rating = ratings[away_team]['brA'] - rating_impact_mu * form_factor_away

        rating_difference = home_rating - away_rating

        rating_differences.append(rating_difference)

    print("Rating differences:", rating_differences, end='\n\n')


    return rating_differences

In [219]:
def calculate_probabilities(rating_difference):

    mean_rating_difference = 0.4  # Adjust this value based on your data
    standard_deviation = 2.0  # Adjust this value based on your data

    home_win_probability = norm.cdf(rating_difference, loc=mean_rating_difference, scale=standard_deviation)

    draw_probability = norm.cdf(0, loc=mean_rating_difference, scale=standard_deviation)

    away_win_probability = 1 - home_win_probability - draw_probability


    return home_win_probability, draw_probability, away_win_probability

In [220]:
def predict_outcomes(df_fixtures, ratings):
    for index, row in df_fixtures.iterrows():
        home_team = row['home_team']
        away_team = row['away_team']
        rating_difference = calculate_rating_difference(df_fixtures, ratings)

        home_win_prob, draw_prob, away_win_prob = calculate_probabilities(rating_difference)

        print(f"{home_team} - {away_team}: Outcome Predictions")
        print(f"Home Win: {home_win_prob}")
        print(f"Draw: {draw_prob}")
        print(f"Away Win: {away_win_prob}", end='\n\n')

        # Compare with the actual result from the dataset and output correct or false
        # ...

In [221]:
def rps(probs, outcome):
    cum_probs = np.cumsum(probs)
    cum_outcomes = np.cumsum(outcome)

    print(cum_outcomes)
    print(cum_probs)
    sum_rps = 0
    for i in range(len(outcome)):
        sum_rps+= (cum_probs[i] - cum_outcomes[i])**2

    return sum_rps/(len(outcome)-1)

In [222]:
# Main function
def main():
    # Load the results data file for seasons 2006-07 to 2016-17
    df_results = pd.read_csv('../data/results.csv')

    # Load the fixtures data file for the season 2017-18
    df_fixtures = pd.read_csv('../data/smallFixtures.csv')

    # Initialize ratings based on the results data
    ratings = initialize_ratings(df_results)
    #print ('Ratings:', ratings, end='\n\n')

    ratings['Leicester City']['brH'] = 0.463014
    ratings['Leicester City']['brA'] = 0.208624
    ratings['Leicester City']['continuous_overunderperformances'] = 3

    ratings['Stoke City']['brH'] = 0.537708
    ratings['Stoke City']['brA'] = 0.037819
    ratings['Stoke City']['continuous_overunderperformances'] = -1

    # Calculate the rating difference between the two teams
    #predict_outcomes(df_fixtures, ratings)



    # Update ratings based on the results data
    ratings = update_ratings(df_results, ratings)

    # Call Ranked Probability Score function
    probs = [0.486, 0.261, 0.253]
    outcome = [1, 0, 0]
    rps_score = rps(probs, outcome)
    print("RPS Score:", rps_score, end='\n\n')

    # Calculate the mean and standard deviation of the rating differences
    rating_differences = calculate_rating_difference(df_results, ratings)
    mean = np.mean(rating_differences)
    std = np.std(rating_differences)
    print("Mean:", mean)
    print("Standard Deviation:", std, end='\n\n')



In [223]:
if __name__ == '__main__':
    main()

Teams: {'Derby County', 'Sunderland', 'West Bromwich Albion', 'Wigan Athletic', 'Chelsea', 'Sheffield United', 'Burnley', 'Cardiff City', 'Charlton Athletic', 'Reading', 'Fulham', 'AFC Bournemouth', 'Blackpool', 'Wolverhampton Wanderers', 'Manchester United', 'Leicester City', 'Southampton', 'Norwich City', 'Birmingham City', 'Everton', 'Tottenham Hotspur', 'Stoke City', 'Middlesbrough', 'Swansea City', 'Queens Park Rangers', 'Arsenal', 'Bolton Wanderers', 'Aston Villa', 'Newcastle United', 'Hull City', 'West Ham United', 'Blackburn Rovers', 'Watford', 'Portsmouth', 'Manchester City', 'Crystal Palace', 'Liverpool'}

Home Team: Sheffield United
Away Team: Liverpool
Observed Goal Difference: 0.0
Expected Goal Difference: 0.0
Error: 0.0
Background Rating x Home: 0.0
Background Rating x Away: 0.0
Background Rating y Home: 0.0
Background Rating y Away: 0.0
Continuous Over/Underperformances x: 0
Continuous Over/Underperformances y: 0
Home Team: Arsenal
Away Team: Aston Villa
Observed Goal Di

  expected_goal_y = 10 ** (abs(ratings[away_team]['brA']) / 3) - 1
  ratings[home_team]['brH'] = previous_home_rating + diminishing_function_psi_x * learning_rate_lambda
  ratings[away_team]['brH'] += (ratings[away_team]['brA'] - previous_away_rating) * learning_rate_gamma
  ratings[away_team]['brA'] = previous_away_rating + diminishing_function_psi_y * learning_rate_lambda


Home Team: Sunderland
Away Team: Burnley
Observed Goal Difference: 0.0
Expected Goal Difference: nan
Error: nan
Background Rating x Home: nan
Background Rating x Away: nan
Background Rating y Home: nan
Background Rating y Away: nan
Continuous Over/Underperformances x: 2
Continuous Over/Underperformances y: 4
Home Team: West Ham United
Away Team: Leicester City
Observed Goal Difference: -1.0
Expected Goal Difference: nan
Error: nan
Background Rating x Home: nan
Background Rating x Away: nan
Background Rating y Home: nan
Background Rating y Away: nan
Continuous Over/Underperformances x: -131
Continuous Over/Underperformances y: 3
Home Team: AFC Bournemouth
Away Team: Swansea City
Observed Goal Difference: 2.0
Expected Goal Difference: nan
Error: nan
Background Rating x Home: nan
Background Rating x Away: nan
Background Rating y Home: nan
Background Rating y Away: nan
Continuous Over/Underperformances x: 0
Continuous Over/Underperformances y: 2
Home Team: Middlesbrough
Away Team: Manchest