In [3]:
import numpy as np
import pandas as pd

In [4]:
# Function to initialize ratings
def initialize_ratings(df_results):
    ratings = {}

    # Iterate over teams in the results data
    teams = set(df_results['home_team']).union(set(df_results['away_team']))
    for team in teams:
        # Initialize ratings for each team
        ratings[team] = {
            'brH': 0.0,
            'brA': 0.0,
            'prH': 0.0,
            'prA': 0.0,
            'continuous_underperformances': 0,
            'continuous_overperformances': 0
        }

    return ratings

In [5]:
# Function to update ratings based on results data
def update_ratings(df_results, ratings):
    learning_rate_lambda = 0.054
    diminishing_function_psi = lambda error: 3 * np.log10(1 + error)
    learning_rate_gamma = 0.79
    form_threshold_phi = 1
    rating_impact_mu = 0.01
    diminishing_factor_delta = 2.5

    # Iterate over each match in the results data
    for index, row in df_results.iterrows():
        home_team = row['home_team']
        away_team = row['away_team']
        observed_goal_difference = row['home_goals'] - row['away_goals']

        # Calculate expected goal difference based on ratings
        expected_goal_difference = ratings[home_team]['brH'] - ratings[away_team]['brA']

        # Calculate the error between observed and expected goal difference
        error = abs(observed_goal_difference - expected_goal_difference)

        # Update the home team ratings
        ratings[home_team]['brH'] += diminishing_function_psi(error) * learning_rate_lambda
        ratings[home_team]['brA'] += (ratings[home_team]['brH'] - ratings[home_team]['brA']) * learning_rate_gamma

        # Update the away team ratings
        ratings[away_team]['brA'] += diminishing_function_psi(error) * learning_rate_lambda
        ratings[away_team]['brH'] += (ratings[away_team]['brA'] - ratings[away_team]['brH']) * learning_rate_gamma

        # Update the continuous under/over-performance counters
        if observed_goal_difference < expected_goal_difference:
            ratings[home_team]['continuous_underperformances'] += 1
            ratings[away_team]['continuous_overperformances'] += 1
        elif observed_goal_difference > expected_goal_difference:
            ratings[home_team]['continuous_overperformances'] += 1
            ratings[away_team]['continuous_underperformances'] += 1
        else:
            ratings[home_team]['continuous_underperformances'] = 0
            ratings[away_team]['continuous_underperformances'] = 0

        # Update the provisional ratings based on form factor
        if ratings[home_team]['continuous_underperformances'] > form_threshold_phi:
            if form_threshold_phi > 1:
                form_impact = rating_impact_mu * (ratings[home_team]['continuous_underperformances'] - form_threshold_phi) / ((form_threshold_phi - 1) * diminishing_factor_delta)
            else:
                form_impact = rating_impact_mu * (ratings[home_team]['continuous_underperformances'] - form_threshold_phi)
            ratings[home_team]['prH'] = ratings[home_team]['brH'] - form_impact
        elif ratings[home_team]['continuous_overperformances'] > form_threshold_phi:
            if form_threshold_phi > 1:
                form_impact = rating_impact_mu * (ratings[home_team]['continuous_overperformances'] - form_threshold_phi) / ((form_threshold_phi - 1) * diminishing_factor_delta)
            else:
                form_impact = rating_impact_mu * (ratings[home_team]['continuous_overperformances'] - form_threshold_phi)
            ratings[home_team]['prH'] = ratings[home_team]['brH'] + form_impact
        else:
            ratings[home_team]['prH'] = ratings[home_team]['brH']

        if ratings[away_team]['continuous_underperformances'] > form_threshold_phi:
            if form_threshold_phi > 1:
                form_impact = rating_impact_mu * (ratings[away_team]['continuous_underperformances'] - form_threshold_phi) / ((form_threshold_phi - 1) * diminishing_factor_delta)
            else:
                form_impact = rating_impact_mu * (ratings[away_team]['continuous_underperformances'] - form_threshold_phi)
            ratings[away_team]['prA'] = ratings[away_team]['brA'] - form_impact
        elif ratings[away_team]['continuous_overperformances'] > form_threshold_phi:
            if form_threshold_phi > 1:
                form_impact = rating_impact_mu * (ratings[away_team]['continuous_overperformances'] - form_threshold_phi) / ((form_threshold_phi - 1) * diminishing_factor_delta)
            else:
                form_impact = rating_impact_mu * (ratings[away_team]['continuous_overperformances'] - form_threshold_phi)
            ratings[away_team]['prA'] = ratings[away_team]['brA'] + form_impact
        else:
            ratings[away_team]['prA'] = ratings[away_team]['brA']

    return ratings

In [6]:
# Function to calculate probabilities based on ratings
def calculate_probabilities(ratings):
    probabilities = {}

    # Iterate over teams and calculate probabilities
    for team in ratings:
        home_rating = ratings[team]['prH']
        away_rating = ratings[team]['prA']

        # Calculate probabilities based on ratings
        total_rating = home_rating + away_rating
        home_win_prob = home_rating / total_rating
        draw_prob = 1 / total_rating
        away_win_prob = away_rating / total_rating

        probabilities[team] = {
            'home_win': home_win_prob,
            'draw': draw_prob,
            'away_win': away_win_prob
        }

    return probabilities

# Function to predict outcomes for fixtures
def predict_outcomes(df_fixtures, ratings):
    predictions = []

    # Iterate over each fixture in the fixtures data
    for index, row in df_fixtures.iterrows():
        home_team = row['home_team']
        away_team = row['away_team']

        # Get the probabilities for home-win, draw, and away-win
        home_win_prob = ratings[home_team]['home_win']
        draw_prob = ratings[home_team]['draw']
        away_win_prob = ratings[home_team]['away_win']

        # Make a prediction based on the highest probability
        if home_win_prob > draw_prob and home_win_prob > away_win_prob:
            prediction = 'Home Win'
        elif draw_prob > home_win_prob and draw_prob > away_win_prob:
            prediction = 'Draw'
        else:
            prediction = 'Away Win'

        predictions.append((home_team, away_team, prediction))

    return predictions

In [7]:
# Main function
def main():
    # Load the results data file
    df_results = pd.read_csv('../data/results.csv')

    # Initialize ratings based on the results data
    ratings = initialize_ratings(df_results)

    # Update ratings based on the results data
    ratings = update_ratings(df_results, ratings)

    # Load the fixtures data file for prediction
    df_fixtures = pd.read_csv('../data/results201718.csv')

    # Predict the outcomes of fixtures
    predictions = predict_outcomes(df_fixtures, ratings)

    # Compare the predictions with the actual results
    correct_predictions = 0
    for index, row in df_fixtures.iterrows():
        home_team = row['home_team']
        away_team = row['away_team']
        actual_result = row['result']

        # Find the corresponding prediction for the fixture
        prediction = [p for p in predictions if p[0] == home_team and p[1] == away_team][0][2]

        # Check if the prediction is correct
        if actual_result == prediction:
            correct_predictions += 1

        # Output the prediction
        print(f"{home_team} - {away_team}: Outcome Predictions (Home Win, Draw, Away Win)")
        print(f"We predict: {prediction}")
        print()

    # Calculate the accuracy of the predictions
    accuracy = correct_predictions / len(df_fixtures) * 100

    # Output the overall accuracy
    print(f"Accuracy: {accuracy}%")

In [8]:
# Call the main function
if __name__ == '__main__':
    main()


KeyError: 'home_win'