In [33]:
import itertools
import re
from datetime import datetime

import numpy as np
import pandas as pd
from scipy.stats import poisson

In [34]:
# Folder holding the cleaned CSV exports. Keeping it in one constant means the
# notebook can be pointed at another machine or checkout by editing a single line.
DATA_DIR = "C:/Users/guygi/OneDrive/Bureau/concaf_analytics/datasets/clean"

# Historical results, current results and the fixtures to predict.
past_games_df = pd.read_csv(f"{DATA_DIR}/PastGames.csv")
games_df = pd.read_csv(f"{DATA_DIR}/Game.csv")
fixtures_df = pd.read_csv(f"{DATA_DIR}/Fixture.csv")

In [35]:
def add_weights(df, decay_rate=0.1, reference_date=None):
    """Return a copy of ``df`` with recency weights attached to every game.

    Three columns are written on the returned frame:

    * ``Date`` – parsed to datetime using the ``%d/%m/%Y`` format.
    * ``YearsFromRef`` – calendar-year difference between the reference date
      and the game date (only the year component is compared).
    * ``Weight`` – ``exp(-decay_rate * YearsFromRef)``, so older games count less.

    Parameters
    ----------
    df : pandas.DataFrame
        Games table containing a ``Date`` column. It is not modified.
    decay_rate : float, default 0.1
        Exponential decay per year; a smaller value makes the weight decrease
        more slowly.
    reference_date : datetime-like or str, optional
        Date the ages are measured from. Defaults to the current date and
        time, which makes the result depend on when the cell is run; pass an
        explicit value for reproducible weights.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns described above.
    """
    # Work on a copy so re-running the cell never alters the caller's frame
    # (and so a sliced frame does not trigger SettingWithCopyWarning).
    weighted = df.copy()
    weighted['Date'] = pd.to_datetime(weighted['Date'], format='%d/%m/%Y')

    reference_date = pd.to_datetime(datetime.now() if reference_date is None else reference_date)

    # Age of each game in whole calendar years.
    weighted['YearsFromRef'] = reference_date.year - weighted['Date'].dt.year

    # Vectorised exact exponential instead of a row-wise power of a truncated e.
    weighted['Weight'] = np.exp(-decay_rate * weighted['YearsFromRef'])

    return weighted

# Calculating weighted average goals for each team
def weighted_avg_goals(df, team_column, goals_column):
    """Weighted mean of ``df[goals_column]`` using ``df['Weight']`` as weights.

    ``team_column`` is accepted for call-site symmetry but is not used here;
    callers are expected to have filtered ``df`` to the relevant team already.
    If the weights sum to zero (for example an empty selection) the division
    is undefined and the result is not a usable number (typically NaN).
    """
    weights = df['Weight']
    weighted_goal_total = weights.mul(df[goals_column]).sum()
    return weighted_goal_total / weights.sum()

# Adjusting for the opponent and applying Poisson model
def predict_goals(team, opponent, home_or_away, df, max_goals=6):
    """Poisson probabilities of ``team`` scoring 0..``max_goals`` goals.

    Parameters
    ----------
    team, opponent : str
        Team whose goals are predicted, and the team it faces.
    home_or_away : str
        ``'Home'`` or ``'Away'``; selects the ``<role>Team`` / ``<role>TeamGoal``
        columns used for ``team``.
    df : pandas.DataFrame
        Games table with ``HomeTeam``, ``AwayTeam``, ``HomeTeamGoal``,
        ``AwayTeamGoal`` and ``Weight`` columns.
    max_goals : int, default 6
        Highest goal count included in the result.

    Returns
    -------
    list
        ``max_goals + 1`` probabilities, index ``i`` being P(team scores ``i``).
        The list is a truncated distribution: it sums to less than 1, and the
        missing tail grows with the Poisson mean.
    """
    team_column = home_or_away + 'Team'
    scored_column = home_or_away + 'TeamGoal'
    conceded_column = 'AwayTeamGoal' if home_or_away == 'Home' else 'HomeTeamGoal'

    # Weighted average of goals the team scored in this role.
    team_goals_avg = weighted_avg_goals(df[df[team_column] == team], team_column, scored_column)

    # Weighted average of goals the opponent conceded.
    # NOTE(review): this averages the opponent's matches in the SAME role as
    # `team` (e.g. the opponent's home games when `team` is at home), although
    # in the fixture the opponent plays the opposite role — confirm intended.
    opponent_def_avg = weighted_avg_goals(df[df[team_column] == opponent], team_column, conceded_column)

    # NOTE(review): the Poisson mean is the plain product of two goal averages,
    # with no normalisation by an overall average — confirm intended.
    adjusted_avg = team_goals_avg * opponent_def_avg
    return [poisson.pmf(goals, adjusted_avg) for goals in range(max_goals + 1)]

def get_match_probabilities(home_team, away_team, team_home_probs, team_away_probs):
    """Build the table of every scoreline and its probability for one match.

    Parameters
    ----------
    home_team, away_team : str
        Team names, copied into every row.
    team_home_probs, team_away_probs : sequence of float
        ``probs[i]`` is the probability of that side scoring ``i`` goals.
        The two sequences may have different lengths.

    Returns
    -------
    pandas.DataFrame
        One row per (home goals, away goals) pair with columns ``HomeTeam``,
        ``AwayTeam``, ``HomeTeamGoal``, ``AwayTeamGoal``, ``OutcomeProb``
        (product of the two per-side probabilities) and ``TotalGoalScored``,
        ordered from most to least likely scoreline.
    """
    # Pair every home goal count with every away goal count. Only the function's
    # own arguments are used, so the result never depends on notebook globals.
    scorelines = [
        (home_team, away_team, home_goals, away_goals, home_prob * away_prob)
        for (home_goals, home_prob), (away_goals, away_prob)
        in itertools.product(enumerate(team_home_probs), enumerate(team_away_probs))
    ]

    # Most likely scoreline first; the probability is the fifth field (index 4).
    scorelines.sort(key=lambda row: row[4], reverse=True)

    match_probabilities = pd.DataFrame(
        scorelines,
        columns=['HomeTeam', 'AwayTeam', 'HomeTeamGoal', 'AwayTeamGoal', 'OutcomeProb'],
    )
    match_probabilities['TotalGoalScored'] = (
        match_probabilities['HomeTeamGoal'] + match_probabilities['AwayTeamGoal']
    )

    return match_probabilities

def get_final_probabilities(home_team, away_team, df):
    """Summarise a scoreline table into percentage figures for one match.

    ``df`` must provide ``HomeTeamGoal``, ``AwayTeamGoal``, ``TotalGoalScored``
    and ``OutcomeProb`` columns. Each figure is the sum of ``OutcomeProb`` over
    the rows matching a condition, multiplied by 100. Results are from the
    home side's point of view (``Win`` means the home team scores more).

    Returns a dict keyed, in order, by ``HomeTeam``, ``AwayTeam``, ``Win``,
    ``Draw``, ``Loose``, ``BothScore``, ``Over 1.5``, ``Over 2.5``, ``Over 3.5``.
    """
    def percent(condition):
        # Total probability of the rows selected by the query, as a percentage.
        return sum(df.query(condition)["OutcomeProb"])*100

    # (output key, row filter) pairs, in the order they appear in the result.
    outcome_filters = (
        ('Win', "HomeTeamGoal > AwayTeamGoal"),
        ('Draw', "HomeTeamGoal == AwayTeamGoal"),
        ('Loose', "HomeTeamGoal < AwayTeamGoal"),
        ('BothScore', "(HomeTeamGoal > 0) and (AwayTeamGoal > 0)"),
        ('Over 1.5', "TotalGoalScored > 1"),
        ('Over 2.5', "TotalGoalScored > 2"),
        ('Over 3.5', "TotalGoalScored > 3"),
    )

    summary = {'HomeTeam': home_team, 'AwayTeam': away_team}
    for label, condition in outcome_filters:
        summary[label] = percent(condition)

    return summary

In [36]:
# Restrict the historical results to the columns the model reads downstream,
# stack them with the current results, then attach the recency weights.
game_columns = ['Date', 'HomeTeam', 'AwayTeam', 'HomeTeamGoal', 'AwayTeamGoal']
past_games_df = past_games_df[game_columns]
all_games_df = add_weights(pd.concat([past_games_df, games_df]))

In [37]:
# Produce one summary record per fixture: per-side goal distributions first,
# then the scoreline table, then the aggregated percentages.
probabilities_list = []
for home_team, away_team in zip(fixtures_df['HomeTeam'], fixtures_df['AwayTeam']):
    team_a_probs = predict_goals(home_team, away_team, 'Home', all_games_df)
    team_b_probs = predict_goals(away_team, home_team, 'Away', all_games_df)

    match_probabilities = get_match_probabilities(home_team, away_team, team_a_probs, team_b_probs)
    finalProbabilities = get_final_probabilities(home_team, away_team, match_probabilities)

    probabilities_list.append(finalProbabilities)

In [38]:
# Collect the per-fixture dictionaries into a single table, one row per fixture.
probabilities_df = pd.DataFrame(probabilities_list)
# Bare last expression so the notebook renders the table as the cell output.
probabilities_df

Unnamed: 0,HomeTeam,AwayTeam,Win,Draw,Loose,BothScore,Over 1.5,Over 2.5,Over 3.5
0,Algeria,Burkina Faso,36.374988,25.953682,35.303559,52.619184,72.284783,47.623057,25.6438
1,Mauritania,Angola,0.0,10.814554,81.670649,0.0,57.616082,30.864066,11.029432
2,Tunisie,Mali,25.831011,32.130961,41.460027,34.53961,53.780568,26.941399,10.643847
3,Maroc,Congo,37.699527,18.441787,27.065128,65.800417,77.700701,67.226961,51.073152
4,Zambia,Tanzania,53.320824,7.922162,4.563346,38.624267,60.940539,51.330854,36.029709
5,South Africa,Namibia,50.079928,6.525821,3.528767,34.886498,56.231017,48.018945,34.217082
6,Equatorial Guinea,Ivory Coast,48.24266,23.024499,24.279471,55.030406,75.673199,53.289745,30.885593
7,Guinea-Bissau,Nigeria,0.0,13.117352,81.331139,0.0,54.686725,27.626201,9.304113
8,Cape Verde,Egypt,42.281791,30.873853,26.092738,37.172786,56.998143,29.955972,12.469114
9,Mozambique,Ghana,38.220209,20.757387,31.501303,65.28428,80.549957,65.146102,45.127855
