SETUP

In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import ParameterGrid, train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score
import numpy as np
import random as r

In [2]:
matches = pd.read_csv('matches.csv', index_col=0)

In [3]:
class MissingDict(dict):
    __missing__ = lambda self, key: key

map_values = {
    "Brighton and Hove Albion": "Brighton",
    "Manchester United": "Manchester Utd", 
    "Newcastle United": "Newcastle Utd",
    "Tottenham Hotspur": "Tottenham",
    "West Ham United": "West Ham",
    "Nott'ham Forest": "Nottingham Forest",
    "Wolverhampton Wanderers": "Wolves",
}

mapping = MissingDict(**map_values)

In [4]:
matches.dtypes

date        object
time        object
comp        object
round       object
day         object
            ...   
npxg       float64
npxg/sh    float64
g-xg       float64
np:g-xg    float64
team        object
Length: 152, dtype: object

In [5]:
# Convert column data to numeric
matches['team'] = matches['team'].replace(mapping)
matches['opponent'] = matches['opponent'].replace(mapping)
matches["date"] = pd.to_datetime(matches["date"])
matches["day_code"] = matches["date"].dt.dayofweek
matches["date"] = matches["date"].dt.date
matches["venue_code"] = matches["venue"].astype('category').cat.codes
matches["team_code"] = matches["team"].astype('category').cat.codes
matches["opp_code"] = matches["opponent"].astype('category').cat.codes
matches["hour"] = matches["time"].str.replace(":.+", "", regex=True).astype("int")
# matches = matches[matches["date"] < '2023-05-25']
matches["pts"] = matches["result"].map({'W': 3, 'D': 1, 'L': 0})


In [6]:
# Group the DataFrame by 'team' column
grouped = matches.groupby('team')

# Initialize an empty DataFrame to store the filtered matches
all_matches = []

# Iterate over each group
for team, group in grouped:
    # Find the index of the next match for the team
    # past_match_group = group[group['date'] < pd.Timestamp.now().date()]
    next_match_index = group[group['date'] < pd.Timestamp.now().date()].shape[0]
    if (next_match_index <= group.shape[0]):
        all_matches.append(group[:next_match_index + 1])

all_matches_df = pd.concat(all_matches).sort_values(by=['team', 'date'])

next_matches = all_matches_df[all_matches_df['date'] >= pd.Timestamp.now().date()].copy()
next_matches['match'] = next_matches[['team', 'opponent']].apply(lambda x: '_'.join(sorted(x)), axis=1)
groups = next_matches.groupby(['date', 'time', 'match'])
valid_matches = groups.filter(lambda x: len(x) == 2)
valid_matches.drop(columns=['match'], inplace=True)

final_matches = pd.concat([all_matches_df[all_matches_df['date'] < pd.Timestamp.now().date()], valid_matches])
final_matches.sort_values(by=['team', 'date'], inplace=True)
final_matches = final_matches.reset_index(drop=True)


In [7]:
print(final_matches.dtypes)

date           object
time           object
comp           object
round          object
day            object
               ...   
venue_code       int8
team_code        int8
opp_code         int8
hour            int32
pts           float64
Length: 158, dtype: object


CALCULATE MEAN AND ROLLING VALUES

In [8]:
total = 0
for col in final_matches.columns:
    nan_locations = final_matches.index[final_matches[col].isna()].tolist()
    if nan_locations:
        print(col, ": ", nan_locations)
        total += len(nan_locations)
print(total)

result :  [37, 112, 186, 223, 261, 299, 374, 412, 450, 487, 524, 599, 637, 675, 713, 751]
gf :  [37, 112, 186, 223, 261, 299, 374, 412, 450, 487, 524, 599, 637, 675, 713, 751]
ga :  [37, 112, 186, 223, 261, 299, 374, 412, 450, 487, 524, 599, 637, 675, 713, 751]
xga :  [37, 112, 186, 223, 261, 299, 374, 412, 450, 487, 524, 599, 637, 675, 713, 751]
formation :  [37, 112, 186, 223, 261, 299, 374, 412, 450, 487, 524, 599, 637, 675, 713, 751]
tkl :  [37, 112, 186, 223, 261, 299, 374, 412, 450, 487, 524, 599, 637, 675, 713, 751]
tklw :  [37, 112, 186, 223, 261, 299, 374, 412, 450, 487, 524, 599, 637, 675, 713, 751]
tkldef3rd :  [37, 112, 186, 223, 261, 299, 374, 412, 450, 487, 524, 599, 637, 675, 713, 751]
tklmid3rd :  [37, 112, 186, 223, 261, 299, 374, 412, 450, 487, 524, 599, 637, 675, 713, 751]
tklatt3rd :  [37, 112, 186, 223, 261, 299, 374, 412, 450, 487, 524, 599, 637, 675, 713, 751]
dritkl :  [37, 112, 186, 223, 261, 299, 374, 412, 450, 487, 524, 599, 637, 675, 713, 751]
drichall :  [3

In [None]:
# valid_cols = final_matches.select_dtypes(include=['int8', 'int64', 'float64', 'int32']).columns.tolist()
# total = 0

# for col in valid_cols:
#     for i in range(final_matches.shape[0]):
#         val = final_matches.loc[i, col]
#         if pd.isnull(val):
#             average = final_matches[col][:i].dropna().mean()
#             final_matches.at[i, col] = average
#             total+= 1
# print(total)

In [9]:
teams = final_matches['team'].unique().tolist()
dfs = [final_matches[final_matches['team'] == x] for x in teams]
dfs = [x.reset_index(drop=True) for x in dfs]

In [10]:
valid_cols = final_matches.select_dtypes(include=['int8', 'int64', 'float64', 'int32']).columns.tolist()
total = 0

for df in dfs:
    for col in valid_cols:
        for i in range(df.shape[0]-1):
            val = df.loc[i, col]
            if pd.isnull(val):
                if i == 0:
                    average = df[col].dropna().mean()
                    df.at[i, col] = average
                else:
                    average = df[col][:i].dropna().mean()
                    df.at[i, col] = average
                total += 1
print(total)
    

108


In [11]:
final_matches = pd.concat(dfs).sort_values(by=['team', 'date']).reset_index(drop=True)

In [12]:
final_matches.to_csv('final_matches.csv')

In [21]:
final_matches[final_matches['team'] == 'Manchester City']

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,npxg/sh,g-xg,np:g-xg,team,day_code,venue_code,team_code,opp_code,hour,pts
451,2022-08-07,16:30,Premier League,Matchweek 1,Sun,Away,W,2.0,0.0,West Ham,...,0.11,-0.2,-0.4,Manchester City,6,0,12,18,16,3.0
452,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4.0,0.0,Bournemouth,...,0.09,1.3,1.3,Manchester City,5,1,12,2,15,3.0
453,2022-08-21,16:30,Premier League,Matchweek 3,Sun,Away,D,3.0,3.0,Newcastle Utd,...,0.1,0.9,0.9,Manchester City,6,0,12,14,16,1.0
454,2022-08-27,15:00,Premier League,Matchweek 4,Sat,Home,W,4.0,2.0,Crystal Palace,...,0.13,1.8,1.8,Manchester City,5,1,12,6,15,3.0
455,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,6.0,0.0,Nottingham Forest,...,0.2,2.7,2.7,Manchester City,2,1,12,15,19,3.0
456,2022-09-03,17:30,Premier League,Matchweek 6,Sat,Away,D,1.0,1.0,Aston Villa,...,0.16,-1.1,-1.1,Manchester City,5,0,12,1,17,1.0
457,2022-09-17,12:30,Premier League,Matchweek 8,Sat,Away,W,3.0,0.0,Wolves,...,0.07,1.9,1.9,Manchester City,5,0,12,19,12,3.0
458,2022-10-02,14:00,Premier League,Matchweek 9,Sun,Home,W,6.0,3.0,Manchester Utd,...,0.15,2.8,2.8,Manchester City,6,1,12,13,14,3.0
459,2022-10-08,15:00,Premier League,Matchweek 10,Sat,Home,W,4.0,0.0,Southampton,...,0.12,1.6,1.6,Manchester City,5,1,12,16,15,3.0
460,2022-10-16,16:30,Premier League,Matchweek 11,Sun,Away,L,0.0,1.0,Liverpool,...,0.06,-1.0,-1.0,Manchester City,6,0,12,11,16,0.0


In [22]:
all_cols = final_matches.columns.tolist()
cols = all_cols[7:9] + all_cols[10:11] + all_cols[12:151] + all_cols[-1:]
rolling_cols = [f"{c}_rolling" for c in cols]
mean_cols = [f"{c}_mean" for c in cols]

In [23]:
rolling_averages = final_matches.groupby('team')[cols].rolling(window=5, min_periods=3, closed='left').mean()
rolling_averages.reset_index(level=0, drop=True, inplace=True)

# overall_averages = final_matches.groupby('team')[cols].expanding().mean().shift().reset_index(level=0, drop=True)
overall_averages = final_matches.groupby('team')[cols].apply(lambda x: x.shift().expanding().mean())
# overall_averages = final_matches[cols].expanding().mean().shift()
overall_averages.reset_index(level=0, drop=True, inplace=True)

result_df = pd.concat([final_matches, rolling_averages.add_suffix('_rolling'), overall_averages.add_suffix('_mean')], axis=1)


In [16]:
with open('t.txt', 'w') as f:
    for col in result_df.columns:
        f.write(col + '\n')

In [24]:
result_df[result_df['team'] == 'Brighton'][['date', 'team', 'opponent', 'result', 'gf', 'ga', 'gf_rolling', 'ga_rolling', 'gf_mean', 'ga_mean', 'xg', 'xg_rolling', 'xg_mean']].head()

Unnamed: 0,date,team,opponent,result,gf,ga,gf_rolling,ga_rolling,gf_mean,ga_mean,xg,xg_rolling,xg_mean
150,2022-08-07,Brighton,Manchester Utd,W,2.0,1.0,,,,,1.5,,
151,2022-08-13,Brighton,Newcastle Utd,D,0.0,0.0,,,2.0,1.0,1.5,,1.5
152,2022-08-21,Brighton,West Ham,W,2.0,0.0,,,1.0,0.5,1.7,,1.5
153,2022-08-27,Brighton,Leeds United,W,1.0,0.0,1.333333,0.333333,1.333333,0.333333,2.0,1.566667,1.566667
154,2022-08-30,Brighton,Fulham,L,1.0,2.0,1.25,0.25,1.25,0.25,1.4,1.675,1.675


In [25]:
result_df[result_df['team'] == 'Brighton'][['date', 'team', 'opponent', 'result', 'gf', 'ga', 'gf_rolling', 'ga_rolling', 'gf_mean', 'ga_mean', 'xg', 'xg_rolling', 'xg_mean']].head()

Unnamed: 0,date,team,opponent,result,gf,ga,gf_rolling,ga_rolling,gf_mean,ga_mean,xg,xg_rolling,xg_mean
150,2022-08-07,Brighton,Manchester Utd,W,2.0,1.0,,,,,1.5,,
151,2022-08-13,Brighton,Newcastle Utd,D,0.0,0.0,,,2.0,1.0,1.5,,1.5
152,2022-08-21,Brighton,West Ham,W,2.0,0.0,,,1.0,0.5,1.7,,1.5
153,2022-08-27,Brighton,Leeds United,W,1.0,0.0,1.333333,0.333333,1.333333,0.333333,2.0,1.566667,1.566667
154,2022-08-30,Brighton,Fulham,L,1.0,2.0,1.25,0.25,1.25,0.25,1.4,1.675,1.675


In [26]:
result_df.to_csv('matches_rolling.csv')

START OF ML ALGORITHM

In [None]:
def find_params(data, predictors):
    train = data[data["date"] < '2023-04-01']
    test = data[data["date"] > '2023-04-01']
    X_train = train[predictors]
    y_train = train[["gf", "ga"]]
    X_test = test[predictors]
    y_test = test[["gf", "ga"]]

    # Define the parameter space
    param_space = {
        'n_estimators': [i for i in range(1, 2000) if i % 100 == 0],
        'min_samples_split': [i for i in range(2, 10)],
        'min_samples_leaf': [i for i in range(1, 10)],
        'max_features': ['auto', 'sqrt'],
        'random_state': [42]
    }

    # Iterate over the hyperparameter combinations
    best_result_score = -np.inf
    best_scoreline_score = -np.inf
    best_overall_score = -np.inf

    best_res_scores = []
    best_scoreline_scores = []
    best_overall_scores = []

    best_result_params = None
    best_scoreline_params = None
    best_overall_params = None
    
    for params in ParameterGrid(param_space):
        # Create an instance of the Random Forest classifier
        rf = RandomForestClassifier(**params)

        rf.fit(X_train, y_train)
        preds = rf.predict(X_test)
        combined = pd.DataFrame({
            "Date": test["date"],
            "Team": test["team"],
            "Opponent": test["opponent"],
            "Predicted_GF": preds[:, 0],  # Assuming "gf" is the first column in predictions
            "Predicted_GA": preds[:, 1],  # Assuming "ga" is the second column in predictions
            "Actual_GF": y_test["gf"],
            "Actual_GA": y_test["ga"]
        })

        merged = combined.merge(combined, left_on=["Date", "Team"], right_on=["Date", "Opponent"])

        # Calculate the mean score
        correct_wins = merged[(merged["Predicted_GF_x"] > merged["Predicted_GA_x"]) & (merged["Actual_GF_x"] > merged["Actual_GA_x"])].shape[0]/merged.shape[0]
        correct_losses = merged[(merged["Predicted_GF_x"] < merged["Predicted_GA_x"]) & (merged["Actual_GF_x"] < merged["Actual_GA_x"])].shape[0]/merged.shape[0]
        correct_draws = merged[(merged["Predicted_GF_x"] == merged["Predicted_GA_x"]) & (merged["Actual_GF_x"] == merged["Actual_GA_x"])].shape[0]/merged.shape[0]
        correct_res = correct_wins + correct_losses + correct_draws
        correct_score = merged[(merged["Predicted_GF_x"] == merged["Actual_GF_x"]) & (merged["Predicted_GA_x"] == merged["Actual_GA_x"])].shape[0]/merged.shape[0]

        mean_score = (correct_res + correct_score)/2
        # Check if the current combination is the best so far
        if best_result_score < correct_res:
            best_result_score = correct_res
            best_result_params = params
            best_res_scores = [correct_res, correct_score, mean_score]

        if best_scoreline_score < correct_score:
            best_scoreline_score = correct_score
            best_scoreline_params = params
            best_scoreline_scores = [correct_res, correct_score, mean_score]

        if best_overall_score < mean_score:
            best_overall_score = mean_score
            best_overall_params = params
            best_overall_scores = [correct_res, correct_score, mean_score]

    # Print the best hyperparameters and the best score
    print("Best Result params:", best_result_params, "; Best Result Score:", best_res_scores[0], "; Best Scoreline Score:", best_res_scores[1], "; Best Overall Score:", best_res_scores[2])
    print("Best Scoreline params:", best_scoreline_params, "; Best Result Score:", best_scoreline_scores[0], "; Best Scoreline Score:", best_scoreline_scores[1], "; Best Overall Score:", best_scoreline_scores[2])
    print("Best Overall params:", best_overall_params, "; Best Result Score:", best_overall_scores[0], "; Best Scoreline Score:", best_overall_scores[1], "; Best Overall Score:", best_overall_scores[2])

RANDOM FOREST (GROUP MATCHES TO MAKE 1 PREDICTION PER MATCH)

In [77]:
def make_predictions_single_tree_grouped(data, predictors, rf):
    X = data[predictors]
    y = data[['gf_home', 'gf_away']]

    print(len(X.columns))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
    
    rf.fit(X_train, y_train)
    preds = rf.predict(X_test)

    # Create a new DataFrame with the desired columns
    result_df = X_test.copy()
    result_df['Date'] = data.loc[X_test.index, 'date']
    result_df['Home_Team'] = data.loc[X_test.index, 'home_team']
    result_df['Away_Team'] = data.loc[X_test.index, 'away_team']
    result_df['Actual_GF_Home'] = y_test['gf_home']
    result_df['Actual_GF_Away'] = y_test['gf_away']
    result_df['Predicted_GF_Home'] = preds[:, 0]  # Predicted goals for home team
    result_df['Predicted_GF_Away'] = preds[:, 1]  # Predicted goals for away team
    return result_df[['Date', 'Home_Team', 'Away_Team', 'Actual_GF_Home', 'Actual_GF_Away', 'Predicted_GF_Home', 'Predicted_GF_Away']]

In [28]:
home_table = result_df[result_df["venue"] == "Home"].sort_values(by=['date', 'time', 'team'])
away_table = result_df[result_df["venue"] == "Away"].sort_values(by=['date', 'time', 'opponent'])
home_table_renamed = home_table.rename(columns={"team": "home_team", "opponent": "away_team", "gf": "gf_home", "ga": "gf_away"})
away_table_renamed = away_table.rename(columns={"team": "away_team", "opponent": "home_team", "gf": "gf_away", "ga": "gf_home"})

In [29]:
grouped = pd.merge(home_table_renamed, away_table_renamed, on=["date", "time", "home_team", "away_team", "gf_home", "gf_away"], suffixes=("_home", "_away"))

In [30]:
grouped_final = grouped.sort_values(by='date')

In [31]:
grouped_final.to_csv('grouped_final.csv')

In [32]:
general = ["venue_code", "team_code", "day_code"]
attacking = ["gf", "xg", "sh", "sot", "npxg", "npxg/sh", "attboxtouches"]
passing = ["totpasscmp", "totpassatt", "totpasscmp%", "totpassdist", "prgpassdist", "xag", "xa", "keypasses"]
gk = ["sota", "saves", "save%", "psxg", "passlaunch%", "gklaunch%"]
ca = ["sca", "gca", "scalivepass", "gcalivepass"]
possesion = ["poss", "att3rdtouches", "attboxtouches", "atttakeons", "succtakeons", "carries", "totdistcarried", "prgdistcarried"]
defense = ["tkl", "tklw", "tkldef3rd", "tklmid3rd", "tklatt3rd", "blocks", "int"]
misc = ["fouls", "foulsdrawn", "recov", "aerialwon%"]

In [51]:
base = attacking + passing + gk + ca + possesion + defense + misc
base_averages = [f"{x}_rolling" for x in base] + [f"{x}_mean" for x in base]
base_home_away = [f"{x}_home" for x in base_averages] + [f"{x}_away" for x in base_averages]
predictors = [f"{x}_home" for x in general] + [f"{x}_away" for x in general] + base_home_away

In [97]:
rfTest = RandomForestClassifier(n_estimators=400, min_samples_split=50, min_samples_leaf=4, random_state=42)

In [98]:
combined = make_predictions_single_tree_grouped(grouped_final.dropna(), predictors, rfTest)
correct_scores = combined[(combined["Predicted_GF_Home"] == combined["Actual_GF_Home"]) & (combined["Predicted_GF_Away"] == combined["Actual_GF_Away"])].shape[0]
correct_home_wins = combined[(combined["Predicted_GF_Home"] > combined["Predicted_GF_Away"]) & (combined["Actual_GF_Home"] > combined["Actual_GF_Away"])].shape[0]
correct_away_wins = combined[(combined["Predicted_GF_Home"] < combined["Predicted_GF_Away"]) & (combined["Actual_GF_Home"] < combined["Actual_GF_Away"])].shape[0]
correct_draws = combined[(combined["Predicted_GF_Home"] == combined["Predicted_GF_Away"]) & (combined["Actual_GF_Home"] == combined["Actual_GF_Away"])].shape[0]
total_games = combined.shape[0]
print("correct scores:", correct_scores/total_games)
print("correct home wins:", correct_home_wins/total_games)
print("correct away wins:", correct_away_wins/total_games)
print("correct draws:", correct_draws/total_games)
print("correct results:", (correct_home_wins + correct_away_wins + correct_draws)/total_games)

182
correct scores: 0.12941176470588237
correct home wins: 0.3176470588235294
correct away wins: 0.03529411764705882
correct draws: 0.08235294117647059
correct results: 0.43529411764705883


In [34]:
next_games = grouped_final[grouped_final['date'] >= pd.Timestamp.now().date()]

In [64]:
def predict(data, predictors, rf):
    X = data[predictors]
    print(len(X.columns))
    y = data[['gf_home', 'gf_away']]

    preds = rf.predict(X)

    # Create a new DataFrame with the desired columns
    result_df = data.copy()
    result_df['date'] = data['date']
    result_df['home_team'] = data['home_team']
    result_df['away_team'] = data['away_team']
    result_df['predicted_gf_home'] = preds[:, 0]  # Predicted goals for home team
    result_df['predicted_gf_away'] = preds[:, 1]  # Predicted goals for away team

    return result_df[['date', 'home_team', 'away_team', 'predicted_gf_home', 'predicted_gf_away']]


In [54]:
next_games[predictors]

Unnamed: 0,venue_code_home,team_code_home,day_code_home,venue_code_away,team_code_away,day_code_away,gf_rolling_home,xg_rolling_home,sh_rolling_home,sot_rolling_home,...,tklw_mean_away,tkldef3rd_mean_away,tklmid3rd_mean_away,tklatt3rd_mean_away,blocks_mean_away,int_mean_away,fouls_mean_away,foulsdrawn_mean_away,recov_mean_away,aerialwon%_mean_away
368,1,4,2,0,12,2,1.8,2.04,18.0,5.2,...,7.305556,4.722222,5.25,2.388889,8.861111,5.916667,9.111111,9.944444,49.305556,53.294444
369,1,13,3,0,5,3,0.8,1.68,19.2,5.8,...,11.75,9.527778,6.944444,2.916667,12.138889,9.111111,10.611111,12.194444,53.5,50.730556
374,1,10,6,0,18,6,1.2,1.76,10.0,3.8,...,8.783784,7.621622,5.864865,2.459459,10.945946,10.945946,9.513514,8.324324,53.243243,51.443243
370,1,0,6,0,19,6,1.2,1.0,11.8,4.4,...,9.891892,9.72973,5.864865,2.027027,13.243243,7.486486,12.216216,10.297297,54.540541,45.740541
371,1,6,6,0,15,6,1.6,1.28,12.8,4.2,...,10.189189,9.945946,5.324324,2.027027,13.243243,9.27027,11.675676,10.297297,50.027027,48.845946
372,1,7,6,0,2,6,1.8,1.98,14.2,4.6,...,9.837838,8.405405,6.567568,1.405405,13.27027,9.378378,10.243243,9.567568,51.054054,49.745946
373,1,9,6,0,17,6,1.2,1.26,10.4,3.4,...,9.216216,8.27027,5.648649,2.108108,11.162162,8.864865,11.324324,9.351351,51.810811,52.737838
375,1,16,6,0,11,6,1.0,1.08,8.6,2.2,...,9.594595,6.135135,6.459459,2.810811,9.297297,8.648649,10.675676,8.567568,58.216216,52.024324


In [65]:
preds = predict(next_games, predictors, rfTest)

182


In [66]:
preds

Unnamed: 0,date,home_team,away_team,predicted_gf_home,predicted_gf_away
368,2023-05-24,Brighton,Manchester City,1.0,1.0
369,2023-05-25,Manchester Utd,Chelsea,1.0,0.0
374,2023-05-28,Leicester City,West Ham,0.0,1.0
370,2023-05-28,Arsenal,Wolves,3.0,1.0
371,2023-05-28,Crystal Palace,Nottingham Forest,2.0,0.0
372,2023-05-28,Everton,Bournemouth,1.0,1.0
373,2023-05-28,Leeds United,Tottenham,1.0,1.0
375,2023-05-28,Southampton,Liverpool,1.0,1.0


In [67]:
preds.to_csv('preds.csv')

In [72]:
combined

Unnamed: 0,Date,Home_Team,Away_Team,Actual_GF_Home,Actual_GF_Away,Predicted_GF_Home,Predicted_GF_Away
139,2022-11-12,Nottingham Forest,Crystal Palace,1.0,0.0,1.0,1.0
360,2023-05-20,Fulham,Crystal Palace,2.0,2.0,1.0,0.0
210,2023-02-11,West Ham,Chelsea,1.0,1.0,1.0,0.0
109,2022-10-22,Chelsea,Manchester Utd,1.0,1.0,0.0,0.0
246,2023-03-04,Wolves,Tottenham,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...
215,2023-02-11,Southampton,Wolves,1.0,2.0,1.0,1.0
103,2022-10-19,Manchester Utd,Tottenham,2.0,0.0,0.0,1.0
46,2022-08-31,Manchester City,Nottingham Forest,6.0,0.0,4.0,1.0
92,2022-10-16,Aston Villa,Chelsea,0.0,2.0,1.0,1.0


In [73]:
combined.to_csv('predictions.csv')

In [59]:
tdate = pd.Timestamp.now().date()
tdate

datetime.date(2023, 5, 24)

RANDOM FOREST, 1 TREE/TEAM