In [28]:
import requests
import time
from bs4 import BeautifulSoup
import pandas as pd

In [29]:
matches = pd.read_csv("matchesplusgroupstage.csv", index_col = 0)

matches["Date"] = pd.to_datetime(matches["Date"])
matches["Venue_Code"] = matches["Venue"].astype("category").cat.codes
matches["Opp_Code"] = matches["Opponent"].astype("category").cat.codes
matches["Hour"] = matches["Time"].str.replace(":.+", "", regex = True).astype("int")
matches["Day_Code"] = matches["Date"].dt.dayofweek
matches["Target"] = (matches["Result"] == "W").astype("int")

In [30]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

In [31]:
rf = RandomForestClassifier(n_estimators = 50, min_samples_split = 10, random_state = 1)

In [36]:
def rolling_averages(group, cols, new_cols):
  group = group.sort_values("Date")
  for col in cols:
    group[col] = pd.to_numeric(group[col], errors='coerce')
  rolling_stats = group[cols].rolling(3, closed = "left").mean()
  group[new_cols] = rolling_stats
  group = group.dropna(subset = new_cols)
  return group

def make_predictions(data, predictors):
  train = data[(data["Date"] < '2024-6-14')]
  test = data[(data["Date"] >= '2024-6-14')]
  predictors = ["Venue_Code", "Opp_Code", "Hour", "Day_Code"]
  rf.fit(train[predictors], train["Target"])
  preds = rf.predict(test[predictors])
  combined = pd.DataFrame(dict(actual = test["Target"], prediction = preds))
  precision = precision_score(test["Target"], preds)
  return combined, precision

In [37]:
predictors = ["Venue_Code", "Opp_Code", "Hour", "Day_Code"]

cols = ["Poss", "GF", "GA", "Sh", "SoT", "PK", "PKatt"]
new_cols = [f"{c}_Rolling" for c in cols]

matches_rolling = matches.groupby("Team").apply(lambda x: rolling_averages(x, cols, new_cols))
matches_rolling = matches_rolling.droplevel('Team')
matches_rolling.index = range(matches_rolling.shape[0])

combined, precision = make_predictions(matches_rolling, predictors + new_cols)
combined = combined.merge(matches_rolling[["Date", "Team", "Opponent", "Result"]], left_index = True, right_index = True)

combined.to_csv("past_predictions.csv")
print(precision)

0.32608695652173914
