In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score

# Load the match log and prepare it in one pass:
#   1. read the CSV, using its first column as the index;
#   2. parse "Date", turning unparseable values into NaT;
#   3. drop the rows whose date could not be parsed;
#   4. add the binary label `target` (1 where Result is "W", otherwise 0).
matches = (
    pd.read_csv("matches.csv", index_col=0)
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"], errors='coerce'))
    .dropna(subset=["Date"])
    .assign(target=lambda frame: (frame["Result"] == "W").astype("int"))
)

In [None]:
# --- Feature engineering -------------------------------------------------
# Each predictor is a numeric encoding of an existing column.
matches["venue_code"] = matches["Home"].astype("int")
matches["opp_code"] = matches["Opponent"].astype("category").cat.codes  # one integer code per distinct opponent
matches["day_code"] = matches["Date"].dt.dayofweek
matches["month"] = matches["Date"].dt.month

predictors = ["venue_code", "opp_code", "day_code", "month"]

# --- Model ---------------------------------------------------------------
rf = RandomForestClassifier(
    n_estimators=50,
    min_samples_split=10,
    random_state=1,  # fixed seed so repeated fits are reproducible
)

# --- Chronological split: rows before 2024-01-01 train, the rest test -----
train = matches.loc[matches["Date"] < '2024-01-01']
test = matches.loc[matches["Date"] >= '2024-01-01']

# `fit` returns the estimator itself, so prediction can be chained onto it.
preds = rf.fit(train[predictors], train["target"]).predict(test[predictors])

# --- Evaluation ----------------------------------------------------------
# NOTE: despite its name, `error` holds the accuracy score.
error = accuracy_score(test["target"], preds)
precision = precision_score(test["target"], preds)

# Actual vs. predicted labels side by side, then tabulated as a confusion matrix.
combined = pd.DataFrame({"actual": test["target"], "predicted": preds})
crosstab = pd.crosstab(
    combined["actual"],
    combined["predicted"],
    rownames=['Actual'],
    colnames=['Predicted'],
)

In [None]:
# Replace the index with consecutive integers 0..n-1 (rows were dropped after loading).
matches.index = range(len(matches))

In [None]:
def make_predictions(data, predictors, cutoff='2024-01-01', model=None):
  """Fit a classifier on rows dated before `cutoff` and score it on the rest.

  Parameters
  ----------
  data : pandas.DataFrame
      Must contain a "Date" column, a "target" column and every column
      named in `predictors`.
  predictors : list of str
      Column names used as model inputs.
  cutoff : str or datetime-like, default '2024-01-01'
      Rows with Date < cutoff form the training set; rows with
      Date >= cutoff form the test set.
  model : estimator with `fit`/`predict`, optional
      Defaults to the notebook-level `rf`. The model is (re)fitted in place.

  Returns
  -------
  (combined, precision)
      `combined` is a DataFrame indexed like the test rows with columns
      "actual" and "predicted"; `precision` is the precision score of the
      predictions on the test rows.

  Raises
  ------
  ValueError
      If either side of the split is empty.
  """
  if model is None:
    model = rf  # fall back to the classifier defined earlier in the notebook

  train = data[data["Date"] < cutoff]
  test = data[data["Date"] >= cutoff]
  if train.empty or test.empty:
    # Fail with a clear message instead of an opaque estimator error.
    raise ValueError(
      f"Splitting at {cutoff!r} left {len(train)} training rows and "
      f"{len(test)} test rows; both sides must be non-empty."
    )

  model.fit(train[predictors], train["target"])
  preds = model.predict(test[predictors])
  combined = pd.DataFrame(dict(actual=test["target"], predicted=preds), index=test.index)
  precision = precision_score(test["target"], preds)
  return combined, precision

combined, error = make_predictions(matches, predictors)

# `make_predictions` returns the precision score as its second value, so
# printing `error` under "Accuracy:" would report precision with the wrong
# label. Compute the accuracy explicitly from the prediction frame instead.
accuracy = accuracy_score(combined["actual"], combined["predicted"])

print("Accuracy:", accuracy)
# `precision` and `crosstab` are the values computed in the earlier evaluation cell.
print("Precision:", precision)
print(crosstab)