In [177]:
import pandas as pd

In [178]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.impute import SimpleImputer
from tabulate import tabulate

In [179]:
# Load the raw match-level data; the path is resolved relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer="PremierLeague.csv")

In [180]:
# Convert the Date column to datetimes when it is present. With
# errors="coerce", values that cannot be parsed become NaT instead of raising.
if "Date" in df:
    df["Date"] = pd.to_datetime(arg=df["Date"], errors="coerce")

In [181]:
# Two regression targets: full-time goals for the home side and the away side.
y = df.loc[
    :,
    [
        "FullTimeHomeTeamGoals",
        "FullTimeAwayTeamGoals",
    ],
]

In [182]:
# Candidate input columns: the two teams, season, match week, the three
# B365 columns, and the match date (Date is kept for reporting; it is not
# listed among the model features further down).
X = df.loc[
    :,
    [
        "HomeTeam",
        "AwayTeam",
        "Season",
        "MatchWeek",
        "B365HomeTeam",
        "B365Draw",
        "B365AwayTeam",
        "Date",
    ],
]

In [183]:
# Discard rows that lack a date or a match week; other columns may still
# contain missing values.
X = X.dropna(axis="index", how="any", subset=["Date", "MatchWeek"])

In [184]:
# Keep the targets row-aligned with the filtered feature frame.
y = y.loc[X.index, :]

In [185]:
# Columns that are one-hot encoded by the preprocessor.
categorical_features = [
    "HomeTeam",
    "AwayTeam",
    "Season",
]

In [186]:
# Columns passed through the numeric branch of the preprocessor
# (mean-imputed there).
numeric_features = [
    "MatchWeek",
    "B365HomeTeam",
    "B365Draw",
    "B365AwayTeam",
]

In [187]:
# Feature preparation:
#   * "cat": one-hot encode the categorical columns. handle_unknown="ignore"
#     means a category not seen during fit does not raise at predict time.
#   * "num": fill missing numeric values with the column mean.
# Any column not named in either list (e.g. Date) is dropped.
preprocessor = ColumnTransformer(
    transformers=[
        (
            "cat",
            OneHotEncoder(handle_unknown="ignore"),
            categorical_features,
        ),
        (
            "num",
            Pipeline(steps=[("imputer", SimpleImputer(strategy="mean"))]),
            numeric_features,
        ),
    ],
    remainder="drop",
)

In [188]:
# One random forest per target column (home goals, away goals); the fixed
# random_state keeps the fitted forests reproducible between runs.
model = MultiOutputRegressor(
    estimator=RandomForestRegressor(n_estimators=200, random_state=42),
)

In [189]:
# Chain preprocessing and the regressor so both are fitted and applied as
# one estimator.
pipeline = Pipeline(
    steps=[("preprocessor", preprocessor), ("model", model)],
)

In [190]:
# True for every row that does not belong to the 2024-2025 season.
train_mask = X["Season"].ne("2024-2025")

In [191]:
# True only for rows of the 2024-2025 season, the one being predicted.
predict_mask = X["Season"].eq("2024-2025")

In [192]:
# Training split: features and targets selected by the same row mask.
X_train = X.loc[train_mask]
y_train = y.loc[train_mask]

In [193]:
# Prediction split: features to score and the actual goals to compare with.
X_pred = X.loc[predict_mask]
y_true = y.loc[predict_mask]

In [194]:
# Fit the preprocessing steps and the regressor on the training split only.
pipeline.fit(X=X_train, y=y_train)

In [195]:
# Predicted goals for the prediction split; columns follow the order of the
# target columns used at fit time.
y_pred = pipeline.predict(X=X_pred)

In [201]:
# First prediction column, rounded to the nearest whole number of goals.
pred_home = y_pred[:, 0].round(decimals=0).astype(int)

In [202]:
# Second prediction column, rounded to the nearest whole number of goals.
pred_away = y_pred[:, 1].round(decimals=0).astype(int)

In [203]:
def result_label(home, away):
    """Return the match outcome label for a pair of goal counts.

    Parameters
    ----------
    home : number
        Goals scored by the home team.
    away : number
        Goals scored by the away team.

    Returns
    -------
    str
        "Home Win" when ``home > away``, "Away Win" when ``home < away``,
        otherwise "Draw". Any pair for which neither comparison holds
        (equal scores, but also non-comparable values such as NaN) falls
        through to "Draw".
    """
    if home > away:
        return "Home Win"
    elif home < away:
        # The label previously began with a stray "Ä", which leaked into the
        # printed table and the exported CSV.
        return "Away Win"
    else:
        return "Draw"

In [204]:
# Outcome label for each real scoreline, in row order.
actual_results = list(
    map(
        result_label,
        y_true["FullTimeHomeTeamGoals"],
        y_true["FullTimeAwayTeamGoals"],
    )
)

In [205]:
# Outcome label for each rounded predicted scoreline, in row order.
predicted_results = list(map(result_label, pred_home, pred_away))

In [206]:
# One row per predicted match. "Date" is supplied as a Series, so the frame
# takes its row index from X_pred; every other column is a plain array/list
# filled in positionally.
predictions = pd.DataFrame(
    data={
        # Match identification
        "Date": X_pred["Date"].dt.strftime("%Y-%m-%d"),
        "Match Week": X_pred["MatchWeek"].values,
        "Home Team": X_pred["HomeTeam"].values,
        "Away Team": X_pred["AwayTeam"].values,
        # Outcome labels
        "Actual Result": actual_results,
        "Predicted Result": predicted_results,
        # Goal counts
        "Actual Home Goals": y_true["FullTimeHomeTeamGoals"].values,
        "Actual Away Goals": y_true["FullTimeAwayTeamGoals"].values,
        "Pred Home Goals": pred_home,
        "Pred Away Goals": pred_away,
    }
)

In [207]:
# A prediction counts as correct when the outcome label matches, regardless
# of the exact scoreline.
predictions["Correct"] = predictions["Actual Result"].eq(
    predictions["Predicted Result"]
)

In [208]:
# Human-readable "home-away" scoreline built from the actual goal columns.
predictions["Actual Score"] = (
    predictions["Actual Home Goals"]
    .astype(str)
    .add("-")
    .add(predictions["Actual Away Goals"].astype(str))
)

In [209]:
# Human-readable "home-away" scoreline built from the rounded predictions.
predictions["Predicted Score"] = (
    predictions["Pred Home Goals"]
    .astype(str)
    .add("-")
    .add(predictions["Pred Away Goals"].astype(str))
)

In [210]:
# Display view: only the columns shown in the printed table, ordered by
# match week.
pred_table = predictions.loc[
    :,
    [
        "Date",
        "Match Week",
        "Home Team",
        "Away Team",
        "Actual Result",
        "Predicted Result",
        "Actual Score",
        "Predicted Score",
    ],
].sort_values(by="Match Week")

In [211]:
# Print a heading followed by the full prediction table as a text grid
# (the DataFrame index is hidden).
print(
    "\n2024-2025 Season EPL Predictions",
    tabulate(pred_table, headers="keys", tablefmt="grid", showindex=False),
    sep="\n",
)


2024-2025 Season EPL Predictions
+------------+--------------+----------------+----------------+-----------------+--------------------+----------------+-------------------+
| Date       |   Match Week | Home Team      | Away Team      | Actual Result   | Predicted Result   | Actual Score   | Predicted Score   |
| 2024-08-16 |            1 | Man United     | Fulham         | Home Win        | Draw               | 1-0            | 2-2               |
+------------+--------------+----------------+----------------+-----------------+--------------------+----------------+-------------------+
| 2024-08-17 |            1 | Ipswich        | Liverpool      | Äway Win        | Äway Win           | 0-2            | 0-2               |
+------------+--------------+----------------+----------------+-----------------+--------------------+----------------+-------------------+
| 2024-08-17 |            1 | Arsenal        | Wolves         | Home Win        | Home Win           | 2-0            | 2-1   

In [212]:
# Share of correct outcome predictions per match week, as a percentage.
weekly_accuracy = (
    predictions
    .groupby("Match Week")["Correct"]
    .mean()
    .mul(100)
)

In [213]:
# One line per match week, accuracy formatted to two decimals.
print("\nWeekly Accuracy")
for week, acc in zip(weekly_accuracy.index, weekly_accuracy):
    print(f"Match Week {week}: {acc:.2f}%")


Weekly Accuracy
Match Week 1: 50.00%
Match Week 2: 80.00%
Match Week 3: 60.00%
Match Week 4: 70.00%
Match Week 5: 50.00%
Match Week 6: 40.00%
Match Week 7: 60.00%
Match Week 8: 40.00%
Match Week 9: 40.00%
Match Week 10: 50.00%
Match Week 11: 60.00%
Match Week 12: 40.00%
Match Week 13: 70.00%
Match Week 14: 60.00%
Match Week 15: 20.00%
Match Week 16: 20.00%
Match Week 17: 50.00%
Match Week 18: 50.00%
Match Week 19: 40.00%
Match Week 20: 40.00%
Match Week 21: 70.00%
Match Week 22: 50.00%
Match Week 23: 30.00%
Match Week 24: 30.00%
Match Week 25: 50.00%
Match Week 26: 50.00%
Match Week 27: 60.00%
Match Week 28: 50.00%
Match Week 29: 30.00%
Match Week 30: 60.00%
Match Week 31: 50.00%
Match Week 32: 60.00%
Match Week 33: 30.00%
Match Week 34: 80.00%
Match Week 35: 40.00%
Match Week 36: 20.00%
Match Week 37: 40.00%
Match Week 38: 50.00%


In [214]:
# Overall share of correct outcome predictions across all predicted matches,
# as a percentage.
total_accuracy = 100 * predictions["Correct"].mean()

In [215]:
# Report the overall outcome accuracy, formatted to two decimal places.
print(f"\nTotal Prediction Accuracy: {total_accuracy:.2f}%")


Total Prediction Accuracy: 48.42%


In [217]:
# Persist the full prediction frame (all columns, no index column).
# NOTE(review): the file name spells "League" as "Leauge"; it is kept as-is
# because other code may already read this exact path. Confirm before renaming.
predictions.to_csv(
    path_or_buf="PremierLeauge_2024-2025_Prediction.csv",
    index=False,
)