<a href="https://colab.research.google.com/github/Tushar12S/Football-Match-Predictor/blob/main/Football_Match_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
Root = "/content/drive/MyDrive"
os.chdir(Root)

In [3]:
# Cell 1: Import libraries and load the data
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score

# Load data
matches = pd.read_csv("matches.csv", index_col=0)


In [4]:
# Cell 2: Data preprocessing
matches["date"] = pd.to_datetime(matches["date"])
matches["h/a"] = matches["venue"].astype("category").cat.codes  # Convert venue to home (1) or away (0)
matches["opp"] = matches["opponent"].astype("category").cat.codes  # Convert opponents to a numeric code
matches["hour"] = matches["time"].str.replace(":.+", "", regex=True).astype("int")  # Convert time to hour
matches["day"] = matches["date"].dt.dayofweek  # Convert day of the week to a number
matches["target"] = (matches["result"] == "W").astype("int")  # Set a win as the target (1)


In [5]:
# Cell 3: Splitting data and setting up the model
train = matches[matches["date"] < '2022-01-01']
test = matches[matches["date"] > '2022-01-01']
predictors = ["h/a", "opp", "hour", "day"]

rf = RandomForestClassifier(n_estimators=100, min_samples_split=10, random_state=1)
rf.fit(train[predictors], train["target"])


In [6]:
# Cell 4: Making predictions and calculating accuracy
preds = rf.predict(test[predictors])
acc = accuracy_score(test["target"], preds)
acc


0.6123188405797102

In [7]:
# Cell 5: Displaying a confusion matrix
combined = pd.DataFrame(dict(actual=test["target"], prediction=preds))
pd.crosstab(index=combined["actual"], columns=combined["prediction"])


prediction,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,140,32
1,75,29


In [8]:
# Cell 6: Calculating precision
precision = precision_score(test["target"], preds)
precision


0.47540983606557374

In [9]:
# Cell 7: Grouping matches and creating rolling averages for a specific team
grouped_matches = matches.groupby("team")
group = grouped_matches.get_group("Manchester United").sort_values("date")

def rolling_averages(group, cols, new_cols):
    group = group.sort_values("date")
    rolling_stats = group[cols].rolling(3, closed='left').mean()
    group[new_cols] = rolling_stats
    group = group.dropna(subset=new_cols)
    return group


In [10]:
# Cell 8: Defining columns and applying rolling averages
cols = ["gf", "ga", "sh", "sot", "dist", "fk", "pk", "pkatt"]
new_cols = [f"{c}_rolling" for c in cols]

matches_rolling = matches.groupby("team").apply(lambda x: rolling_averages(x, cols, new_cols))
matches_rolling = matches_rolling.droplevel('team')
matches_rolling.index = range(matches_rolling.shape[0])
matches_rolling


  matches_rolling = matches.groupby("team").apply(lambda x: rolling_averages(x, cols, new_cols))


Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,hour,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
0,2020-10-04,14:00,Premier League,Matchweek 4,6,Home,W,2.0,1.0,Sheffield Utd,...,14,1,2.000000,1.333333,7.666667,3.666667,14.733333,0.666667,0.000000,0.000000
1,2020-10-17,17:30,Premier League,Matchweek 5,5,Away,L,0.0,1.0,Manchester City,...,17,0,1.666667,1.666667,5.333333,3.666667,15.766667,0.000000,0.000000,0.000000
2,2020-10-25,19:15,Premier League,Matchweek 6,6,Home,L,0.0,1.0,Leicester City,...,19,0,1.000000,1.666667,7.000000,3.666667,16.733333,0.666667,0.000000,0.000000
3,2020-11-01,16:30,Premier League,Matchweek 7,6,Away,W,1.0,0.0,Manchester Utd,...,16,1,0.666667,1.000000,9.666667,4.000000,16.033333,1.000000,0.000000,0.000000
4,2020-11-08,19:15,Premier League,Matchweek 8,6,Home,L,0.0,3.0,Aston Villa,...,19,0,0.333333,0.666667,9.666667,2.666667,18.033333,1.000000,0.333333,0.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1312,2022-03-13,14:00,Premier League,Matchweek 29,6,Away,W,1.0,0.0,Everton,...,14,1,1.333333,1.000000,12.333333,3.666667,19.300000,0.000000,0.000000,0.000000
1313,2022-03-18,20:00,Premier League,Matchweek 30,4,Home,L,2.0,3.0,Leeds United,...,20,0,1.666667,0.666667,12.333333,4.333333,19.600000,0.000000,0.000000,0.000000
1314,2022-04-02,15:00,Premier League,Matchweek 31,5,Home,W,2.0,1.0,Aston Villa,...,15,1,2.333333,1.000000,13.000000,5.333333,19.833333,0.000000,0.000000,0.000000
1315,2022-04-08,20:00,Premier League,Matchweek 32,4,Away,L,0.0,1.0,Newcastle Utd,...,20,0,1.666667,1.333333,13.000000,5.000000,18.533333,0.000000,0.000000,0.000000


In [11]:
# Cell 9: Defining a function to make predictions and calculate precision
def make_predictions(data, predictors):
    train = data[data["date"] < '2022-01-01']
    test = data[data["date"] > '2022-01-01']
    rf.fit(train[predictors], train["target"])
    preds = rf.predict(test[predictors])
    combined = pd.DataFrame(dict(actual=test["target"], prediction=preds), index=test.index)
    precision = precision_score(test["target"], preds)
    return combined, precision


In [12]:
# Cell 10: Making predictions on rolling data and calculating precision
combined, precision = make_predictions(matches_rolling, predictors + new_cols)
precision


0.6458333333333334

In [13]:
# Cell 11: Merging combined predictions with matches data
combined = combined.merge(matches_rolling[["date", "team", "opponent", "result"]], left_index=True, right_index=True)
combined


Unnamed: 0,actual,prediction,date,team,opponent,result
55,0,1,2022-01-23,Arsenal,Burnley,D
56,1,0,2022-02-10,Arsenal,Wolves,W
57,1,0,2022-02-19,Arsenal,Brentford,W
58,1,1,2022-02-24,Arsenal,Wolves,W
59,1,1,2022-03-06,Arsenal,Watford,W
...,...,...,...,...,...,...
1312,1,0,2022-03-13,Wolverhampton Wanderers,Everton,W
1313,0,0,2022-03-18,Wolverhampton Wanderers,Leeds United,L
1314,1,0,2022-04-02,Wolverhampton Wanderers,Aston Villa,W
1315,0,1,2022-04-08,Wolverhampton Wanderers,Newcastle Utd,L


In [14]:
# Cell 12: Creating a custom dictionary class for missing keys
class MissingDict(dict):
    __missing__ = lambda self, key: key

map_values = {
    "Brighton and Hove Albion": "Brighton",
    "Manchester United": "Manchester Utd",
    "Tottenham Hotspur": "Tottenham",
    "West Ham United": "West Ham",
    "Wolverhampton Wanderers": "Wolves"
}
mapping = MissingDict(**map_values)
mapping["West Ham United"]


'West Ham'

In [15]:
# Cell 13: Mapping team names in combined predictions
combined["new_team"] = combined["team"].map(mapping)
combined


Unnamed: 0,actual,prediction,date,team,opponent,result,new_team
55,0,1,2022-01-23,Arsenal,Burnley,D,Arsenal
56,1,0,2022-02-10,Arsenal,Wolves,W,Arsenal
57,1,0,2022-02-19,Arsenal,Brentford,W,Arsenal
58,1,1,2022-02-24,Arsenal,Wolves,W,Arsenal
59,1,1,2022-03-06,Arsenal,Watford,W,Arsenal
...,...,...,...,...,...,...,...
1312,1,0,2022-03-13,Wolverhampton Wanderers,Everton,W,Wolves
1313,0,0,2022-03-18,Wolverhampton Wanderers,Leeds United,L,Wolves
1314,1,0,2022-04-02,Wolverhampton Wanderers,Aston Villa,W,Wolves
1315,0,1,2022-04-08,Wolverhampton Wanderers,Newcastle Utd,L,Wolves


In [16]:
# Cell 14: Merging home and away team predictions
merged = combined.merge(combined, left_on=["date", "new_team"], right_on=["date", "opponent"])
merged


Unnamed: 0,actual_x,prediction_x,date,team_x,opponent_x,result_x,new_team_x,actual_y,prediction_y,team_y,opponent_y,result_y,new_team_y
0,0,1,2022-01-23,Arsenal,Burnley,D,Arsenal,0,0,Burnley,Arsenal,D,Burnley
1,1,0,2022-02-10,Arsenal,Wolves,W,Arsenal,0,0,Wolverhampton Wanderers,Arsenal,L,Wolves
2,1,0,2022-02-19,Arsenal,Brentford,W,Arsenal,0,0,Brentford,Arsenal,L,Brentford
3,1,1,2022-02-24,Arsenal,Wolves,W,Arsenal,0,0,Wolverhampton Wanderers,Arsenal,L,Wolves
4,1,1,2022-03-06,Arsenal,Watford,W,Arsenal,0,0,Watford,Arsenal,L,Watford
...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,1,0,2022-03-13,Wolverhampton Wanderers,Everton,W,Wolves,0,0,Everton,Wolves,L,Everton
243,0,0,2022-03-18,Wolverhampton Wanderers,Leeds United,L,Wolves,1,0,Leeds United,Wolves,W,Leeds United
244,1,0,2022-04-02,Wolverhampton Wanderers,Aston Villa,W,Wolves,0,0,Aston Villa,Wolves,L,Aston Villa
245,0,1,2022-04-08,Wolverhampton Wanderers,Newcastle Utd,L,Wolves,1,0,Newcastle United,Wolves,W,Newcastle United
