### NHL-ML 

Random Forest based prediction of Toronto Maple Leafs NHL games. 
Training data is in 2023, testing in 2024.

In [53]:
import pandas as pd

games = pd.read_csv("leafs_data.csv")
games["Date"] = pd.to_datetime(games["Date"])
games["Day"] = games["Date"].dt.day
games["Month"] = games["Date"].dt.month
games["DayOfWeek"] = games["Date"].dt.dayofweek
games["VenueCode"] = games["Venue"].astype("category").cat.codes
games["OpponentCode"] = games["Opp"].astype("category").cat.codes
games["target"] = (games["Rslt"] == "W").astype("int")
games.head()


Unnamed: 0,Rk,Gtm,Date,Venue,Opp,Rslt,GF,GA,OT,SOGH,...,FA,FF%,oZS%,PDO,Day,Month,DayOfWeek,VenueCode,OpponentCode,target
0,1,1,2024-10-09,@,MTL,L,0,1,,48,...,21,75.0,60.0,96.3,9,10,2,0,14,0
1,2,2,2024-10-10,@,NJD,W,4,2,,22,...,27,47.1,44.0,112.4,10,10,3,0,15,1
2,3,3,2024-10-12,,PIT,W,4,2,,33,...,27,52.6,40.9,103.3,12,10,5,-1,21,1
3,4,4,2024-10-16,,LAK,W,6,2,,26,...,45,30.8,33.3,117.6,16,10,2,-1,12,1
4,5,5,2024-10-19,,NYR,L,1,4,,35,...,33,54.8,47.8,89.9,19,10,5,-1,18,0


In [54]:
from sklearn.ensemble import RandomForestClassifier


In [55]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)


In [64]:
train = games[games["Date"] < "2025-01-01"]
test = games[games["Date"] >= "2025-01-01"]
predictors = [
	"Day",
	"Month",
	"DayOfWeek",
	"VenueCode",
	"OpponentCode",
]
rf.fit(train[predictors], train["target"])


In [65]:
preds = rf.predict(test[predictors])
preds

array([1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1])

In [66]:
from sklearn.metrics import accuracy_score
acc = accuracy_score(test["target"], preds)
acc

0.6136363636363636

In [68]:
combined = pd.DataFrame(dict(actual=test["target"], predicted=preds))


In [69]:
pd.crosstab(index=combined["actual"], columns=combined["predicted"], rownames=["Actual"], colnames=["Predicted"])

Predicted,0,1
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,4,11
1,6,23


In [70]:
from sklearn.metrics import precision_score
precision_score(test["target"], preds)

0.6764705882352942

In [97]:
def rolling_averages(group, cols, new_cols):
	group = group.sort_values("Date")
	rolling_stats = group[cols].rolling(3, closed='left').mean()
	group[new_cols] = rolling_stats
	group = group.dropna(subset=new_cols)
	return group

In [138]:
cols = ["GF", "GA", "PPGH", "PPGA", "FO%", "CF%", "FF%", "oZS%", "PDO"]
new_cols = ["GF_rolling", "GA_rolling", "PPGH_rolling", "PPGA_rolling", "FO%", "CF%_rolling", "FF%_rolling", "oZS%_rolling", "PDO_rolling"]
new_cols

['GF_rolling',
 'GA_rolling',
 'PPGH_rolling',
 'PPGA_rolling',
 'FO%',
 'CF%_rolling',
 'FF%_rolling',
 'oZS%_rolling',
 'PDO_rolling']

In [139]:
rolling_games = rolling_averages(games, cols, new_cols)

In [154]:
def make_predictions(data, predictors):
	train = data[data["Date"] < "2025-01-01"]
	test = data[data["Date"] >= "2025-01-01"]
	rf.fit(train[predictors], train["target"])
	preds = rf.predict(test[predictors])
	combined = pd.DataFrame(dict(actual=test["target"], predicted=preds))
	precision = precision_score(test["target"], preds)
	return combined, precision

In [158]:
combined, precision = make_predictions(rolling_games, predictors + new_cols)
print(precision)
print(combined)

0.75
    actual  predicted
38       1          1
39       1          1
40       1          0
41       1          1
42       0          0
43       0          1
44       0          1
45       1          1
46       1          0
47       1          1
48       0          0
49       0          0
50       0          1
51       1          0
52       1          1
53       1          1
54       0          0
55       1          1
56       1          0
57       1          0
58       1          0
59       1          0
60       0          0
61       0          1
62       0          1
63       1          1
64       0          1
65       0          0
66       1          1
67       1          1
68       1          1
69       0          0
70       1          1
71       0          0
72       1          0
73       1          0
74       1          1
75       1          1
76       0          0
77       1          0
78       1          0
79       1          1
80       1          1
81       1          1
