<a href="https://colab.research.google.com/github/ccstevie/nhl_model/blob/main/NHL_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from datetime import date, timedelta
from functools import reduce
from io import StringIO

import pandas as pd
import requests

In [2]:
# Data window for the analysis: the 30 days ending today.
today = date.today()
start = today - timedelta(days=30)

for label, day in (("Today's date:", today), ("30 days ago:", start)):
    print(label, day)

Today's date: 2024-04-30
30 days ago: 2024-03-31


In [None]:
# Natural Stat Trick team table, restricted to the [start, today] window via the
# fd/td query parameters; every other filter is passed through verbatim.
# NOTE(review): the season (fromseason/thruseason) is hard-coded to 20242025, so the
# date window is only meaningful while it falls inside that season - confirm/update.
url = f"https://www.naturalstattrick.com/teamtable.php?fromseason=20242025&thruseason=20242025&stype=2&sit=5v5&score=all&rate=n&team=all&loc=B&gpf=410&fd={start}&td={today}"

# Always pass a timeout: without one, requests waits indefinitely on a stalled
# connection and the notebook hangs instead of failing.
req = requests.get(url, timeout=30)

# Displayed so the reader can confirm the download succeeded (200 expected).
req.status_code

200

In [4]:
# Parse the HTML already downloaded into `req` rather than handing the URL to pandas,
# which would fetch the page a second time (extra round-trip, and the parsed page
# would not be the response whose status code was just checked).
# The first table on the page is the team table; "-" cells are treated as missing.
df = pd.read_html(StringIO(req.text), header=0, index_col=0, na_values=["-"])[0]
df.head()

Unnamed: 0,Team,GP,TOI,W,L,OTL,ROW,Points,Point %,CF,...,LDSA,LDSF%,LDGF,LDGA,LDGF%,LDSH%,LDSV%,SH%,SV%,PDO
1,Anaheim Ducks,8,395:44,3,4,1,3,7,0.438,333,...,84,49.09,5,0,100.0,6.17,100.0,11.18,92.86,1.04
2,Vancouver Canucks,9,425:47,5,3,1,5,11,0.611,444,...,96,50.26,3,3,50.0,3.09,96.88,7.92,90.0,0.979
3,Colorado Avalanche,8,393:28,3,4,1,3,7,0.438,419,...,85,53.3,4,3,57.14,4.12,96.47,9.76,89.74,0.995
4,Columbus Blue Jackets,8,411:08,3,5,0,3,6,0.375,383,...,126,42.2,5,4,55.56,5.43,96.83,10.2,92.19,1.024
5,Pittsburgh Penguins,9,434:56,6,2,1,6,13,0.722,452,...,95,49.47,6,5,54.55,6.45,94.74,14.0,89.86,1.039


In [5]:
# Rank every team on CF% (rank 1 = highest value). The same recipe is repeated in
# the following cells for GF%, xGF%, HDCF% and SH%.
# Note: the rank column is given the same label as the stat itself, so the result
# carries two "CF%" columns - the rank first, then the raw percentage.
cf = (
    df.sort_values(by="CF%", ascending=False, ignore_index=True)
    .reset_index()  # 0-based sort position becomes an "index" column
    .assign(index=lambda ranked: ranked["index"] + 1)  # shift to 1-based ranks
    .rename(columns={"index": "CF%"})[["Team", "CF%"]]
)
# show the CF% ranking as an example
cf

Unnamed: 0,Team,CF%,CF%.1
0,Carolina Hurricanes,1,62.33
1,Florida Panthers,2,56.94
2,New York Rangers,3,56.68
3,Edmonton Oilers,4,55.3
4,Dallas Stars,5,55.1
5,Nashville Predators,6,54.93
6,Vancouver Canucks,7,54.61
7,Calgary Flames,8,54.48
8,Ottawa Senators,9,54.03
9,Los Angeles Kings,10,53.97


In [6]:
# Rank every team on GF% (rank 1 = highest value).
# The rank column reuses the "GF%" label, so the result has two "GF%" columns:
# the rank first, then the raw percentage.
gf = (
    df.sort_values(by="GF%", ascending=False, ignore_index=True)
    .reset_index()  # 0-based sort position becomes an "index" column
    .assign(index=lambda ranked: ranked["index"] + 1)  # shift to 1-based ranks
    .rename(columns={"index": "GF%"})[["Team", "GF%"]]
)

In [7]:
# Rank every team on xGF% (rank 1 = highest value).
# The rank column reuses the "xGF%" label, so the result has two "xGF%" columns:
# the rank first, then the raw percentage.
xgf = (
    df.sort_values(by="xGF%", ascending=False, ignore_index=True)
    .reset_index()  # 0-based sort position becomes an "index" column
    .assign(index=lambda ranked: ranked["index"] + 1)  # shift to 1-based ranks
    .rename(columns={"index": "xGF%"})[["Team", "xGF%"]]
)

In [8]:
# Rank every team on HDCF% (rank 1 = highest value).
# The rank column reuses the "HDCF%" label, so the result has two "HDCF%" columns:
# the rank first, then the raw percentage.
hdcf = (
    df.sort_values(by="HDCF%", ascending=False, ignore_index=True)
    .reset_index()  # 0-based sort position becomes an "index" column
    .assign(index=lambda ranked: ranked["index"] + 1)  # shift to 1-based ranks
    .rename(columns={"index": "HDCF%"})[["Team", "HDCF%"]]
)

In [9]:
# Rank every team on SH% (rank 1 = highest value).
# The rank column reuses the "SH%" label, so the result has two "SH%" columns:
# the rank first, then the raw percentage.
sh = (
    df.sort_values(by="SH%", ascending=False, ignore_index=True)
    .reset_index()  # 0-based sort position becomes an "index" column
    .assign(index=lambda ranked: ranked["index"] + 1)  # shift to 1-based ranks
    .rename(columns={"index": "SH%"})[["Team", "SH%"]]
)

In [10]:
# Combine the five per-stat ranking tables into one wide frame, one row per team.
# Frames are outer-joined on "Team" from left to right, so the column order follows
# the order of `dfs`.
dfs = [cf, gf, xgf, hdcf, sh]

final_df = dfs[0]
for ranking in dfs[1:]:
    final_df = pd.merge(final_df, ranking, on=["Team"], how="outer")

final_df

Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
0,Carolina Hurricanes,1,62.33,28,42.31,1,64.4,1,64.44,31,5.73
1,Florida Panthers,2,56.94,2,66.67,4,57.15,7,57.14,7,11.06
2,New York Rangers,3,56.68,27,42.42,5,56.74,8,56.34,29,6.19
3,Edmonton Oilers,4,55.3,6,57.45,3,58.01,6,57.35,14,9.51
4,Dallas Stars,5,55.1,5,57.69,2,61.19,2,62.96,23,7.81
5,Nashville Predators,6,54.93,20,48.28,8,54.73,5,57.86,30,5.88
6,Vancouver Canucks,7,54.61,21,47.06,6,55.96,3,61.64,21,7.92
7,Calgary Flames,8,54.48,22,46.15,13,51.48,15,51.22,24,7.38
8,Ottawa Senators,9,54.03,32,24.14,22,47.37,28,40.94,32,3.76
9,Los Angeles Kings,10,53.97,10,55.56,9,54.57,14,51.49,15,9.35


In [11]:
# Example: look up the merged ranking row for a single team by its full name.
is_leafs = final_df["Team"].eq("Toronto Maple Leafs")
final_df[is_leafs]

Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
10,Toronto Maple Leafs,11,53.93,18,48.98,7,54.81,9,54.72,5,11.48


In [12]:
# Helper imported from the get_todays_games module, which is not defined in this notebook.
from get_todays_games import getGames

# Each entry of `matchups` is unpacked as an (away, home) pair by the next cell, and
# both names are substring-matched against final_df["Team"].
# NOTE(review): presumably this is today's schedule - confirm against get_todays_games.
matchups = getGames()
matchups

[('Maple Leafs', 'Bruins'),
 ('Islanders', 'Hurricanes'),
 ('Avalanche', 'Jets'),
 ('Predators', 'Canucks')]

In [13]:
# Build the per-matchup table: for every (away, home) pair, the away team's rows are
# followed by the home team's rows. The CSV-writing cell below treats consecutive
# rows as one matchup, so this ordering matters.
#
# Frames are collected in a list and concatenated once. Growing a DataFrame with
# pd.concat inside the loop is quadratic, and starting from an empty DataFrame relies
# on pandas' deprecated handling of empty frames in concat.
matchup_frames = []

for away, home in matchups:
    # regex=False: team names are literal text, not patterns, so characters such as
    # "." must not be interpreted as regular-expression syntax.
    away_df = final_df[final_df["Team"].str.contains(away, regex=False)]
    home_df = final_df[final_df["Team"].str.contains(home, regex=False)]
    matchup_frames.extend([away_df, home_df])

# pd.concat raises on an empty list; fall back to an empty frame when there are no games.
res = pd.concat(matchup_frames, ignore_index=True) if matchup_frames else pd.DataFrame()
res


Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
0,Toronto Maple Leafs,11,53.93,18,48.98,7,54.81,9,54.72,5,11.48
1,Boston Bruins,29,43.96,12,52.63,25,46.38,21,48.06,27,6.9
2,New York Islanders,18,48.34,3,66.67,15,51.01,10,54.64,18,8.7
3,Carolina Hurricanes,1,62.33,28,42.31,1,64.4,1,64.44,31,5.73
4,Colorado Avalanche,13,52.57,16,50.0,21,47.53,13,51.61,12,9.76
5,Winnipeg Jets,28,44.96,1,67.57,18,49.86,17,50.35,3,13.74
6,Nashville Predators,6,54.93,20,48.28,8,54.73,5,57.86,30,5.88
7,Vancouver Canucks,7,54.61,21,47.06,6,55.96,3,61.64,21,7.92


In [14]:
# Write `res` to result.csv in the notebook's hand-rolled layout:
#   * a header line with every column label followed by a comma,
#   * one line per row, every value followed by a comma,
#   * an extra blank line after every second row, so each pair of rows
#     (one matchup) is visually separated from the next.
# The context manager guarantees the file is closed even if a write fails.
with open("result.csv", 'w') as f:
    f.write("".join(str(label) + "," for label in res.columns.values))
    f.write("\n")

    for i, row in enumerate(res.values):
        f.write("".join(str(value) + "," for value in row))
        # Even positions are the first row of a pair; odd positions close the pair.
        f.write("\n" if i % 2 == 0 else "\n\n")