<a href="https://colab.research.google.com/github/ccstevie/nhl_model/blob/main/NHL_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from datetime import date, timedelta
from functools import reduce
from io import StringIO

import pandas as pd
import requests

In [2]:
# Analysis window: the stats are pulled for the LOOKBACK_DAYS days ending today.
# Change LOOKBACK_DAYS to widen or narrow the sample; `today` and `start` are
# interpolated into the data URL further down.
LOOKBACK_DAYS = 30
today = date.today()
print("Today's date:", today)
start = today - timedelta(days=LOOKBACK_DAYS)
print(f"{LOOKBACK_DAYS} days ago:", start)

Today's date: 2024-03-05
30 days ago: 2024-02-04


In [3]:
# Team table query (sit=5v5, score=all, team=all) restricted to the
# [start, today] date window via the fd/td parameters.
# NOTE(review): fromseason/thruseason are fixed at 20232024 while fd/td follow
# today's date -- presumably these need updating once the window moves into
# another season; confirm.
url = f"https://www.naturalstattrick.com/teamtable.php?fromseason=20232024&thruseason=20232024&stype=2&sit=5v5&score=all&rate=n&team=all&loc=B&gpf=410&fd={start}&td={today}"
# Bound the request so a stalled connection cannot hang the notebook, and fail
# right here on a 4xx/5xx response instead of in a later parsing cell.
req = requests.get(url, timeout=30)
req.raise_for_status()
req.status_code

200

In [4]:
# Parse the stats table out of the response that was already downloaded into
# `req`, rather than handing the URL to pandas and fetching the page a second
# time. The body is wrapped in StringIO so read_html receives a file-like object.
# header=0 uses the first table row as column names, index_col=0 uses the first
# column as the index, and "-" cells are read as missing values.
df = pd.read_html(StringIO(req.text), header=0, index_col=0, na_values=["-"])[0]
df.head()

Unnamed: 0,Team,GP,TOI,W,L,OTL,ROW,Points,Point %,CF,...,LDSA,LDSF%,LDGF,LDGA,LDGF%,LDSH%,LDSV%,SH%,SV%,PDO
1,Colorado Avalanche,14,687:17,6,6,2,6,14,0.5,684,...,167,49.39,5,5,50.0,3.07,97.01,8.19,93.2,1.014
2,New York Rangers,13,616:03,10,2,1,10,21,0.808,578,...,154,47.62,10,3,76.92,7.14,98.05,8.54,94.69,1.032
3,New York Islanders,11,543:13,6,3,2,6,14,0.636,482,...,117,50.63,4,6,40.0,3.33,94.87,9.6,92.05,1.017
4,Toronto Maple Leafs,14,707:46,10,4,0,9,20,0.714,718,...,129,53.76,6,3,66.67,4.0,97.67,10.25,89.93,1.002
5,Calgary Flames,12,570:52,8,4,0,8,16,0.667,627,...,129,51.32,6,4,60.0,4.41,96.9,9.44,91.49,1.009


In [5]:
# Rank every team on CF%, GF%, xGF%, HDCF% and SH% (one cell per stat).
# Here: CF%. Teams are sorted from highest to lowest, and the 1-based position
# after the sort becomes the team's rank.
cf = (
    df.sort_values(by="CF%", ascending=False, ignore_index=True)
    .rename(index=lambda pos: pos + 1)
    .reset_index()
    .rename(columns={"index": "CF%"})
)
# NOTE: after the rename two columns carry the label "CF%" (the rank first, the
# raw percentage second), so this selection keeps both of them beside "Team".
cf = cf[["Team", "CF%"]]
# print cf as an example
cf

Unnamed: 0,Team,CF%,CF%.1
0,Carolina Hurricanes,1,57.94
1,Edmonton Oilers,2,56.89
2,Vancouver Canucks,3,54.63
3,Florida Panthers,4,54.09
4,Philadelphia Flyers,5,52.98
5,New Jersey Devils,6,52.97
6,Arizona Coyotes,7,52.31
7,Nashville Predators,8,52.28
8,Calgary Flames,9,52.25
9,Buffalo Sabres,10,51.78


In [6]:
# GF% ranking: sort from highest to lowest and use the 1-based position as rank.
gf = (
    df.sort_values(by="GF%", ascending=False, ignore_index=True)
    .rename(index=lambda pos: pos + 1)
    .reset_index()
    .rename(columns={"index": "GF%"})
)
# NOTE: two columns are labelled "GF%" at this point (rank, then raw value);
# the selection below keeps both.
gf = gf[["Team", "GF%"]]

In [7]:
# xGF% ranking: sort from highest to lowest and use the 1-based position as rank.
xgf = (
    df.sort_values(by="xGF%", ascending=False, ignore_index=True)
    .rename(index=lambda pos: pos + 1)
    .reset_index()
    .rename(columns={"index": "xGF%"})
)
# NOTE: two columns are labelled "xGF%" at this point (rank, then raw value);
# the selection below keeps both.
xgf = xgf[["Team", "xGF%"]]

In [8]:
# HDCF% ranking: sort from highest to lowest and use the 1-based position as rank.
hdcf = (
    df.sort_values(by="HDCF%", ascending=False, ignore_index=True)
    .rename(index=lambda pos: pos + 1)
    .reset_index()
    .rename(columns={"index": "HDCF%"})
)
# NOTE: two columns are labelled "HDCF%" at this point (rank, then raw value);
# the selection below keeps both.
hdcf = hdcf[["Team", "HDCF%"]]

In [9]:
# SH% ranking: sort from highest to lowest and use the 1-based position as rank.
sh = (
    df.sort_values(by="SH%", ascending=False, ignore_index=True)
    .rename(index=lambda pos: pos + 1)
    .reset_index()
    .rename(columns={"index": "SH%"})
)
# NOTE: two columns are labelled "SH%" at this point (rank, then raw value);
# the selection below keeps both.
sh = sh[["Team", "SH%"]]

In [10]:
# Combine the per-stat ranking tables into one wide table keyed on "Team".
# Start from the first table and outer-join each remaining one onto it in turn.
dfs = [cf, gf, xgf, hdcf, sh]
final_df = dfs[0]
for ranking in dfs[1:]:
    final_df = pd.merge(final_df, ranking, on=['Team'], how='outer')
final_df

Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
0,Carolina Hurricanes,1,57.94,2,60.98,2,57.04,2,57.41,23,8.01
1,Edmonton Oilers,2,56.89,4,56.92,1,59.26,1,64.43,8,9.61
2,Vancouver Canucks,3,54.63,19,50.0,7,53.85,5,55.04,12,9.33
3,Florida Panthers,4,54.09,1,66.67,4,54.91,7,53.19,27,7.38
4,Philadelphia Flyers,5,52.98,15,50.94,8,53.45,8,53.16,19,8.21
5,New Jersey Devils,6,52.97,20,49.18,5,54.77,9,52.83,24,7.89
6,Arizona Coyotes,7,52.31,28,42.86,13,51.67,14,50.45,15,8.6
7,Nashville Predators,8,52.28,3,60.42,21,48.87,22,48.15,3,10.32
8,Calgary Flames,9,52.25,12,52.94,14,51.08,23,48.11,10,9.44
9,Buffalo Sabres,10,51.78,10,53.85,19,49.06,12,51.69,29,6.95


In [11]:
# Example: pull the combined ranking row for a single team by its full name.
is_leafs = final_df["Team"].eq("Toronto Maple Leafs")
final_df[is_leafs]

Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
14,Toronto Maple Leafs,15,50.71,5,56.06,6,54.01,4,55.47,4,10.25


In [12]:
# getGames is provided by the local get_todays_games module (not part of this
# notebook).
from get_todays_games import getGames

# Each entry of `matchups` is unpacked as an (away, home) pair by the loop in
# the following cell; the names are matched against final_df["Team"] there.
matchups = getGames()
matchups

[('Panthers', 'Devils'),
 ('Blue Jackets', 'Penguins'),
 ('Oilers', 'Bruins'),
 ('Blues', 'Islanders'),
 ('Canadiens', 'Predators'),
 ('Kraken', 'Jets'),
 ('Blackhawks', 'Coyotes'),
 ('Canucks', 'Kings'),
 ('Stars', 'Sharks')]

In [13]:
# Build `res`: for each game in `matchups`, the ranking row(s) of the away team
# followed by those of the home team, in the order the games are listed.
matchup_frames = []

for away, home in matchups:
    # The names in `matchups` are matched as substrings of final_df["Team"].
    # regex=False makes that a literal match, so punctuation in a name is never
    # interpreted as a regular-expression pattern.
    away_df = final_df[final_df["Team"].str.contains(away, regex=False)]
    home_df = final_df[final_df["Team"].str.contains(home, regex=False)]
    # Each side is expected to contribute exactly one row. Anything else is kept
    # as-is but reported, because it shifts every later row in `res`.
    for name, rows in ((away, away_df), (home, home_df)):
        if len(rows) != 1:
            print(f"Warning: {name!r} matched {len(rows)} teams (expected 1)")
    matchup_frames.extend([away_df, home_df])

# Concatenate once at the end instead of re-copying a growing frame on every
# iteration. With no games, fall back to an empty frame, since pd.concat
# rejects an empty list.
if matchup_frames:
    res = pd.concat(matchup_frames, ignore_index=True)
else:
    res = pd.DataFrame()
res


Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
0,Florida Panthers,4,54.09,1,66.67,4,54.91,7,53.19,27,7.38
1,New Jersey Devils,6,52.97,20,49.18,5,54.77,9,52.83,24,7.89
2,Columbus Blue Jackets,20,49.3,9,54.9,12,52.03,21,48.62,7,9.89
3,Pittsburgh Penguins,18,50.04,26,45.28,24,48.2,15,50.42,26,7.59
4,Edmonton Oilers,2,56.89,4,56.92,1,59.26,1,64.43,8,9.61
5,Boston Bruins,26,46.06,23,48.21,18,49.19,18,49.64,14,8.79
6,St Louis Blues,29,44.08,27,45.24,32,39.33,31,37.44,21,8.09
7,New York Islanders,25,46.53,6,55.81,16,49.82,16,50.0,9,9.6
8,Montreal Canadiens,24,48.07,17,50.0,11,52.21,19,49.32,6,9.89
9,Nashville Predators,8,52.28,3,60.42,21,48.87,22,48.15,3,10.32


In [14]:
# Write `res` to result.csv by hand: one header line with the column labels,
# then one line per row of `res`. Every field, including the last one on a line,
# is followed by a comma. An extra empty line is written after every second row,
# which separates the output into two-row groups (assumes `res` holds two rows
# per matchup).
# The `with` block guarantees the file is closed even if a write fails midway.
with open("result.csv", 'w') as f:
    f.write("".join(col + "," for col in res.columns.values))
    f.write("\n")

    for i, row in enumerate(res.values):
        f.write("".join(str(value) + "," for value in row))
        # Even positions are the first row of a pair, odd positions the second:
        # end the pair with a blank line.
        f.write("\n" if i % 2 == 0 else "\n\n")