<a href="https://colab.research.google.com/github/ccstevie/nhl_model/blob/main/NHL_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from datetime import date, timedelta
from functools import reduce
from io import BytesIO

import pandas as pd
import requests

In [2]:
# Length of the sample window in days. Kept in one place so the date
# arithmetic and the printed label below cannot drift apart.
LOOKBACK_DAYS = 30

# The window ends today and starts LOOKBACK_DAYS earlier; both dates are
# later interpolated into the stats query URL.
today = date.today()
print("Today's date:", today)
start = today - timedelta(days=LOOKBACK_DAYS)
print(f"{LOOKBACK_DAYS} days ago:", start)

Today's date: 2024-03-21
30 days ago: 2024-02-20


In [3]:
# Season identifier sent as both `fromseason` and `thruseason`.
# NOTE(review): this is pinned by hand while the date window moves
# automatically - update it when the season rolls over, and confirm how the
# site treats dates that fall outside the selected season.
SEASON = "20232024"

# Upper bound (seconds) on how long the request may take; without it a stalled
# server would hang the notebook indefinitely.
REQUEST_TIMEOUT_S = 30

# Team table query for the [start, today] window. The remaining query
# parameters are passed through exactly as before.
url = (
    "https://www.naturalstattrick.com/teamtable.php"
    f"?fromseason={SEASON}&thruseason={SEASON}"
    "&stype=2&sit=5v5&score=all&rate=n&team=all&loc=B&gpf=410"
    f"&fd={start}&td={today}"
)
req = requests.get(url, timeout=REQUEST_TIMEOUT_S)
# Show the HTTP status as the cell output; 200 means the page was served.
req.status_code

200

In [4]:
# Parse the first HTML table out of the response already held in `req`,
# instead of passing the URL to pandas (which would download the same page a
# second time). The raw bytes are handed over so the HTML parser does its own
# encoding detection. "-" cells are read as missing values, and the first
# column of the table becomes the index.
df = pd.read_html(BytesIO(req.content), header=0, index_col=0, na_values=["-"])[0]
df.head()

Unnamed: 0,Team,GP,TOI,W,L,OTL,ROW,Points,Point %,CF,...,LDSA,LDSF%,LDGF,LDGA,LDGF%,LDSH%,LDSV%,SH%,SV%,PDO
1,Ottawa Senators,15,748:02,5,8,2,3,12,0.4,735,...,171,47.87,3,6,33.33,1.91,96.49,6.72,91.11,0.978
2,Florida Panthers,13,593:20,9,4,0,8,18,0.692,620,...,97,62.11,2,3,40.0,1.26,96.91,5.79,93.13,0.989
3,Dallas Stars,14,700:33,8,5,1,7,17,0.607,733,...,145,52.92,4,5,44.44,2.45,96.55,8.29,90.1,0.984
4,New York Rangers,14,663:11,9,4,1,9,19,0.679,649,...,155,49.35,8,4,66.67,5.3,97.42,8.55,94.66,1.032
5,New York Islanders,14,695:00,7,6,1,7,15,0.536,610,...,165,46.43,5,4,55.56,3.5,97.58,9.75,90.6,1.003


In [5]:
# Each of the next cells ranks the teams on one metric (CF%, GF%, xGF%, HDCF%,
# SH%): order the table from highest to lowest value and number the rows 1..n.
# The rank column reuses the metric's own label, so the resulting frame has two
# columns named "CF%": the rank first, then the raw value.
cf_sorted = df.sort_values(by="CF%", ascending=False, ignore_index=True)
cf_rank = pd.Series(range(1, len(cf_sorted) + 1), name="CF%")
cf = pd.concat([cf_sorted["Team"], cf_rank, cf_sorted["CF%"]], axis=1)
# display cf as an example of what every ranking frame looks like
cf

Unnamed: 0,Team,CF%,CF%.1
0,Carolina Hurricanes,1,58.08
1,Nashville Predators,2,55.35
2,Colorado Avalanche,3,54.75
3,Dallas Stars,4,54.46
4,Vancouver Canucks,5,53.7
5,Edmonton Oilers,6,53.57
6,Florida Panthers,7,52.86
7,New Jersey Devils,8,52.86
8,Philadelphia Flyers,9,51.46
9,Arizona Coyotes,10,51.15


In [6]:
# GF% ranking: teams ordered from highest to lowest GF%, numbered 1..n.
# Columns are Team, rank (labelled "GF%"), and the raw "GF%" value.
gf_sorted = df.sort_values(by="GF%", ascending=False, ignore_index=True)
gf_rank = pd.Series(range(1, len(gf_sorted) + 1), name="GF%")
gf = pd.concat([gf_sorted["Team"], gf_rank, gf_sorted["GF%"]], axis=1)

In [7]:
# xGF% ranking: teams ordered from highest to lowest xGF%, numbered 1..n.
# Columns are Team, rank (labelled "xGF%"), and the raw "xGF%" value.
xgf_sorted = df.sort_values(by="xGF%", ascending=False, ignore_index=True)
xgf_rank = pd.Series(range(1, len(xgf_sorted) + 1), name="xGF%")
xgf = pd.concat([xgf_sorted["Team"], xgf_rank, xgf_sorted["xGF%"]], axis=1)

In [8]:
# HDCF% ranking: teams ordered from highest to lowest HDCF%, numbered 1..n.
# Columns are Team, rank (labelled "HDCF%"), and the raw "HDCF%" value.
hdcf_sorted = df.sort_values(by="HDCF%", ascending=False, ignore_index=True)
hdcf_rank = pd.Series(range(1, len(hdcf_sorted) + 1), name="HDCF%")
hdcf = pd.concat([hdcf_sorted["Team"], hdcf_rank, hdcf_sorted["HDCF%"]], axis=1)

In [9]:
# SH% ranking: teams ordered from highest to lowest SH%, numbered 1..n.
# Columns are Team, rank (labelled "SH%"), and the raw "SH%" value.
sh_sorted = df.sort_values(by="SH%", ascending=False, ignore_index=True)
sh_rank = pd.Series(range(1, len(sh_sorted) + 1), name="SH%")
sh = pd.concat([sh_sorted["Team"], sh_rank, sh_sorted["SH%"]], axis=1)

In [10]:
# Combine the five per-metric ranking frames into one wide table with a single
# row per team, joining each frame onto the running result by team name.
dfs = [cf, gf, xgf, hdcf, sh]
final_df = dfs[0]
for ranking in dfs[1:]:
    final_df = pd.merge(final_df, ranking, on=['Team'], how='outer')
final_df

Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
0,Carolina Hurricanes,1,58.08,4,61.11,1,58.41,3,58.92,12,9.02
1,Nashville Predators,2,55.35,1,70.97,2,57.87,1,61.04,4,11.49
2,Colorado Avalanche,3,54.75,2,66.67,3,54.61,5,56.39,7,9.76
3,Dallas Stars,4,54.46,16,50.0,4,54.51,10,52.51,19,8.29
4,Vancouver Canucks,5,53.7,19,48.72,5,54.44,2,60.18,29,6.64
5,Edmonton Oilers,6,53.57,8,57.89,8,53.44,4,57.91,15,8.59
6,Florida Panthers,7,52.86,15,50.0,23,47.55,28,44.29,32,5.79
7,New Jersey Devils,8,52.86,24,45.59,11,51.44,12,52.11,18,8.42
8,Philadelphia Flyers,9,51.46,20,48.21,13,51.03,25,45.62,17,8.49
9,Arizona Coyotes,10,51.15,11,52.94,18,49.75,22,48.5,20,8.16


In [11]:
# Example lookup: show the merged rank/value row for one team, selected by an
# exact match on its full name.
is_leafs = final_df["Team"].eq("Toronto Maple Leafs")
final_df[is_leafs]

Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
11,Toronto Maple Leafs,12,50.95,7,59.15,10,51.49,8,53.38,2,11.9


In [12]:
# getGames is imported from the project-local get_todays_games module, whose
# implementation is not part of this notebook.
from get_todays_games import getGames

# Presumably a list of (away, home) team-nickname pairs for today's schedule:
# the next cell unpacks each entry as `away, home` and matches the strings
# against the "Team" column. TODO confirm the tuple order against getGames.
matchups = getGames()
matchups

[('Rangers', 'Bruins'),
 ('Flyers', 'Hurricanes'),
 ('Islanders', 'Red Wings'),
 ('Jets', 'Devils'),
 ('Blues', 'Senators'),
 ('Predators', 'Panthers'),
 ('Sabres', 'Oilers'),
 ('Blackhawks', 'Ducks'),
 ('Canadiens', 'Canucks'),
 ('Lightning', 'Sharks'),
 ('Kraken', 'Golden Knights')]

In [13]:
# Build the table of today's games: for every matchup, the ranking row of the
# first-listed team (unpacked as `away`) followed by the row of the second
# (`home`), in schedule order. The CSV export below relies on this strict
# two-rows-per-game layout.
matchup_frames = []
unmatched = []
for away, home in matchups:
    for nickname in (away, home):
        # Literal substring match: nicknames are plain text, so they must not
        # be interpreted as regular expressions.
        team_rows = final_df[final_df["Team"].str.contains(nickname, regex=False)]
        if len(team_rows) != 1:
            # Zero or several hits would shift every later pair; remember it so
            # the problem is reported instead of passing silently.
            unmatched.append((nickname, len(team_rows)))
        matchup_frames.append(team_rows)

# Concatenate once at the end rather than growing the frame inside the loop.
# pd.concat rejects an empty list, so a day without games yields an empty frame.
res = pd.concat(matchup_frames, ignore_index=True) if matchup_frames else pd.DataFrame()

if unmatched:
    print("Warning: expected exactly one row per team, got (name, rows):", unmatched)
res


Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
0,New York Rangers,26,47.17,9,57.78,30,42.5,30,42.36,16,8.55
1,Boston Bruins,30,45.92,10,55.56,6,53.46,9,53.23,5,11.44
2,Philadelphia Flyers,9,51.46,20,48.21,13,51.03,25,45.62,17,8.49
3,Carolina Hurricanes,1,58.08,4,61.11,1,58.41,3,58.92,12,9.02
4,New York Islanders,25,47.73,14,50.82,14,50.92,6,54.19,8,9.75
5,Detroit Red Wings,28,46.78,27,42.19,25,47.17,26,44.96,14,8.77
6,Winnipeg Jets,18,49.19,5,60.34,21,49.49,23,47.44,11,9.07
7,New Jersey Devils,8,52.86,24,45.59,11,51.44,12,52.11,18,8.42
8,St Louis Blues,32,42.72,23,45.83,32,39.75,32,37.16,26,7.36
9,Ottawa Senators,22,48.2,25,42.86,16,50.05,15,50.0,28,6.72


In [14]:
# Export the matchup table to result.csv.
# Layout (unchanged): a header line, then one line per team with every field
# followed by a comma, and an extra blank line after every second team so each
# game's two rows form a visual pair. This assumes `res` holds exactly two
# rows per game.
# The `with` block guarantees the file is closed even if a write fails.
with open("result.csv", 'w') as f:
    for column_name in res.columns.values:
        f.write(column_name + ",")
    f.write("\n")

    for i, record in enumerate(res.values):
        for value in record:
            f.write(str(value) + ",")
        # Even positions are the first team of a pair, odd positions the second.
        if i % 2 == 0:
            f.write("\n")
        else:
            f.write("\n\n")