<a href="https://colab.research.google.com/github/ccstevie/nhl_model/blob/main/NHL_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from datetime import date, timedelta
from functools import reduce
from io import StringIO

import pandas as pd
import requests

In [2]:
# Length of the look-back window, in days. Kept in one place so the window
# and the printed label below cannot drift apart.
LOOKBACK_DAYS = 30

# The data range runs from `start` (LOOKBACK_DAYS before today) to `today`.
today = date.today()
print("Today's date:", today)
start = today - timedelta(days=LOOKBACK_DAYS)
print(f"{LOOKBACK_DAYS} days ago:", start)

Today's date: 2024-02-29
30 days ago: 2024-01-30


In [3]:
# Team-table query against naturalstattrick.com, limited to the window
# [start, today] through the `fd` / `td` query parameters.
# NOTE(review): the season (20232024) is hard-coded while the dates are
# dynamic -- presumably this needs updating once the window reaches a new
# season; confirm.
url = f"https://www.naturalstattrick.com/teamtable.php?fromseason=20232024&thruseason=20232024&stype=2&sit=5v5&score=all&rate=n&team=all&loc=B&gpf=410&fd={start}&td={today}"
# Without a timeout, requests can wait forever on an unresponsive server.
req = requests.get(url, timeout=30)
# Fail here on an HTTP error instead of parsing an error page further down.
req.raise_for_status()
req.status_code

200

In [4]:
# Parse the page that `req` already downloaded rather than handing `url` to
# pandas, which would fetch the same page a second time. The markup is wrapped
# in StringIO because read_html expects a path/URL or a file-like object.
# The first table on the page is the team table; "-" cells are read as NaN and
# the first column becomes the index.
df = pd.read_html(StringIO(req.text), header=0, index_col=0, na_values=["-"])[0]
df.head()

Unnamed: 0,Team,GP,TOI,W,L,OTL,ROW,Points,Point %,CF,...,LDSA,LDSF%,LDGF,LDGA,LDGF%,LDSH%,LDSV%,SH%,SV%,PDO
1,Columbus Blue Jackets,9,431:36,4,5,0,4,8,0.444,472,...,121,47.62,3,6,33.33,2.73,95.04,8.05,93.67,1.017
2,St Louis Blues,11,519:29,4,6,1,4,9,0.409,446,...,136,45.16,3,4,42.86,2.68,97.06,7.83,92.36,1.002
3,Seattle Kraken,9,434:25,4,4,1,2,9,0.5,460,...,95,52.02,7,4,63.64,6.8,95.79,6.57,93.46,1.0
4,San Jose Sharks,8,392:46,2,5,1,2,5,0.313,368,...,104,44.68,4,6,40.0,4.76,94.23,7.26,90.83,0.981
5,Ottawa Senators,11,555:31,6,4,1,5,13,0.591,547,...,116,48.44,1,5,16.67,0.92,95.69,8.95,89.45,0.984


In [5]:
# Rank every team on CF%, GF%, xGF%, HDCF% and SH% (rank 1 = highest value).
# Each ranking frame keeps "Team", the rank and the raw value; the rank column
# is given the metric's own name, so that label appears twice in the frame.
cf_order = df.sort_values(by="CF%", ascending=False, ignore_index=True)
cf = (
    cf_order.set_axis(cf_order.index + 1)  # positions 0..n-1 -> ranks 1..n
    .reset_index()  # the rank becomes a regular column called "index"
    .rename(columns={"index": "CF%"})
    [["Team", "CF%"]]
)
# show the CF% ranking as an example
cf

Unnamed: 0,Team,CF%,CF%.1
0,Carolina Hurricanes,1,57.47
1,Edmonton Oilers,2,57.42
2,Vancouver Canucks,3,53.75
3,Florida Panthers,4,53.29
4,Seattle Kraken,5,52.69
5,Philadelphia Flyers,6,52.57
6,Calgary Flames,7,52.19
7,Los Angeles Kings,8,52.14
8,New Jersey Devils,9,52.08
9,Tampa Bay Lightning,10,51.83


In [6]:
# GF% ranking: "Team", rank (1 = highest GF%) and the raw GF% value. The rank
# column is also labelled "GF%", so the label appears twice.
gf_order = df.sort_values(by="GF%", ascending=False, ignore_index=True)
gf = (
    gf_order.set_axis(gf_order.index + 1)  # positions 0..n-1 -> ranks 1..n
    .reset_index()  # the rank becomes a regular column called "index"
    .rename(columns={"index": "GF%"})
    [["Team", "GF%"]]
)

In [7]:
# xGF% ranking: "Team", rank (1 = highest xGF%) and the raw xGF% value. The
# rank column is also labelled "xGF%", so the label appears twice.
xgf_order = df.sort_values(by="xGF%", ascending=False, ignore_index=True)
xgf = (
    xgf_order.set_axis(xgf_order.index + 1)  # positions 0..n-1 -> ranks 1..n
    .reset_index()  # the rank becomes a regular column called "index"
    .rename(columns={"index": "xGF%"})
    [["Team", "xGF%"]]
)

In [8]:
# HDCF% ranking: "Team", rank (1 = highest HDCF%) and the raw HDCF% value. The
# rank column is also labelled "HDCF%", so the label appears twice.
hdcf_order = df.sort_values(by="HDCF%", ascending=False, ignore_index=True)
hdcf = (
    hdcf_order.set_axis(hdcf_order.index + 1)  # positions 0..n-1 -> ranks 1..n
    .reset_index()  # the rank becomes a regular column called "index"
    .rename(columns={"index": "HDCF%"})
    [["Team", "HDCF%"]]
)

In [9]:
# SH% ranking: "Team", rank (1 = highest SH%) and the raw SH% value. The rank
# column is also labelled "SH%", so the label appears twice.
sh_order = df.sort_values(by="SH%", ascending=False, ignore_index=True)
sh = (
    sh_order.set_axis(sh_order.index + 1)  # positions 0..n-1 -> ranks 1..n
    .reset_index()  # the rank becomes a regular column called "index"
    .rename(columns={"index": "SH%"})
    [["Team", "SH%"]]
)

In [10]:
# Combine the five per-metric ranking frames into one table with a single row
# per team, outer-joining each frame onto the running result by "Team".
dfs = [cf, gf, xgf, hdcf, sh]
final_df = dfs[0]
for ranking in dfs[1:]:
    final_df = final_df.merge(ranking, on=['Team'], how='outer')
final_df

Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
0,Carolina Hurricanes,1,57.47,2,62.5,2,57.41,2,56.52,22,7.78
1,Edmonton Oilers,2,57.42,16,51.79,1,59.59,1,65.52,16,8.76
2,Vancouver Canucks,3,53.75,13,53.06,7,53.21,6,53.69,5,10.32
3,Florida Panthers,4,53.29,1,65.52,3,55.05,3,55.61,26,7.31
4,Seattle Kraken,5,52.69,18,50.0,18,49.61,25,45.86,28,6.57
5,Philadelphia Flyers,6,52.57,10,53.49,8,53.16,14,51.65,12,8.95
6,Calgary Flames,7,52.19,6,54.76,11,52.0,22,48.9,9,9.31
7,Los Angeles Kings,8,52.14,26,44.44,14,51.27,20,48.97,32,5.69
8,New Jersey Devils,9,52.08,8,53.85,5,53.86,10,52.12,13,8.81
9,Tampa Bay Lightning,10,51.83,29,40.43,20,49.17,13,51.69,14,8.8


In [11]:
# Example: look up one team's row by its exact name.
final_df[final_df["Team"].eq("Toronto Maple Leafs")]

Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
17,Toronto Maple Leafs,18,50.27,4,56.6,9,52.87,5,54.03,4,10.6


In [12]:
# Load today's schedule from the get_todays_games module (not one of the
# packages imported at the top of the notebook; presumably a file that ships
# alongside it -- confirm it is available before running this cell).
from get_todays_games import getGames

# The next cell unpacks every entry as an (away, home) pair of team nicknames.
matchups = getGames()
matchups

[('Golden Knights', 'Bruins'),
 ('Canadiens', 'Panthers'),
 ('Hurricanes', 'Blue Jackets'),
 ('Islanders', 'Red Wings'),
 ('Coyotes', 'Maple Leafs'),
 ('Sabres', 'Lightning'),
 ('Jets', 'Stars'),
 ('Wild', 'Predators'),
 ('Avalanche', 'Blackhawks'),
 ('Penguins', 'Kraken'),
 ('Kings', 'Canucks'),
 ('Ducks', 'Sharks')]

In [13]:
# Build the matchup table: for every game, the away team's row followed by the
# home team's row, in schedule order. The slices are collected first and
# concatenated once, rather than re-copying a growing frame on every iteration
# and concatenating onto an empty, column-less seed frame.
matchup_frames = []
for away, home in matchups:
    # Nicknames are matched as literal substrings of the full team name;
    # regex=False keeps any special character in a name from being read as a
    # regular-expression pattern.
    away_df = final_df[final_df["Team"].str.contains(away, regex=False)]
    home_df = final_df[final_df["Team"].str.contains(home, regex=False)]
    matchup_frames.extend([away_df, home_df])

# pd.concat raises on an empty list, so a day without games yields an empty
# frame, as before.
# NOTE(review): a nickname that matches no team (or several) contributes zero
# (or extra) rows without any error; the CSV export below assumes exactly two
# rows per game -- consider validating here.
res = (
    pd.concat(matchup_frames, ignore_index=True)
    if matchup_frames
    else pd.DataFrame()
)
res


Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
0,Vegas Golden Knights,32,42.37,19,50.0,26,44.94,29,41.88,3,10.78
1,Boston Bruins,27,45.99,24,47.5,15,50.81,11,51.79,24,7.72
2,Montreal Canadiens,22,48.57,23,47.83,10,52.08,23,48.63,7,9.57
3,Florida Panthers,4,53.29,1,65.52,3,55.05,3,55.61,26,7.31
4,Carolina Hurricanes,1,57.47,2,62.5,2,57.41,2,56.52,22,7.78
5,Columbus Blue Jackets,15,51.03,5,55.88,4,54.22,12,51.76,20,8.05
6,New York Islanders,26,46.84,11,53.13,23,48.77,21,48.95,19,8.13
7,Detroit Red Wings,24,47.97,12,53.06,27,44.86,27,42.62,2,10.92
8,Arizona Coyotes,12,51.65,28,41.3,12,51.65,15,51.5,15,8.8
9,Toronto Maple Leafs,18,50.27,4,56.6,9,52.87,5,54.03,4,10.6


In [14]:
# Export the matchup table to result.csv. The layout is hand-rolled on purpose
# and kept exactly as before: every cell is followed by a comma (including the
# last one on a line), and a blank line is written after every second data row
# so that each away/home pair forms its own visual group.
# The `with` block guarantees the file is closed even if a write fails.
with open("result.csv", 'w') as f:
    # Header row: one entry per column label.
    f.write("".join(col + "," for col in res.columns.values))
    f.write("\n")

    for i, row in enumerate(res.values):
        f.write("".join(str(value) + "," for value in row))
        # Even positions are the first row of a pair, odd positions the
        # second; the second one is followed by an extra blank line.
        f.write("\n" if i % 2 == 0 else "\n\n")