<a href="https://colab.research.google.com/github/ccstevie/nhl_model/blob/main/NHL_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from datetime import date, timedelta
from functools import reduce
from io import StringIO

import pandas as pd
import requests

from get_todays_games import getGames

In [4]:
# Data window: the 30 days ending today. Both endpoints are computed first and
# then echoed so the rendered notebook records the range that was queried.
today = date.today()
start = today - timedelta(days=30)
print("Today's date:", today)
print("30 days ago:", start)

Today's date: 2024-02-21
30 days ago: 2024-01-22


In [5]:
# Natural Stat Trick team-table query for the 2023-24 season, limited to the
# [start, today] window computed above. The literal is split across lines for
# readability only; the pieces concatenate to a single URL string.
url = (
    "https://www.naturalstattrick.com/teamtable.php"
    "?fromseason=20232024&thruseason=20232024"
    "&stype=2&sit=5v5&score=all&rate=n&team=all&loc=B&gpf=410"
    f"&fd={start}&td={today}"
)
# requests waits indefinitely by default; bound the wait so a stalled server
# cannot hang the notebook.
req = requests.get(url, timeout=30)
# Display the HTTP status so a failed fetch is visible before parsing.
req.status_code

200

In [6]:
# Parse the team table from the response body that was already downloaded into
# `req`, rather than handing `url` to pandas and fetching the same page twice.
# The text is wrapped in StringIO so pandas receives a file-like object.
# header=0      -> first table row supplies the column names
# index_col=0   -> first column becomes the row index
# na_values     -> "-" cells are read as missing values
df = pd.read_html(StringIO(req.text), header=0, index_col=0, na_values=["-"])[0]
df.head()

Unnamed: 0,Team,GP,TOI,W,L,OTL,ROW,Points,Point %,CF,...,LDSA,LDSF%,LDGF,LDGA,LDGF%,LDSH%,LDSV%,SH%,SV%,PDO
1,Winnipeg Jets,10,484:36,4,5,1,4,9,0.45,514,...,98,54.84,2,1,66.67,1.68,98.98,5.39,93.12,0.985
2,Boston Bruins,11,518:55,5,4,2,4,12,0.545,447,...,125,37.19,2,3,40.0,2.7,97.6,8.89,94.36,1.032
3,Vegas Golden Knights,9,452:19,5,3,1,5,11,0.611,386,...,106,41.76,2,2,50.0,2.63,98.11,11.0,92.92,1.039
4,New Jersey Devils,11,520:21,5,5,1,5,11,0.5,548,...,124,52.67,1,1,50.0,0.72,99.19,8.62,91.55,1.002
5,Florida Panthers,11,523:18,10,1,0,9,20,0.909,590,...,108,56.63,4,1,80.0,2.84,99.07,8.1,95.42,1.035


In [7]:
# Rank every team by CF%: order the table from highest to lowest, number the
# rows starting at 1, and turn that numbering into a column. The rank column is
# renamed to the stat's own label, so the selection below returns the rank and
# the original CF% value under the same "CF%" heading. Teams with equal values
# receive consecutive ranks in sort order.
# The same steps are repeated in the following cells for GF%, xGF%, HDCF% and SH%.
cf = (
    df.sort_values(by="CF%", ascending=False)
    .reset_index(drop=True)
    .rename(index=lambda position: position + 1)
    .reset_index()
    .rename(columns={"index": "CF%"})
)
cf = cf[["Team", "CF%"]]
# show the CF% ranking as an example
cf

Unnamed: 0,Team,CF%,CF%.1
0,Carolina Hurricanes,1,60.58
1,Los Angeles Kings,2,54.9
2,Vancouver Canucks,3,54.84
3,Florida Panthers,4,54.38
4,Philadelphia Flyers,5,53.97
5,Winnipeg Jets,6,53.71
6,Edmonton Oilers,7,53.25
7,Calgary Flames,8,53.17
8,Seattle Kraken,9,53.06
9,Tampa Bay Lightning,10,52.62


In [8]:
# Rank every team by GF% (1 = highest). The rank column takes the label "GF%",
# so the selection keeps both the rank and the original GF% value.
gf = (
    df.sort_values(by="GF%", ascending=False)
    .reset_index(drop=True)
    .rename(index=lambda position: position + 1)
    .reset_index()
    .rename(columns={"index": "GF%"})
)
gf = gf[["Team", "GF%"]]

In [9]:
# Rank every team by xGF% (1 = highest). The rank column takes the label
# "xGF%", so the selection keeps both the rank and the original xGF% value.
xgf = (
    df.sort_values(by="xGF%", ascending=False)
    .reset_index(drop=True)
    .rename(index=lambda position: position + 1)
    .reset_index()
    .rename(columns={"index": "xGF%"})
)
xgf = xgf[["Team", "xGF%"]]

In [10]:
# Rank every team by HDCF% (1 = highest). The rank column takes the label
# "HDCF%", so the selection keeps both the rank and the original HDCF% value.
hdcf = (
    df.sort_values(by="HDCF%", ascending=False)
    .reset_index(drop=True)
    .rename(index=lambda position: position + 1)
    .reset_index()
    .rename(columns={"index": "HDCF%"})
)
hdcf = hdcf[["Team", "HDCF%"]]

In [11]:
# Rank every team by SH% (1 = highest). The rank column takes the label "SH%",
# so the selection keeps both the rank and the original SH% value.
sh = (
    df.sort_values(by="SH%", ascending=False)
    .reset_index(drop=True)
    .rename(index=lambda position: position + 1)
    .reset_index()
    .rename(columns={"index": "SH%"})
)
sh = sh[["Team", "SH%"]]

In [12]:
# Combine the five per-stat rankings into one table keyed by team name.
# Each frame is outer-joined onto the running result in list order, so the
# columns appear as CF%, GF%, xGF%, HDCF%, SH%.
dfs = [cf, gf, xgf, hdcf, sh]
final_df = dfs[0]
for ranking in dfs[1:]:
    final_df = final_df.merge(ranking, on=["Team"], how="outer")
final_df

Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
0,Carolina Hurricanes,1,60.58,2,63.33,2,57.29,4,56.61,24,7.63
1,Los Angeles Kings,2,54.9,30,38.46,7,54.39,18,51.41,30,5.86
2,Vancouver Canucks,3,54.84,11,56.52,4,55.58,5,56.28,3,10.48
3,Florida Panthers,4,54.38,1,67.65,3,55.63,12,52.97,18,8.1
4,Philadelphia Flyers,5,53.97,31,37.84,12,52.91,3,56.82,29,6.19
5,Winnipeg Jets,6,53.71,23,46.43,13,52.35,14,51.79,31,5.39
6,Edmonton Oilers,7,53.25,4,60.53,8,54.33,2,58.13,8,9.66
7,Calgary Flames,8,53.17,14,51.35,5,54.96,19,50.9,23,7.69
8,Seattle Kraken,9,53.06,9,57.14,10,54.16,9,54.82,27,7.08
9,Tampa Bay Lightning,10,52.62,24,45.83,17,50.5,21,49.71,5,10.09


In [13]:
# Example: pull the merged ranking row for a single team by its full name.
final_df[final_df["Team"].eq("Toronto Maple Leafs")]

Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
20,Toronto Maple Leafs,21,48.56,10,56.67,14,51.06,6,55.56,20,8.06


In [14]:
# Today's schedule from the local get_todays_games helper (imported with the
# other imports at the top of the notebook). The result is consumed below as
# (away, home) pairs of team nicknames.
matchups = getGames()
matchups

[('Senators', 'Panthers'),
 ('Stars', 'Rangers'),
 ('Islanders', 'Penguins'),
 ('Devils', 'Capitals'),
 ('Wild', 'Jets'),
 ('Canucks', 'Avalanche'),
 ('Predators', 'Golden Knights'),
 ('Blue Jackets', 'Kings')]

In [30]:
# Build the per-game view: for every (away, home) pair, pick the ranking rows
# whose full team name contains the nickname, away team first.
matchup_rows = []
for away, home in matchups:
    # regex=False: nicknames are plain text, so match them literally instead
    # of interpreting them as regular-expression patterns.
    matchup_rows.append(final_df[final_df["Team"].str.contains(away, regex=False)])
    matchup_rows.append(final_df[final_df["Team"].str.contains(home, regex=False)])

# Concatenate once at the end rather than growing the frame inside the loop.
# pd.concat rejects an empty list, so a day without games keeps the empty frame.
res = pd.concat(matchup_rows, ignore_index=True) if matchup_rows else pd.DataFrame()
res


Unnamed: 0,Team,CF%,CF%.1,GF%,GF%.1,xGF%,xGF%.1,HDCF%,HDCF%.1,SH%,SH%.1
0,Ottawa Senators,18,50.28,28,42.55,18,50.36,15,51.76,25,7.41
1,Florida Panthers,4,54.38,1,67.65,3,55.63,12,52.97,18,8.1
2,Dallas Stars,11,52.45,3,61.7,1,57.32,1,60.3,11,9.32
3,New York Rangers,22,48.43,6,57.89,25,46.34,23,47.03,9,9.44
4,New York Islanders,17,50.69,13,55.17,11,53.29,17,51.47,26,7.41
5,Pittsburgh Penguins,13,51.17,22,47.06,16,50.91,13,52.36,28,6.75
6,New Jersey Devils,20,49.32,15,51.02,6,54.73,7,55.14,15,8.62
7,Washington Capitals,16,50.8,25,45.45,24,47.55,16,51.58,12,9.01
8,Minnesota Wild,15,51.09,17,48.84,9,54.33,8,55.06,7,9.68
9,Winnipeg Jets,6,53.71,23,46.43,13,52.35,14,51.79,31,5.39


In [45]:
# Export the matchup table to result.csv in a hand-rolled layout:
#   - a header line with every column label followed by a comma, then a blank line
#   - one line per row, every cell followed by a comma
#   - an extra blank line after every second row
# NOTE(review): the pairing assumes `res` holds two rows per game (away, then
# home); a missing team would shift the blank-line grouping -- confirm upstream.
# The context manager closes the file even if a write raises part-way through.
with open("result.csv", "w") as f:
    f.write("".join(label + "," for label in res.columns.values))
    f.write("\n\n")

    for position, row in enumerate(res.values):
        f.write("".join(str(cell) + "," for cell in row))
        # Even positions are the first row of a pair; odd positions close the
        # pair and get the separating blank line.
        f.write("\n" if position % 2 == 0 else "\n\n")