In [2]:
import time
from pathlib import Path

import matplotlib
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import poisson

In [3]:
# Seasons 2006 through 2022, skipping 2021: that was the COVID-shortened season,
# which adds a number of complications to the analysis that I didn't want to deal with.
years = [season for season in range(2006, 2023) if season != 2021]
years

[2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019,
 2020,
 2022]

In [4]:
def _conference_standings(table, year, conference):
    """Extract one conference's team totals from a scraped standings table.

    Parameters
    ----------
    table : pd.DataFrame
        One standings table from ``pd.read_html``. The team names are expected
        in a column named ``'Unnamed: 0'``, alongside ``'GP'``, ``'GF'`` and
        ``'GA'`` columns.
    year : int
        Season label (the year used in the page URL) stored on every row.
    conference : str
        Conference label stored on every row, e.g. ``'East'`` or ``'West'``.

    Returns
    -------
    pd.DataFrame
        A new frame indexed by ``Team`` with the columns ``Games Played``,
        ``Goals Scored``, ``Goals Allowed``, ``Year`` and ``Conference``.
        The input table is not modified.
    """
    renamed = table.rename(columns={
        'Unnamed: 0': 'Team',
        'GP': 'Games Played',
        'GF': 'Goals Scored',
        'GA': 'Goals Allowed',
    })
    return (
        renamed
        .loc[:, ['Team', 'Games Played', 'Goals Scored', 'Goals Allowed']]
        .set_index('Team')
        .assign(Year=year, Conference=conference)
    )


# Collect one frame per conference per season and concatenate once at the end;
# growing a DataFrame with pd.concat inside the loop re-copies all earlier rows
# on every iteration.
season_frames = []

for year in years:
    url = f'https://www.hockey-reference.com/leagues/NHL_{year}.html'
    data = pd.read_html(url)

    # data[0] is the Eastern Conference table, data[1] the Western Conference table.
    season_frames.append(_conference_standings(data[0], year, 'East'))
    season_frames.append(_conference_standings(data[1], year, 'West'))

    # Pause between page requests.
    time.sleep(1)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# seasons were requested.
comb_standings = pd.concat(season_frames) if season_frames else pd.DataFrame()
    

In [17]:
# Take an independent (deep) copy so that changes made to comb_standings_new
# do not modify the scraped comb_standings frame.
comb_standings_new = comb_standings.copy(deep=True)
comb_standings_new

Unnamed: 0_level_0,Games Played,Goals Scored,Goals Allowed,Year,Conference
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2006,East
Philadelphia Flyers*,82,267,259,2006,East
New Jersey Devils*,82,242,229,2006,East
New York Rangers*,82,257,215,2006,East
New York Islanders,82,230,278,2006,East
...,...,...,...,...,...
Vegas Golden Knights,82,266,248,2022,West
Vancouver Canucks,82,249,236,2022,West
San Jose Sharks,82,214,264,2022,West
Anaheim Ducks,82,232,271,2022,West


In [18]:
# Remove the division header rows (e.g. 'Atlantic Division') that were scraped
# as if they were teams. A boolean mask is used instead of drop(..., inplace=True)
# so that re-running this cell does not raise KeyError, and so that every label
# ending in ' Division' is removed without listing each division by name.
is_division_header = comb_standings_new.index.str.endswith(' Division', na=False)

# .copy() gives an independent frame, so later column assignments do not
# trigger SettingWithCopyWarning.
comb_standings_new = comb_standings_new.loc[~is_division_header].copy()
comb_standings_new

Unnamed: 0_level_0,Games Played,Goals Scored,Goals Allowed,Year,Conference
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Philadelphia Flyers*,82,267,259,2006,East
New Jersey Devils*,82,242,229,2006,East
New York Rangers*,82,257,215,2006,East
New York Islanders,82,230,278,2006,East
Pittsburgh Penguins,82,244,316,2006,East
...,...,...,...,...,...
Vegas Golden Knights,82,266,248,2022,West
Vancouver Canucks,82,249,236,2022,West
San Jose Sharks,82,214,264,2022,West
Anaheim Ducks,82,232,271,2022,West


In [19]:
# Cast the three count columns to integers, one column at a time.
for count_col in ('Games Played', 'Goals Scored', 'Goals Allowed'):
    comb_standings_new[count_col] = comb_standings_new[count_col].astype(int)
comb_standings_new

Unnamed: 0_level_0,Games Played,Goals Scored,Goals Allowed,Year,Conference
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Philadelphia Flyers*,82,267,259,2006,East
New Jersey Devils*,82,242,229,2006,East
New York Rangers*,82,257,215,2006,East
New York Islanders,82,230,278,2006,East
Pittsburgh Penguins,82,244,316,2006,East
...,...,...,...,...,...
Vegas Golden Knights,82,266,248,2022,West
Vancouver Canucks,82,249,236,2022,West
San Jose Sharks,82,214,264,2022,West
Anaheim Ducks,82,232,271,2022,West


In [20]:
# Per-game rates, rounded to one decimal place:
#   GSPG = Goals Scored  / Games Played
#   GAPG = Goals Allowed / Games Played
for rate_col, total_col in (('GSPG', 'Goals Scored'), ('GAPG', 'Goals Allowed')):
    per_game = comb_standings_new[total_col] / comb_standings_new['Games Played']
    comb_standings_new[rate_col] = per_game.round(1)
comb_standings_new

Unnamed: 0_level_0,Games Played,Goals Scored,Goals Allowed,Year,Conference,GSPG,GAPG
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Philadelphia Flyers*,82,267,259,2006,East,3.3,3.2
New Jersey Devils*,82,242,229,2006,East,3.0,2.8
New York Rangers*,82,257,215,2006,East,3.1,2.6
New York Islanders,82,230,278,2006,East,2.8,3.4
Pittsburgh Penguins,82,244,316,2006,East,3.0,3.9
...,...,...,...,...,...,...,...
Vegas Golden Knights,82,266,248,2022,West,3.2,3.0
Vancouver Canucks,82,249,236,2022,West,3.0,2.9
San Jose Sharks,82,214,264,2022,West,2.6,3.2
Anaheim Ducks,82,232,271,2022,West,2.8,3.3


In [21]:
# Put each per-game rate next to the total it was derived from.
column_order = [
    'Games Played',
    'Goals Scored', 'GSPG',
    'Goals Allowed', 'GAPG',
    'Year', 'Conference',
]
comb_standings_new = comb_standings_new[column_order]
comb_standings_new

Unnamed: 0_level_0,Games Played,Goals Scored,GSPG,Goals Allowed,GAPG,Year,Conference
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Philadelphia Flyers*,82,267,3.3,259,3.2,2006,East
New Jersey Devils*,82,242,3.0,229,2.8,2006,East
New York Rangers*,82,257,3.1,215,2.6,2006,East
New York Islanders,82,230,2.8,278,3.4,2006,East
Pittsburgh Penguins,82,244,3.0,316,3.9,2006,East
...,...,...,...,...,...,...,...
Vegas Golden Knights,82,266,3.2,248,3.0,2022,West
Vancouver Canucks,82,249,3.0,236,2.9,2022,West
San Jose Sharks,82,214,2.6,264,3.2,2022,West
Anaheim Ducks,82,232,2.8,271,3.3,2022,West


In [22]:
# Save the cleaned standings to "<home>/Documents/Personal Projects/NHL OT/Goal Diffs.csv".
# The path is built from the current user's home directory instead of a hardcoded
# "/Users/calebsmith" prefix; it resolves to the same file when the home directory
# is /Users/calebsmith, and still works when the notebook runs under another account.
output_path = Path.home() / 'Documents' / 'Personal Projects' / 'NHL OT' / 'Goal Diffs.csv'

# to_csv does not create missing folders, so make sure the target folder exists.
output_path.parent.mkdir(parents=True, exist_ok=True)
comb_standings_new.to_csv(output_path)