In [2]:
import requests
import pandas as pd
from io import StringIO
import time

def fetch_stats_for_year(year, timeout=30):
    """Download and parse the NHL skater statistics table for one season.

    Args:
        year: Season identifier substituted into the hockey-reference URL
            (``.../leagues/NHL_{year}_skaters.html``).
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get`` so a stalled connection cannot hang
            the caller forever.

    Returns:
        A ``pandas.DataFrame`` built from the first HTML table on the page,
        with its two-level header flattened to single string column names.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = f'https://www.hockey-reference.com/leagues/NHL_{year}_skaters.html'
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful

    # Wrap the HTML text in StringIO so pandas treats it as a file-like
    # object; the table is read with a two-row header.
    tables = pd.read_html(StringIO(response.text), header=[0, 1])
    df = tables[0]

    # Flatten the two-level header. When the top level is a pandas
    # "Unnamed" placeholder, keep only the second-level name; otherwise
    # join both levels with a space (e.g. "Scoring G").
    df.columns = [
        col[1] if 'Unnamed' in col[0] else ' '.join(col).strip()
        for col in df.columns.values
    ]

    # Anything still carrying an "Unnamed" placeholder has no usable name.
    unnamed_columns = [col for col in df.columns if "Unnamed" in col]
    df.drop(columns=unnamed_columns, inplace=True)

    return df

# Seasons to download (both ends inclusive). The output file name below is
# derived from these values so the range and the name cannot drift apart.
FIRST_YEAR = 2016
LAST_YEAR = 2024
REQUEST_DELAY_SECONDS = 10  # Pause between consecutive requests

# Collect data frames for each year
data_frames = []
for year in range(FIRST_YEAR, LAST_YEAR + 1):  # Adjust constants as necessary
    if data_frames:
        # Wait only *between* requests; sleeping after the final fetch
        # would just delay the script for no benefit.
        time.sleep(REQUEST_DELAY_SECONDS)
    print(f"Fetching data for {year}...")
    df_year = fetch_stats_for_year(year)
    df_year['Year'] = year  # Add a year column for reference
    data_frames.append(df_year)

# Combine all data frames into a single data frame
combined_df = pd.concat(data_frames, ignore_index=True)

# Optionally, save to CSV
combined_df.to_csv(f'NHL_HR_Stats_{FIRST_YEAR}_to_{LAST_YEAR}.csv', index=False)

print(combined_df)


Fetching data for 2016...
Fetching data for 2017...
Fetching data for 2018...
Fetching data for 2019...
Fetching data for 2020...
Fetching data for 2021...
Fetching data for 2022...
Fetching data for 2023...
Fetching data for 2024...
           Rk          Player   Age Team  Pos    GP  Scoring G  Scoring A  \
0         1.0    Patrick Kane  27.0  CHI   RW  82.0       46.0       60.0   
1         2.0      Jamie Benn  26.0  DAL   LW  82.0       41.0       48.0   
2         3.0   Sidney Crosby  28.0  PIT    C  80.0       36.0       49.0   
3         4.0   Erik Karlsson  25.0  OTT    D  82.0       16.0       66.0   
4         5.0    Joe Thornton  36.0  SJS    C  82.0       19.0       63.0   
...       ...             ...   ...  ...  ...   ...        ...        ...   
10551  1019.0     Colin White  27.0  PIT    C  11.0        0.0        0.0   
10552  1020.0  Ryan Winterton  20.0  SEA    C   9.0        0.0        0.0   
10553  1021.0     Dustin Wolf  22.0  CGY    G  17.0        0.0        0.0