# NHL API Scrape

In [1]:
import requests
import pandas as pd
import json
import pprint
import datetime  # Add this line to import the datetime module


API_URL = "https://statsapi.web.nhl.com/api/v1"

response = requests.get(API_URL + "/teams", params={"Content-Type": "application/json"})
print(response.status_code)

200


In [2]:
response.json()

{'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. © NHL 2023. All Rights Reserved.',
 'teams': [{'id': 1,
   'name': 'New Jersey Devils',
   'link': '/api/v1/teams/1',
   'venue': {'name': 'Prudential Center',
    'link': '/api/v1/venues/null',
    'city': 'Newark',
    'timeZone': {'id': 'America/New_York', 'offset': -4, 'tz': 'EDT'}},
   'abbreviation': 'NJD',
   'teamName': 'Devils',
   'locationName': 'New Jersey',
   'firstYearOfPlay': '1982',
   'division': {'id': 18,
    'name': 'Metropolitan',
    'nameShort': 'Metro',
    'link': '/api/v1/divisions/18',
    'abbreviation': 'M'},
   'conference': {'id': 6, 'name': 'Eastern', 'link': '/api/v1/conferences/6'},
   'franchise': {'franchiseId': 23,
    'teamName': 'Devils',
    'link': '/api/v1/franchises/23'},
   'shortName': 'New Jersey',
   'officialSiteUrl': 'http://www.newjerseydevils.com/',
   'franchiseId': 23,
   'a

Parse this response and access relevant fields.

In [3]:
def get_and_sort_nhl_teams():
    API_URL = "https://statsapi.web.nhl.com/api/v1/teams"
    
    try:
        response = requests.get(API_URL, params={"Content-Type": "application/json"})
        response.raise_for_status()  # Raise an exception for bad requests (e.g., 404, 500)
        teams_data = response.json().get('teams', [])
        
        # Sort teams alphabetically by name
        sorted_teams = sorted(teams_data, key=lambda x: x['name'])
        
        return sorted_teams
    except requests.exceptions.RequestException as e:
        print(f"Request Error: {e}")
        return []

# Example usage
sorted_teams = get_and_sort_nhl_teams()

for team in sorted_teams:
    print(team['name'])

Anaheim Ducks
Arizona Coyotes
Boston Bruins
Buffalo Sabres
Calgary Flames
Carolina Hurricanes
Chicago Blackhawks
Colorado Avalanche
Columbus Blue Jackets
Dallas Stars
Detroit Red Wings
Edmonton Oilers
Florida Panthers
Los Angeles Kings
Minnesota Wild
Montréal Canadiens
Nashville Predators
New Jersey Devils
New York Islanders
New York Rangers
Ottawa Senators
Philadelphia Flyers
Pittsburgh Penguins
San Jose Sharks
Seattle Kraken
St. Louis Blues
Tampa Bay Lightning
Toronto Maple Leafs
Vancouver Canucks
Vegas Golden Knights
Washington Capitals
Winnipeg Jets


In [4]:
def get_season_standings(season):
    API_URL = f"https://statsapi.web.nhl.com/api/v1/standings?season={season}"
    
    try:
        response = requests.get(API_URL)
        response.raise_for_status()
        standings_data = response.json().get('records', [])
        return standings_data
    except requests.exceptions.RequestException as e:
        print(f"Request Error: {e}")
        return []

In [5]:
def get_data_for_last_5_seasons():
    today = datetime.date.today()
    current_year = today.year
    
    # Seasons are typically in the format "YYYYYYYY" (e.g., "20212022" for the 2021-2022 season)
    seasons = [str(year) + str(year + 1) for year in range(current_year - 5, current_year)]
    
    data_for_last_5_seasons = []
    
    for season in seasons:
        season_data = get_season_standings(season)
        data_for_last_5_seasons.append({
            'season': season,
            'data': season_data
        })
    
    return data_for_last_5_seasons

In [6]:
# Use function to get last 5 seasons
#last_5_seasons_data = get_data_for_last_5_seasons()

#for season_data in last_5_seasons_data:
#    print(f"Season: {season_data['season']}")
#    for record in season_data['data']:
#        team_name = record['team']['name']
#        points = record['leagueRecord']['points']
#        print(f"Team: {team_name}, Points: {points}")

In [8]:
def get_nhl_team_stats(season_id):
    API_URL = f"https://statsapi.web.nhl.com/api/v1/standings?season={season_id}"
    
    try:
        response = requests.get(API_URL, params={"Content-Type": "application/json"})
        response.raise_for_status()  # Raise an exception for bad requests (e.g., 404, 500)
        standings_data = response.json()
        
        if 'records' in standings_data:
            for record in standings_data['records']:
                if 'teamRecords' in record:
                    for team_record in record['teamRecords']:
                        team_name = team_record['team']['name']
                        points = team_record['points']
                        print(f"Team: {team_name}, Points: {points}")
        else:
            print("No records found for this season.")
    except rq.exceptions.RequestException as e:
        print(f"Request Error: {e}")

# Example usage to get NHL team stats for the 2022-2023 season (season_id = 20222023)
get_nhl_team_stats(20222023)

Team: Carolina Hurricanes, Points: 113
Team: New Jersey Devils, Points: 112
Team: New York Rangers, Points: 107
Team: New York Islanders, Points: 93
Team: Pittsburgh Penguins, Points: 91
Team: Washington Capitals, Points: 80
Team: Philadelphia Flyers, Points: 75
Team: Columbus Blue Jackets, Points: 59
Team: Boston Bruins, Points: 135
Team: Toronto Maple Leafs, Points: 111
Team: Tampa Bay Lightning, Points: 98
Team: Florida Panthers, Points: 92
Team: Buffalo Sabres, Points: 91
Team: Ottawa Senators, Points: 86
Team: Detroit Red Wings, Points: 80
Team: Montréal Canadiens, Points: 68
Team: Colorado Avalanche, Points: 109
Team: Dallas Stars, Points: 108
Team: Minnesota Wild, Points: 103
Team: Winnipeg Jets, Points: 95
Team: Nashville Predators, Points: 92
Team: St. Louis Blues, Points: 81
Team: Arizona Coyotes, Points: 70
Team: Chicago Blackhawks, Points: 59
Team: Vegas Golden Knights, Points: 111
Team: Edmonton Oilers, Points: 109
Team: Los Angeles Kings, Points: 104
Team: Seattle Kraken,

In [9]:
def get_game_linescore(game_id):
    API_URL = f"https://statsapi.web.nhl.com/api/v1/game/{game_id}/linescore"
    
    try:
        response = requests.get(API_URL, params={"Content-Type": "application/json"})
        response.raise_for_status()
        linescore_data = response.json()
        return linescore_data
    except requests.exceptions.RequestException as e:
        print(f"Request Error: {e}")
        return None

In [10]:
# Test
# Specify a game ID you want to explore
game_id_to_explore = "2019020001"

# Call the function to get the linescore data for that game
linescore_data = get_game_linescore(game_id_to_explore)

# Now you can explore the JSON data directly
linescore_data

{'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. © NHL 2023. All Rights Reserved.',
 'currentPeriod': 3,
 'currentPeriodOrdinal': '3rd',
 'currentPeriodTimeRemaining': 'Final',
 'periods': [{'periodType': 'REGULAR',
   'startTime': '2019-10-02T23:13:27Z',
   'endTime': '2019-10-02T23:52:39Z',
   'num': 1,
   'ordinalNum': '1st',
   'home': {'goals': 0, 'shotsOnGoal': 14, 'rinkSide': 'right'},
   'away': {'goals': 1, 'shotsOnGoal': 12, 'rinkSide': 'left'}},
  {'periodType': 'REGULAR',
   'startTime': '2019-10-03T00:11:07Z',
   'endTime': '2019-10-03T00:54:27Z',
   'num': 2,
   'ordinalNum': '2nd',
   'home': {'goals': 4, 'shotsOnGoal': 17, 'rinkSide': 'left'},
   'away': {'goals': 1, 'shotsOnGoal': 3, 'rinkSide': 'right'}},
  {'periodType': 'REGULAR',
   'startTime': '2019-10-03T01:12:55Z',
   'endTime': '2019-10-03T01:50:09Z',
   'num': 3,
   'ordinalNum': '3rd',
   'home': {

In [11]:
def extract_game_info(linescore_data):
    # Extract the date, home team, away team, and relevant stats
    date = linescore_data['periods'][0]['startTime'][:10]
    home_team = linescore_data['teams']['home']['team']['name']
    away_team = linescore_data['teams']['away']['team']['name']
    total_goals = linescore_data['teams']['home']['goals'] + linescore_data['teams']['away']['goals']
    home_goals = linescore_data['teams']['home']['goals']
    away_goals = linescore_data['teams']['away']['goals']
    home_shots = linescore_data['teams']['home']['shotsOnGoal']
    away_shots = linescore_data['teams']['away']['shotsOnGoal']

    # Calculate save percentage for home and away teams
    home_sv_percentage = away_sv_percentage = 0.0
    if home_shots > 0:
        home_sv_percentage = round(1 - (away_goals / home_shots), 4)
    if away_shots > 0:
        away_sv_percentage = round(1 - (home_goals / away_shots), 4)

    return {
        'Date': date,
        'Home Team': home_team,
        'Away Team': away_team,
        'Total Goals': total_goals,
        'Home Score': home_goals,
        'Away Score': away_goals,
        'Home Shots': home_shots,
        'Away Shots': away_shots,
        'Home SV%': home_sv_percentage,
        'Away SV%': away_sv_percentage
    }

In [12]:
# Input season start and end
season_start = int(input("Enter the season start (e.g., 2017020001): "))
season_end = int(input("Enter the season end (e.g., 2017021271): "))

game_info_list = []

for game_id in range(season_start, season_end + 1):
    game_id_str = str(game_id)
    linescore = get_game_linescore(game_id_str)
    
    if linescore:
        print(f"Processing game ID: {game_id_str}")
        game_info = extract_game_info(linescore)
        if game_info:
            game_info['Game ID'] = game_id_str
            game_info_list.append(game_info)
        else:
            print(f"Game info extraction failed for game ID: {game_id_str}")
    else:
        print(f"Unable to fetch data for game ID: {game_id_str}")

# Create a pandas DataFrame from the list of game information
game_info_df = pd.DataFrame(game_info_list)

# Display the DataFrame
game_info_df

Enter the season start (e.g., 2017020001):  2017020001
Enter the season end (e.g., 2017021271):  2017021271


Processing game ID: 2017020001
Processing game ID: 2017020002
Processing game ID: 2017020003
Processing game ID: 2017020004
Processing game ID: 2017020005
Processing game ID: 2017020006
Processing game ID: 2017020007
Processing game ID: 2017020008
Processing game ID: 2017020009
Processing game ID: 2017020010
Processing game ID: 2017020011
Processing game ID: 2017020012
Processing game ID: 2017020013
Processing game ID: 2017020014
Processing game ID: 2017020015
Processing game ID: 2017020016
Processing game ID: 2017020017
Processing game ID: 2017020018
Processing game ID: 2017020019
Processing game ID: 2017020020
Processing game ID: 2017020021
Processing game ID: 2017020022
Processing game ID: 2017020023
Processing game ID: 2017020024
Processing game ID: 2017020025
Processing game ID: 2017020026
Processing game ID: 2017020027
Processing game ID: 2017020028
Processing game ID: 2017020029
Processing game ID: 2017020030
Processing game ID: 2017020031
Processing game ID: 2017020032
Processi

Unnamed: 0,Date,Home Team,Away Team,Total Goals,Home Score,Away Score,Home Shots,Away Shots,Home SV%,Away SV%,Game ID
0,2017-10-04,Winnipeg Jets,Toronto Maple Leafs,9,2,7,37,31,0.8108,0.9355,2017020001
1,2017-10-05,Pittsburgh Penguins,St. Louis Blues,9,4,5,33,34,0.8485,0.8824,2017020002
2,2017-10-05,Edmonton Oilers,Calgary Flames,3,3,0,45,27,1.0000,0.8889,2017020003
3,2017-10-05,San Jose Sharks,Philadelphia Flyers,8,3,5,35,31,0.8571,0.9032,2017020004
4,2017-10-05,Boston Bruins,Nashville Predators,7,4,3,32,29,0.9062,0.8621,2017020005
...,...,...,...,...,...,...,...,...,...,...,...
1266,2018-04-08,Arizona Coyotes,Anaheim Ducks,3,0,3,31,27,0.9032,1.0000,2017021267
1267,2018-04-08,Calgary Flames,Vegas Golden Knights,8,7,1,31,27,0.9677,0.7407,2017021268
1268,2018-04-08,Edmonton Oilers,Vancouver Canucks,5,3,2,33,38,0.9394,0.9211,2017021269
1269,2018-04-08,Los Angeles Kings,Dallas Stars,6,2,4,36,18,0.8889,0.8889,2017021270


In [83]:
# Export the DataFrame to a CSV file
game_info_df.to_csv('NHL_Game_Info_2022-2023.csv', index=False)

### Explore BoxScore

In [13]:
def get_game_boxscore(game_id):
    API_URL = f"https://statsapi.web.nhl.com/api/v1/game/{game_id}/boxscore"
    
    try:
        response = requests.get(API_URL, params={"Content-Type": "application/json"})
        response.raise_for_status()
        boxscore_data = response.json()
        return boxscore_data
    except requests.exceptions.RequestException as e:
        print(f"Request Error: {e}")
        return None

In [14]:
# Test
# Specify a game ID you want to explore
game_id_to_explore = "2019020001"

# Call the function to get the linescore data for that game
boxscore_data = get_game_boxscore(game_id_to_explore)

# Now you can explore the JSON data directly
boxscore_data

{'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. © NHL 2023. All Rights Reserved.',
 'teams': {'away': {'team': {'id': 9,
    'name': 'Ottawa Senators',
    'link': '/api/v1/teams/9'},
   'teamStats': {'teamSkaterStats': {'goals': 3,
     'pim': 10,
     'shots': 26,
     'powerPlayPercentage': '0.0',
     'powerPlayGoals': 0.0,
     'powerPlayOpportunities': 3.0,
     'faceOffWinPercentage': '57.5',
     'blocked': 17,
     'takeaways': 7,
     'giveaways': 8,
     'hits': 44}},
   'players': {'ID8467950': {'person': {'id': 8467950,
      'fullName': 'Craig Anderson',
      'link': '/api/v1/people/8467950',
      'firstName': 'Craig',
      'lastName': 'Anderson',
      'primaryNumber': '41',
      'birthDate': '1981-05-21',
      'currentAge': 42,
      'birthCity': 'Park Ridge',
      'birthStateProvince': 'IL',
      'birthCountry': 'USA',
      'nationality': 'USA',
    

In [15]:
def get_team_stats(game_id):
    API_URL = f"https://statsapi.web.nhl.com/api/v1/game/{game_id}/boxscore"
    
    try:
        response = requests.get(API_URL, params={"Content-Type": "application/json"})
        response.raise_for_status()
        boxscore_data = response.json()
        return boxscore_data
    except requests.exceptions.RequestException as e:
        print(f"Request Error: {e}")
        return None

# Specify a game ID you want to explore
game_id_to_explore = "2019020001"

# Call the function to get the boxscore data for that game
boxscore_data = get_team_stats(game_id_to_explore)

# Extract away team stats
away_team_stats = boxscore_data['teams']['away']['teamStats']['teamSkaterStats']

# Extract home team stats
home_team_stats = boxscore_data['teams']['home']['teamStats']['teamSkaterStats']

# Create DataFrames with labeled columns excluding Takeaways, Giveaways, and Hits
away_team_df = pd.DataFrame([{
    'Game ID': game_id_to_explore,
    'Away Team': boxscore_data['teams']['away']['team']['name'],
    'Away Shots': away_team_stats['shots'],
    'Away PowerPlayPercentage': away_team_stats['powerPlayPercentage'],
    'Away PowerPlayGoals': away_team_stats['powerPlayGoals'],
    'Away PowerPlayOpportunities': away_team_stats['powerPlayOpportunities'],
    'Away Blocked': away_team_stats['blocked']
}])

home_team_df = pd.DataFrame([{
    'Home Team': boxscore_data['teams']['home']['team']['name'],
    'Home Shots': home_team_stats['shots'],
    'Home PowerPlayPercentage': home_team_stats['powerPlayPercentage'],
    'Home PowerPlayGoals': home_team_stats['powerPlayGoals'],
    'Home PowerPlayOpportunities': home_team_stats['powerPlayOpportunities'],
    'Home Blocked': home_team_stats['blocked']
}])

# Combine DataFrames
combined_df = pd.concat([away_team_df, home_team_df], axis=1)


In [16]:
# Display the combined DataFrame
combined_df

Unnamed: 0,Game ID,Away Team,Away Shots,Away PowerPlayPercentage,Away PowerPlayGoals,Away PowerPlayOpportunities,Away Blocked,Home Team,Home Shots,Home PowerPlayPercentage,Home PowerPlayGoals,Home PowerPlayOpportunities,Home Blocked
0,2019020001,Ottawa Senators,26,0.0,0.0,3.0,17,Toronto Maple Leafs,42,20.0,1.0,5.0,9


In [17]:
%%time

def get_team_stats_for_season(season_start, season_end):
    game_info_list = []

    for game_id in range(season_start, season_end + 1):
        game_id_str = str(game_id)
        boxscore_data = get_team_stats(game_id_str)
        
        if boxscore_data:
            away_team_stats = boxscore_data['teams']['away']['teamStats']['teamSkaterStats']
            home_team_stats = boxscore_data['teams']['home']['teamStats']['teamSkaterStats']
            
            game_info = {
                'Game ID': game_id_str,
                'Away Team': boxscore_data['teams']['away']['team']['name'],
                'Away Shots': away_team_stats['shots'],
                'Away PowerPlayPercentage': away_team_stats['powerPlayPercentage'],
                'Away PowerPlayGoals': away_team_stats['powerPlayGoals'],
                'Away PowerPlayOpportunities': away_team_stats['powerPlayOpportunities'],
                'Away Blocked': away_team_stats['blocked'],
                'Home Team': boxscore_data['teams']['home']['team']['name'],
                'Home Shots': home_team_stats['shots'],
                'Home PowerPlayPercentage': home_team_stats['powerPlayPercentage'],
                'Home PowerPlayGoals': home_team_stats['powerPlayGoals'],
                'Home PowerPlayOpportunities': home_team_stats['powerPlayOpportunities'],
                'Home Blocked': home_team_stats['blocked']
            }
            
            game_info_list.append(game_info)
            print(f"Processed game ID: {game_id_str}")
        else:
            print(f"Unable to fetch data for game ID: {game_id_str}")

    return pd.DataFrame(game_info_list)

# Input the season range
season_start = int(input("Enter the season start (e.g., 2019020001): "))
season_end = int(input("Enter the season end (e.g., 2019021271): "))

# Call the function to get team stats for the specified season
combined_stats_df = get_team_stats_for_season(season_start, season_end)

# Display the combined DataFrame
combined_stats_df

Enter the season start (e.g., 2019020001):  2020020001
Enter the season end (e.g., 2019021271):  2020021271


Processed game ID: 2020020001
Processed game ID: 2020020002
Processed game ID: 2020020003
Processed game ID: 2020020004
Processed game ID: 2020020005
Processed game ID: 2020020006
Processed game ID: 2020020007
Processed game ID: 2020020008
Processed game ID: 2020020009
Processed game ID: 2020020010
Processed game ID: 2020020011
Processed game ID: 2020020012
Processed game ID: 2020020013
Processed game ID: 2020020014
Processed game ID: 2020020015
Processed game ID: 2020020016
Processed game ID: 2020020017
Processed game ID: 2020020018
Processed game ID: 2020020019
Processed game ID: 2020020020
Processed game ID: 2020020021
Processed game ID: 2020020022
Processed game ID: 2020020023
Processed game ID: 2020020024
Processed game ID: 2020020025
Processed game ID: 2020020026
Processed game ID: 2020020027
Processed game ID: 2020020028
Processed game ID: 2020020029
Processed game ID: 2020020030
Processed game ID: 2020020031
Processed game ID: 2020020032
Processed game ID: 2020020033
Processed 

Unnamed: 0,Game ID,Away Team,Away Shots,Away PowerPlayPercentage,Away PowerPlayGoals,Away PowerPlayOpportunities,Away Blocked,Home Team,Home Shots,Home PowerPlayPercentage,Home PowerPlayGoals,Home PowerPlayOpportunities,Home Blocked
0,2020020001,Pittsburgh Penguins,34,33.3,1.0,3.0,11,Philadelphia Flyers,27,66.7,2.0,3.0,13
1,2020020002,Chicago Blackhawks,23,33.3,1.0,3.0,7,Tampa Bay Lightning,33,50.0,2.0,4.0,12
2,2020020003,Montréal Canadiens,32,66.7,2.0,3.0,22,Toronto Maple Leafs,34,50.0,2.0,4.0,17
3,2020020004,Vancouver Canucks,35,0.0,0.0,2.0,14,Edmonton Oilers,31,0.0,0.0,4.0,17
4,2020020005,St. Louis Blues,32,0.0,0.0,2.0,19,Colorado Avalanche,27,25.0,1.0,4.0,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...
863,2020020864,Calgary Flames,40,0.0,0.0,3.0,8,Vancouver Canucks,24,0.0,0.0,3.0,10
864,2020020865,Anaheim Ducks,22,0.0,0.0,1.0,7,Minnesota Wild,25,25.0,1.0,4.0,14
865,2020020866,St. Louis Blues,21,0.0,0.0,3.0,18,Vegas Golden Knights,29,25.0,1.0,4.0,17
866,2020020867,Colorado Avalanche,30,33.3,1.0,3.0,13,Los Angeles Kings,18,0.0,0.0,2.0,12


In [18]:
# Save the combined DataFrame to a CSV file
combined_stats_df.to_csv('NHL_BoxScore_2020-2021.csv', index=False)