In [None]:
import requests
from datetime import datetime
import pandas as pd

# Define the NHL API endpoint for the schedule endpoint
api_url = "https://statsapi.web.nhl.com/api/v1/schedule"

# Define a list of seasons you want to retrieve data for
start_season = 20002001
current_year = datetime.now().year
current_month = currentMonth = datetime.now().month

if current_month >= 8:
    end_season = int(str(current_year) + str(current_year + 1))
else:
    end_season = int(str(current_year - 1) + str(current_year))

seasons = []
for i in range(start_season, end_season + 10001, 10001):
    seasons.append(str(i))

# Initialize an empty list to store game data
all_games = []

# Loop through each season and retrieve game data
for season in seasons:
    # Define parameters for the API request for the current season
    params = {
        "hydrate": "team,linescore,game(content(media(epg))),broadcasts(all)",
        "site": "en_nhl",
        "season": season,
    }

    # Send a GET request to the NHL API
    response = requests.get(api_url, params=params)

    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()

        # Extract game data from the response
        games = []
        for date in data["dates"]:
            for game in date["games"]:
                games.append(game)

        # Add a "Season" column to the game data
        for game in games:
            game["Season"] = season

        # Extend the list of all games with games from the current season
        all_games.extend(games)
    else:
        print(f"Failed to retrieve data for season {season}. Status code:", response.status_code)

# Create a Pandas DataFrame from all the game data
df = pd.DataFrame(all_games)

Failed to retrieve data for season 20072008. Status code: 504
Failed to retrieve data for season 20082009. Status code: 504
Failed to retrieve data for season 20102011. Status code: 504
Failed to retrieve data for season 20112012. Status code: 504
Failed to retrieve data for season 20152016. Status code: 504
Failed to retrieve data for season 20162017. Status code: 504
Failed to retrieve data for season 20172018. Status code: 504
Failed to retrieve data for season 20182019. Status code: 504


In [81]:
df['game_id'] = df['gamePk'].astype(str)
df.rename(columns = {'Season' : 'season', 'gameType' : 'type', 'gameDate' : 'date_time_GMT'}, inplace = True)

def extract_away_team_id(row):
    return row['away']['team']['id']

def extract_home_team_id(row):
    return row['home']['team']['id']

def extract_away_goals(row):
    return row['away']['score']

def extract_home_goals(row):
    return row['home']['score']

def extract_game_status(row):
    return row['detailedState']

# Apply the extraction functions to create new columns
df['away_team_id'] = df['teams'].apply(extract_away_team_id)
df['home_team_id'] = df['teams'].apply(extract_home_team_id)
df['away_goals'] = df['teams'].apply(extract_away_goals)
df['home_goals'] = df['teams'].apply(extract_home_goals)
df['game_status'] = df['status'].apply(extract_game_status)

# Calculate the outcome based on goals
df['outcome'] = df.apply(lambda row: 'Home Win' if row['home_goals'] > row['away_goals'] else 'Away Win' if row['away_goals'] > row['home_goals'] else 'Tie', axis=1)

df_game = df[['game_id', 'season', 'type', 'date_time_GMT', 'away_team_id', 'home_team_id', 'away_goals', 'home_goals', 'outcome', 'game_status']]
df_game

Unnamed: 0,game_id,season,season.1,type,date_time_GMT,away_team_id,home_team_id,away_goals,home_goals,outcome,game_status
0,2021010001,20212022,20212022,PR,2021-09-25T23:00:00Z,8,10,1,4,Home Win,Final
1,2021010002,20212022,20212022,PR,2021-09-26T00:00:00Z,30,19,2,6,Home Win,Final
2,2021010003,20212022,20212022,PR,2021-09-26T18:00:00Z,18,13,4,5,Home Win,Final
3,2021010004,20212022,20212022,PR,2021-09-26T21:00:00Z,6,15,3,2,Away Win,Final
4,2021010005,20212022,20212022,PR,2021-09-26T22:00:00Z,18,13,1,3,Home Win,Final
...,...,...,...,...,...,...,...,...,...,...,...
4436,2023021308,20232024,20232024,R,2024-04-19T00:00:00Z,23,52,0,0,Tie,Scheduled
4437,2023021309,20232024,20232024,R,2024-04-19T01:00:00Z,28,20,0,0,Tie,Scheduled
4438,2023021310,20232024,20232024,R,2024-04-19T01:30:00Z,22,21,0,0,Tie,Scheduled
4439,2023021311,20232024,20232024,R,2024-04-19T02:00:00Z,24,54,0,0,Tie,Scheduled
