In [2]:
from db import db

import pandas as pd
import constants

import requests
import json

from datetime import datetime
import pytz

In [None]:
# Desired stats
stat_cols = ['evGGARatio', 'shotsPerGame', 'shotsAllowed', 'shootingPctg', 'savePctg']

# Seconds to wait for the NHL API before giving up on a request; without a
# timeout a stalled connection would hang this cell indefinitely.
REQUEST_TIMEOUT = 30

# Iterate over teams in db and collect season stats.
# Each row is [team id, <one value per entry in stat_cols>]; rows are gathered
# in a plain list and turned into a DataFrame once, after the loop.
season_stats_rows = []
with requests.Session() as session:  # one session so connections are reused across teams
    for team in db.nhl_teams.find():
        url = f"https://statsapi.web.nhl.com/api/v1/teams/{team['_id']}/stats"
        response = session.get(url, timeout=REQUEST_TIMEOUT)
        # Fail here on an HTTP error rather than later as an opaque KeyError on 'stats'.
        response.raise_for_status()
        # The stat values are read from the first split of the first stats entry.
        stats_dict = response.json()['stats'][0]['splits'][0]['stat']
        season_stats_rows.append([team['_id']] + [stats_dict[stat] for stat in stat_cols])

# Create dataframe of season stats
season_stats = pd.DataFrame(season_stats_rows, columns=['id'] + stat_cols)

In [3]:
# Desired seasons
seasons = ['20152016', '20162017', '20172018', '20182019', '20192020', '20202021', '20212022', '20222023']

# Desired stats
team_stats = ['evGGARatio', 'shotsPerGame', 'shootingPctg']
opp_stats = ['shotsAllowed', 'savePctg']
# Opponent column names are derived from opp_stats ('shotsAllowed' -> 'oppShotsAllowed')
# so the header cannot drift out of sync with the values appended to each row.
opp_cols = [f"opp{stat[0].upper()}{stat[1:]}" for stat in opp_stats]
cols = ['game_id', 'date', 'team_id', 'home_away', 'goals'] + team_stats + opp_cols

# Create gamelog dataframe
nhl_gamelogs = []

# Base URL for NHL API
base_url = "https://statsapi.web.nhl.com"

# Seconds to wait for the NHL API before giving up on a request; without a
# timeout a single stalled connection would hang the whole crawl.
REQUEST_TIMEOUT = 30


def _get_json(session, url):
    """Fetch `url` with `session` and return the decoded JSON body.

    Raises a requests exception on a network failure, a timeout, or an HTTP
    error status, so callers never try to parse an error response as data.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def _fetch_team_stats(session, team_id, date):
    """Return the stat dict the API reports for `team_id` when queried with `date`.

    `date` is a 'YYYY-MM-DD' string. The values are read from the first split
    of the first stats entry in the response; a response without that shape
    raises KeyError/IndexError.
    """
    stats_url = f"{base_url}/api/v1/teams/{team_id}/stats?date={date}"
    return _get_json(session, stats_url)['stats'][0]['splits'][0]['stat']


# Iterate over seasons and collect gamelogs.
# A single session is shared so connections are reused across requests.
with requests.Session() as session:
    for season in seasons:
        url = f"{base_url}/api/v1/schedule?season={season}&gameType=R"
        # A failed schedule request is not caught: silently dropping a whole
        # season would be worse than stopping.
        schedule_response = _get_json(session, url)
        for schedule_day in schedule_response['dates']:
            for game in schedule_day['games']:
                try:
                    # Get game id and date
                    game_id = game['gamePk']
                    # NOTE(review): the format string expects a trailing 'Z', so this looks
                    # like the UTC calendar day, which may differ from the local day the
                    # game was played on — confirm this is the intended stats cut-off.
                    date = datetime.strptime(game['gameDate'], "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d")

                    # Read ids and scores for both sides before any request, so a game
                    # lacking a score is skipped without spending API calls on it.
                    home_id = game['teams']['home']['team']['id']
                    home_goals = game['teams']['home']['score']
                    away_id = game['teams']['away']['team']['id']
                    away_goals = game['teams']['away']['score']

                    # Get both teams' stats
                    home_stats_dict = _fetch_team_stats(session, home_id, date)
                    away_stats_dict = _fetch_team_stats(session, away_id, date)

                    # Each row: identifying fields, the team's own stats, then the opponent's stats
                    home_data = [game_id, date, home_id, 'H', home_goals]
                    home_data.extend(home_stats_dict[stat] for stat in team_stats)
                    home_data.extend(away_stats_dict[stat] for stat in opp_stats)

                    away_data = [game_id, date, away_id, 'A', away_goals]
                    away_data.extend(away_stats_dict[stat] for stat in team_stats)
                    away_data.extend(home_stats_dict[stat] for stat in opp_stats)

                    # Rows are appended only once both are complete, so a game
                    # contributes either two rows or none.
                    nhl_gamelogs.append(home_data)
                    nhl_gamelogs.append(away_data)
                except Exception as e:
                    # Deliberate best-effort: report the game and move on to the next one.
                    # .get() is used because a missing 'gamePk' may be the very error being
                    # reported; indexing here would raise again and abort the crawl.
                    print(f"Error with game {game.get('gamePk')}: {str(e)}")
                    continue

# Create dataframe of gamelogs
nhl_gamelogs_df = pd.DataFrame(nhl_gamelogs, columns=cols)

# Save dataframe to CSV
nhl_gamelogs_df.to_csv('data/nhl_gamelogs.csv', index=False)