# export_season.ipynb
Use this file to extract all_scores.csv and team_stats.json for a specific season. 
This data can be used to train/test the model

In [8]:
# season = "20222023"

season = "20232024"

## Getting Scores from all games from the season
Iterate through all of the games in the chosen season using the NHL API and store who won the game in "scores".

In [7]:
import requests
import json
from datetime import datetime, timedelta

api_url = "https://api-web.nhle.com"

with open('teams.json', 'r') as file:
    teams = json.load(file)

# Get scores for all games
start_date = datetime(2022, 10, 10)  
end_date = datetime(2023, 4, 13)  

schedule = []

current_date = start_date
while current_date <= end_date:
    formatted_date = current_date.strftime("%Y-%m-%d")
    
    response = requests.get(f"{api_url}/v1/score/{formatted_date}")
    
    if response.status_code == 200:
        scores_data = response.json()
        schedule.append(scores_data)
    else:
        print(f"Failed to fetch scores for {formatted_date}. Status code: {response.status_code}")
    
    current_date += timedelta(days=1)

In [9]:
# Parse game data
total_games = 0
scores = []
for day in schedule:
    total_games += len(day['games'])
    for game in day['games']:
        awayTeam = game['awayTeam']['abbrev']
        awayScore = game['awayTeam']['score']
        homeTeam = game['homeTeam']['abbrev']
        homeScore = game['homeTeam']['score']
        homeWin = homeScore > awayScore

        if awayTeam not in [team['abbreviation'] for team in teams]:
            print(f"Away team {awayTeam} is not in teams.json. Skipping...")
            continue
        
        if homeTeam not in [team['abbreviation'] for team in teams]:
            print(f"Home team {homeTeam} is not in teams.json. Skipping...")
            continue
            
        score_data = {
            "awayTeam": awayTeam,
            "awayScore": awayScore,
            "homeTeam": homeTeam,
            "homeScore": homeScore,
            "homeWin": homeWin
        }
        
        scores.append(score_data)

total_games

Away team PAC is not in teams.json. Skipping...
Away team MET is not in teams.json. Skipping...
Away team CEN is not in teams.json. Skipping...


1311

### Get club stats from the season by tabulating each player's score
NHL API doesn't have team stats, so:
1) Find every player's id on every team
2) Get each player's individual stats
3) Append the stats to team_stats

In [10]:
# Get club stats for each team  
import json
with open('teams.json', 'r') as file:
    teams = json.load(file)

# List of player ids for each team
team_rosters = {}

# Cumulative team stats for each team
team_stats = {}
for team in teams:
    abbr = team['abbreviation']
    club_stats = requests.get(f"{api_url}/v1/club-stats/{abbr}/{season}/2").json()

    team_rosters[abbr] = []
    team_stats[abbr] = {
        'goals_per_game': 0,
        'netPlusMinus': 0,
        'totalPIM': 0,
        'PPG': 0,
        'SHG': 0,
        'shots_per_game': 0,
        'goals_against_per_game': 0,
        'shots_against_per_game': 0,
        'points': 0,
        'regulation_wins': 0
    }
    for skater in club_stats['skaters']:
        id = skater['playerId']
        team_rosters[abbr].append(id)

        stats = team_stats[abbr]
        stats['goals_per_game'] += skater['goals']
        stats['netPlusMinus'] += skater['plusMinus']
        stats['totalPIM'] += skater['penaltyMinutes']
        stats['PPG'] += skater['powerPlayGoals']
        stats['SHG'] += skater['shorthandedGoals']
        stats['shots_per_game'] += skater['shots']

    for goalie in club_stats['goalies']:                
        stats = team_stats[abbr]
        stats['shots_against_per_game'] += goalie['shotsAgainst']
        stats['goals_against_per_game'] += goalie['goalsAgainst']
    
        
    team_stats[abbr]['goals_per_game'] /= 82
    team_stats[abbr]['shots_per_game'] /= 82
    team_stats[abbr]['goals_against_per_game'] /= 82
    team_stats[abbr]['shots_against_per_game'] /= 82

### Standings Data
This step is optional, but I imported each team's wins in their season for the use of plotting different stats

In [11]:
# Import standings information
standings = requests.get(f"https://api-web.nhle.com/v1/standings/{season[4:]}-04-14").json()
for team in standings['standings']:
    abbr = team['teamAbbrev']['default']

    team_stats[abbr]['regulation_wins'] = team['regulationWins']
    team_stats[abbr]['points'] = team['points']

### Export data to folder

In [12]:
import csv 

fields = ['awayTeam', 'awayScore', 'homeTeam', 'homeScore', 'homeWin']

with open('all_scores.csv', 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fields)
    
    writer.writeheader()
    
    for score in scores:
        writer.writerow(score)

print("CSV file 'all_scores.csv' has been created.")


CSV file 'all_scores.csv' has been created.


In [13]:
output = 'rosters.json'

# Export team_rosters to a JSON file
with open(output, 'w') as file:
    json.dump(team_rosters, file)

print(f"Team rosters have been exported to {output}.")

Team rosters have been exported to rosters.json.


In [14]:
output = 'team_stats.json'

# Export team_rosters to a JSON file
with open(output, 'w') as file:
    json.dump(team_stats, file)

print(f"Team stats have been exported to {output}.")

Team stats have been exported to team_stats.json.
