In [3]:
import requests
import pandas as pd
import time
# Get Team Name and associated with its ID in the NHL API JSON request

teamID_url = "https://statsapi.web.nhl.com/api/v1/teams"
   
response = requests.get(teamID_url)

response_json = response.json()
team_info = {}

for team in response_json["teams"]:
    team_id = team["id"]
    team_name = team["name"]
    team_info[team_id] = team_name
   
team_info_df = pd.DataFrame.from_dict(team_info, orient="index", columns=["Team Name"])
team_info_df.reset_index(inplace=True)
team_info_df.columns = ["Team ID", "Team Name"]

team_info_df


Unnamed: 0,Team ID,Team Name
0,1,New Jersey Devils
1,2,New York Islanders
2,3,New York Rangers
3,4,Philadelphia Flyers
4,5,Pittsburgh Penguins
5,6,Boston Bruins
6,7,Buffalo Sabres
7,8,Montréal Canadiens
8,9,Ottawa Senators
9,10,Toronto Maple Leafs


In [4]:
import requests
import pandas as pd
import time
# Function call to get team stats
def get_team_stats(team_id, season):
    url = f"https://statsapi.web.nhl.com/api/v1/teams/{team_id}?expand=team.stats&season={season}"
    try:
        response = requests.get(url)
        response.raise_for_status()  # Check for request errors
        data = response.json()
        return data
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for team {team_id}, season {season}: {e}")
        return None
# Function call to extract the team_stat data
def extract_team_stats(data, season_year, team_name, team_id):
    if 'teams' in data and data['teams']:
        team_data = data['teams'][0]
        if 'teamStats' in team_data and team_data['teamStats']:
            team_stats = team_data['teamStats'][0]['splits'][0]['stat']
            team_stats['Team ID'] = team_id
            team_stats['Season Year'] = season_year
            
            return team_stats
    return None

if __name__ == "__main__":
    teamID_url = "https://statsapi.web.nhl.com/api/v1/teams"
    response = requests.get(teamID_url)
    response_json = response.json()

    team_info = {}
    for team in response_json["teams"]:
        team_id = team["id"]
        team_info[team_id] = team_name

    team_info_df = pd.DataFrame.from_dict(team_info, orient="index", columns=["Team Name"])
    team_info_df.reset_index(inplace=True)
    team_info_df.columns = ["Team ID", "Team Name"]

    seasons = [f"{year}{year + 1}" for year in range(1991, 2023)]  

    team_stats_list = []

    for _, row in team_info_df.iterrows():
        team_id = row['Team ID']
        for season in seasons:
            data = get_team_stats(team_id, season)
            if data is not None:
                season_year = f"{int(season[:4])}-{int(season[4:])}"
                team_stats = extract_team_stats(data, season_year, team_name, team_id)
                if team_stats:
                    team_stats_list.append(team_stats)
                else:
                    print(f"No team statistics available for team {team_name} in season {season_year}.")
            time.sleep(0.5)  # Add a delay of 0.5 seconds between API calls

    # Create a DataFrame from the list of team statistics
    team_stats_df = pd.DataFrame(team_stats_list)

    # Reorder the columns with team name, team ID, season, and additional columns at the beginning of the DataFrame
    team_stats_df = team_stats_df[[ "Team ID","Season Year",] + team_stats_df.columns[:-8].tolist()]

 

No team statistics available for team Seattle Kraken in season 2004-2005.
No team statistics available for team Seattle Kraken in season 2004-2005.
No team statistics available for team Seattle Kraken in season 2004-2005.
No team statistics available for team Seattle Kraken in season 2004-2005.
No team statistics available for team Seattle Kraken in season 2004-2005.
No team statistics available for team Seattle Kraken in season 2004-2005.
No team statistics available for team Seattle Kraken in season 2004-2005.
No team statistics available for team Seattle Kraken in season 2004-2005.
Error fetching data for team 9, season 19911992: 404 Client Error: Not Found for url: https://statsapi.web.nhl.com/api/v1/teams/9?expand=team.stats&season=19911992
No team statistics available for team Seattle Kraken in season 2004-2005.
No team statistics available for team Seattle Kraken in season 2004-2005.
Error fetching data for team 12, season 19911992: 404 Client Error: Not Found for url: https://s

In [5]:
team_stats_df.head(5)

Unnamed: 0,Team ID,Season Year,gamesPlayed,wins,losses,ot,pts,ptPctg,goalsPerGame,goalsAgainstPerGame,...,powerPlayOpportunities,penaltyKillPercentage,shotsPerGame,shotsAllowed,winScoreFirst,winOppScoreFirst,winLeadFirstPer,winLeadSecondPer,winOutshootOpp,winOutshotByOpp
0,1,1991-1992,80,38,31,0,87,54.4,3.613,3.238,...,338.0,81.8,31.1875,28.625,0.711,0.262,0.793,0.805,0.49,0.414
1,1,1992-1993,84,40,37,0,87,51.8,3.667,3.56,...,400.0,81.4,33.0238,29.8095,0.641,0.333,0.769,0.838,0.51,0.452
2,1,1993-1994,84,47,25,0,106,63.1,3.643,2.619,...,333.0,81.1,31.631,29.0119,0.717,0.368,0.897,0.921,0.607,0.444
3,1,1994-1995,48,22,18,0,52,54.2,2.833,2.521,...,164.0,81.2,30.0833,25.375,0.6,0.304,0.667,0.714,0.533,0.313
4,1,1995-1996,82,37,33,0,86,52.4,2.622,2.463,...,368.0,84.6,32.1585,26.5,0.643,0.25,0.677,0.774,0.475,0.381


In [6]:
 # Remove hyphens from the "Season Year" column
team_stats_df["Season Year"] = team_stats_df["Season Year"].str.replace("-", "")
team_stats_df['Season Year'] = team_stats_df["Season Year"].astype(int)
columns_to_convert = ['ptPctg', 'powerPlayPercentage','penaltyKillPercentage']
team_stats_df[columns_to_convert] = team_stats_df[columns_to_convert].astype(float)

team_stats_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 854 entries, 0 to 853
Data columns (total 24 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Team ID                 854 non-null    int64  
 1   Season Year             854 non-null    int32  
 2   gamesPlayed             854 non-null    int64  
 3   wins                    854 non-null    int64  
 4   losses                  854 non-null    int64  
 5   ot                      854 non-null    int64  
 6   pts                     854 non-null    int64  
 7   ptPctg                  854 non-null    float64
 8   goalsPerGame            854 non-null    float64
 9   goalsAgainstPerGame     854 non-null    float64
 10  evGGARatio              854 non-null    float64
 11  powerPlayPercentage     854 non-null    float64
 12  powerPlayGoals          854 non-null    float64
 13  powerPlayGoalsAgainst   854 non-null    float64
 14  powerPlayOpportunities  854 non-null    fl

In [7]:
team_stats_df.rename(columns={
                              "Team ID": "team_id", 
                              "Season Year": "season_id", 
                              "gamesPlayed": "team_stats_gamesPlayed", 
                              "wins": "team_stats_wins", 
                              "losses": "team_stats_losses",
                              "ot": "team_stats_ot",
                              "pts": "team_stats_pts",
                              "ptPctg":"team_stats_ptPctg",
                              "goalsPerGame": "team_stats_goalsPerGame",
                              "goalsAgainstPerGame": "team_stats_goalsAgainstPerGame",
                              "evGGARatio": "team_stats_evGGARatio",
                              "powerPlayPercentage": "team_stats_powerPlayPercentage",
                              "powerPlayGoals": "team_stats_powerPlayGoals",
                              "powerPlayGoalsAgainst": "team_stats_powerPlayGoalsAgainst",
                              "powerPlayOpportunities":"team_stats_powerPlayOpportunities",
                              "penaltyKillPercentage":"team_stats_penaltyKillPercentage",
                              "shotsPerGame":"team_stats_shotsPerGame",
                              "shotsAllowed":"team_stats_shotsAllowed",
                              "winScoreFirst":"team_stats_winScoreFirst",
                              "winOppScoreFirst":"team_stats_winOppScoreFirst",
                              "winLeadFirstPer":"team_stats_winLeadFirstPer",
                              "winLeadSecondPer":"team_stats_winLeadSecondPer",
                              "winOutshootOpp":"team_stats_winOutshootOpp",
                              "winOutshotByOpp":"team_stats_winOutshotByOpp",
                              "faceOffsTaken":"team_stats_faceOffsTaken",
                              "faceOffsWon":"team_stats_faceOffsWon",
                              "faceOffsLost":"team_stats_faceOffsLost",
                              "faceOffWinPercentage":"team_stats_faceOffWinPercentage",
                              "shootingPctg":"team_stats_shootingPctg",
                              "savePctg":"team_stats_savePctg"

                              }, inplace=True)


In [8]:
team_stats_df.head(5)

Unnamed: 0,team_id,season_id,team_stats_gamesPlayed,team_stats_wins,team_stats_losses,team_stats_ot,team_stats_pts,team_stats_ptPctg,team_stats_goalsPerGame,team_stats_goalsAgainstPerGame,...,team_stats_powerPlayOpportunities,team_stats_penaltyKillPercentage,team_stats_shotsPerGame,team_stats_shotsAllowed,team_stats_winScoreFirst,team_stats_winOppScoreFirst,team_stats_winLeadFirstPer,team_stats_winLeadSecondPer,team_stats_winOutshootOpp,team_stats_winOutshotByOpp
0,1,19911992,80,38,31,0,87,54.4,3.613,3.238,...,338.0,81.8,31.1875,28.625,0.711,0.262,0.793,0.805,0.49,0.414
1,1,19921993,84,40,37,0,87,51.8,3.667,3.56,...,400.0,81.4,33.0238,29.8095,0.641,0.333,0.769,0.838,0.51,0.452
2,1,19931994,84,47,25,0,106,63.1,3.643,2.619,...,333.0,81.1,31.631,29.0119,0.717,0.368,0.897,0.921,0.607,0.444
3,1,19941995,48,22,18,0,52,54.2,2.833,2.521,...,164.0,81.2,30.0833,25.375,0.6,0.304,0.667,0.714,0.533,0.313
4,1,19951996,82,37,33,0,86,52.4,2.622,2.463,...,368.0,84.6,32.1585,26.5,0.643,0.25,0.677,0.774,0.475,0.381


In [9]:
team_stats_df.insert(0,'team_stat_id', '')
team_stats_df.head(5)


Unnamed: 0,team_stat_id,team_id,season_id,team_stats_gamesPlayed,team_stats_wins,team_stats_losses,team_stats_ot,team_stats_pts,team_stats_ptPctg,team_stats_goalsPerGame,...,team_stats_powerPlayOpportunities,team_stats_penaltyKillPercentage,team_stats_shotsPerGame,team_stats_shotsAllowed,team_stats_winScoreFirst,team_stats_winOppScoreFirst,team_stats_winLeadFirstPer,team_stats_winLeadSecondPer,team_stats_winOutshootOpp,team_stats_winOutshotByOpp
0,,1,19911992,80,38,31,0,87,54.4,3.613,...,338.0,81.8,31.1875,28.625,0.711,0.262,0.793,0.805,0.49,0.414
1,,1,19921993,84,40,37,0,87,51.8,3.667,...,400.0,81.4,33.0238,29.8095,0.641,0.333,0.769,0.838,0.51,0.452
2,,1,19931994,84,47,25,0,106,63.1,3.643,...,333.0,81.1,31.631,29.0119,0.717,0.368,0.897,0.921,0.607,0.444
3,,1,19941995,48,22,18,0,52,54.2,2.833,...,164.0,81.2,30.0833,25.375,0.6,0.304,0.667,0.714,0.533,0.313
4,,1,19951996,82,37,33,0,86,52.4,2.622,...,368.0,84.6,32.1585,26.5,0.643,0.25,0.677,0.774,0.475,0.381


In [10]:
   # Save the DataFrame to a CSV file
csv_filename = "team_stats.csv"
team_stats_df.to_csv(csv_filename, index=False,encoding = 'utf-8-sig')