In [1]:
from db import db

import pandas as pd
import constants

from bs4 import BeautifulSoup
import requests
import json

In [None]:
# Insert team rosters into the db
for id in constants.nhl_team_ids:
    url = f"https://statsapi.web.nhl.com/api/v1/teams/{id}/roster"
    team_response = dict(requests.get(url).json())
    roster_list = [player['person']['id'] for player in team_response['roster']]
    db.nhl_teams.insert_one({
        "_id": id,
        "roster": roster_list
    })

In [None]:
# Insert team data into the database
for id in constants.nhl_team_ids:
    url = f"https://statsapi.web.nhl.com/api/v1/teams/{id}"
    team_response = dict(requests.get(url).json())
    db.nhl_teams.update_one({'_id': id}, {'$set': {
        'name': team_response['teams'][0]['name'],
        'abbr': team_response['teams'][0]['abbreviation'],
        'short': team_response['teams'][0]['teamName'],
        'city': team_response['teams'][0]['locationName']
    }})

In [None]:
# Insert all non-goalie players into db
for team in db.nhl_teams.find():
    for player_id in team['roster']:
        url_bio = f"https://statsapi.web.nhl.com/api/v1/people/{player_id}"
        bio_response = dict(requests.get(url_bio).json())
        url_stats = f"https://statsapi.web.nhl.com/api/v1/people/{player_id}/stats?stats=statsSingleSeason&season=20222023"
        stats_response = dict(requests.get(url_stats).json())
        try:
            if stats_response['stats'][0]['splits']:
                db.nhl_players.insert_one({
                    "_id": player_id,
                    "name": bio_response['people'][0]['fullName'],
                    "number": bio_response['people'][0]['primaryNumber'],
                    "age": bio_response['people'][0]['currentAge'],
                    "nationality": bio_response['people'][0]['nationality'],
                    "height": bio_response['people'][0]['height'],
                    "weight": bio_response['people'][0]['weight'],
                    "captain": bio_response['people'][0]['captain'],
                    "rookie": bio_response['people'][0]['rookie'],
                    "shoots": bio_response['people'][0]['shootsCatches'],
                    "team": bio_response['people'][0]['currentTeam']['id'], 
                    "pos_name": bio_response['people'][0]['primaryPosition']['name'],
                    "pos_type": bio_response['people'][0]['primaryPosition']['type'],
                    "timeOnIce": stats_response['stats'][0]['splits'][0]['stat']['timeOnIce'],
                    "assists": stats_response['stats'][0]['splits'][0]['stat']['assists'],
                    "goals": stats_response['stats'][0]['splits'][0]['stat']['goals'],
                    "pim": stats_response['stats'][0]['splits'][0]['stat']['pim'],
                    "shots": stats_response['stats'][0]['splits'][0]['stat']['shots'], 
                    "games": stats_response['stats'][0]['splits'][0]['stat']['games'],
                    "hits": stats_response['stats'][0]['splits'][0]['stat']['hits'],
                    "powerPlayGoals": stats_response['stats'][0]['splits'][0]['stat']['powerPlayGoals'],
                    "powerPlayPoints": stats_response['stats'][0]['splits'][0]['stat']['powerPlayPoints'],
                    "powerPlayTimeOnIce": stats_response['stats'][0]['splits'][0]['stat']['powerPlayTimeOnIce'],
                    "evenTimeOnIce": stats_response['stats'][0]['splits'][0]['stat']['evenTimeOnIce'],
                    "penaltyMinutes": stats_response['stats'][0]['splits'][0]['stat']['penaltyMinutes'],
                    "faceOffPct": stats_response['stats'][0]['splits'][0]['stat']['faceOffPct'],
                    "shotPct": stats_response['stats'][0]['splits'][0]['stat']['shotPct'],
                    "gameWinningGoals": stats_response['stats'][0]['splits'][0]['stat']['gameWinningGoals'],
                    "overTimeGoals": stats_response['stats'][0]['splits'][0]['stat']['overTimeGoals'],
                    "shortHandedGoals": stats_response['stats'][0]['splits'][0]['stat']['shortHandedGoals'],
                    "shortHandedPoints": stats_response['stats'][0]['splits'][0]['stat']['shortHandedPoints'],
                    "shortHandedTimeOnIce": stats_response['stats'][0]['splits'][0]['stat']['shortHandedTimeOnIce'],
                    "blocked": stats_response['stats'][0]['splits'][0]['stat']['blocked'],
                    "plusMinus": stats_response['stats'][0]['splits'][0]['stat']['plusMinus'],
                    "points": stats_response['stats'][0]['splits'][0]['stat']['points'],
                    "shifts": stats_response['stats'][0]['splits'][0]['stat']['shifts'],
                    "timeOnIcePerGame": stats_response['stats'][0]['splits'][0]['stat']['timeOnIcePerGame'],
                    "evenTimeOnIcePerGame": stats_response['stats'][0]['splits'][0]['stat']['evenTimeOnIcePerGame'],
                    "shortHandedTimeOnIcePerGame": stats_response['stats'][0]['splits'][0]['stat']['shortHandedTimeOnIcePerGame'],
                    "powerPlayTimeOnIcePerGame": stats_response['stats'][0]['splits'][0]['stat']['powerPlayTimeOnIcePerGame']
                })
            else:
                db.nhl_players.insert_one({
                    "_id": player_id,
                    "name": bio_response['people'][0]['fullName'],
                    "number": bio_response['people'][0]['primaryNumber'],
                    "age": bio_response['people'][0]['currentAge'],
                    "nationality": bio_response['people'][0]['nationality'],
                    "height": bio_response['people'][0]['height'],
                    "weight": bio_response['people'][0]['weight'],
                    "captain": bio_response['people'][0]['captain'],
                    "rookie": bio_response['people'][0]['rookie'],
                    "shoots": bio_response['people'][0]['shootsCatches'],
                    "team": bio_response['people'][0]['currentTeam']['id'], 
                    "pos_name": bio_response['people'][0]['primaryPosition']['name'],
                    "pos_type": bio_response['people'][0]['primaryPosition']['type'],
                })
        except:
            print(f"Error with player {player_id}")
            continue

In [None]:
# Insert all goalies into the database
for player_id in [8477970,
8478406,
8481033,
8473575,
8478009,
8470860,
8478048,
8478433,
8479394,
8481035,
8477465,
8479193,
8476999,
8480280,
8467950,
8477480,
8480045,
8474596,
8478470,
8476341,
8475660,
8481544,
8476899,
8478492,
8479361,
8475883,
8477293,
8481519,
8475683,
8479312,
8470880,
8476883,
8475311,
8479292,
8471774,
8475852,
8483530,
8476433,
8478024,
8477424,
8480947,
8471306,
8476412,
8474593,
8478435,
8477992,
8480382,
8480925,
8475789,
8479973,
8477967,
8477484,
8480420,
8481668,
8476434,
8476932,
8480843,
8475809,
8479979,
8471734,
8477831,
8473503,
8478039,
8476914,
8478007,
8470594,
8479406,
8476945,
8479496,
8478872,
8478971,
8480313,
8476316,
8478499,
8474889,
8475831,]:
    url_bio = f"https://statsapi.web.nhl.com/api/v1/people/{player_id}"
    bio_response = dict(requests.get(url_bio).json())
    url_stats = f"https://statsapi.web.nhl.com/api/v1/people/{player_id}/stats?stats=statsSingleSeason&season=20222023"
    stats_response = dict(requests.get(url_stats).json())
    try:
        if stats_response['stats'][0]['splits']:
            db.nhl_goalies.insert_one({
                "_id": player_id,
                "name": bio_response['people'][0]['fullName'],
                "number": bio_response['people'][0]['primaryNumber'],
                "age": bio_response['people'][0]['currentAge'],
                "nationality": bio_response['people'][0]['nationality'],
                "height": bio_response['people'][0]['height'],
                "weight": bio_response['people'][0]['weight'],
                "captain": bio_response['people'][0]['captain'],
                "rookie": bio_response['people'][0]['rookie'],
                "shoots": bio_response['people'][0]['shootsCatches'],
                "team": bio_response['people'][0]['currentTeam']['id'], 
                "pos_name": bio_response['people'][0]['primaryPosition']['name'],
                "pos_type": bio_response['people'][0]['primaryPosition']['type'],
                "timeOnIce": stats_response['stats'][0]['splits'][0]['stat']['timeOnIce'],
                "ot": stats_response['stats'][0]['splits'][0]['stat']['ot'],
                "shutouts": stats_response['stats'][0]['splits'][0]['stat']['shutouts'],
                "ties": stats_response['stats'][0]['splits'][0]['stat']['ties'],
                "wins": stats_response['stats'][0]['splits'][0]['stat']['wins'], 
                "losses": stats_response['stats'][0]['splits'][0]['stat']['losses'],
                "saves": stats_response['stats'][0]['splits'][0]['stat']['saves'],
                "powerPlaySaves": stats_response['stats'][0]['splits'][0]['stat']['powerPlaySaves'],
                "shortHandedSaves": stats_response['stats'][0]['splits'][0]['stat']['shortHandedSaves'],
                "evenSaves": stats_response['stats'][0]['splits'][0]['stat']['evenSaves'],
                "shortHandedShots": stats_response['stats'][0]['splits'][0]['stat']['shortHandedShots'],
                "evenShots": stats_response['stats'][0]['splits'][0]['stat']['evenShots'],
                "powerPlayShots": stats_response['stats'][0]['splits'][0]['stat']['powerPlayShots'],
                "savePercentage": stats_response['stats'][0]['splits'][0]['stat']['savePercentage'],
                "goalAgainstAverage": stats_response['stats'][0]['splits'][0]['stat']['goalAgainstAverage'],
                "games": stats_response['stats'][0]['splits'][0]['stat']['games'],
                "gamesStarted": stats_response['stats'][0]['splits'][0]['stat']['gamesStarted'],
                "shotsAgainst": stats_response['stats'][0]['splits'][0]['stat']['shotsAgainst'],
                "goalsAgainst": stats_response['stats'][0]['splits'][0]['stat']['goalsAgainst'],
                "timeOnIcePerGame": stats_response['stats'][0]['splits'][0]['stat']['timeOnIcePerGame'],
                "powerPlaySavePercentage": stats_response['stats'][0]['splits'][0]['stat']['powerPlaySavePercentage'],
                "shortHandedSavePercentage": stats_response['stats'][0]['splits'][0]['stat']['shortHandedSavePercentage'],
                "evenStrengthSavePercentage": stats_response['stats'][0]['splits'][0]['stat']['evenStrengthSavePercentage'],
            })
        else:
            db.nhl_goalies.insert_one({
                "_id": player_id,
                "name": bio_response['people'][0]['fullName'],
                "number": bio_response['people'][0]['primaryNumber'],
                "age": bio_response['people'][0]['currentAge'],
                "nationality": bio_response['people'][0]['nationality'],
                "height": bio_response['people'][0]['height'],
                "weight": bio_response['people'][0]['weight'],
                "captain": bio_response['people'][0]['captain'],
                "rookie": bio_response['people'][0]['rookie'],
                "shoots": bio_response['people'][0]['shootsCatches'],
                "team": bio_response['people'][0]['currentTeam']['id'], 
                "pos_name": bio_response['people'][0]['primaryPosition']['name'],
                "pos_type": bio_response['people'][0]['primaryPosition']['type'],
            })
    except:
        print(f"Error with player {player_id}")
        continue

In [6]:
cols = ['assists','goals','pim','shots','games','hits','powerPlayGoals','powerPlayPoints']

# Iterate over all players and create leaderboard table
leaderboard_df = []
for player in db.nhl_players.find():
    stats = [player['_id']]
    for col in cols:
        if col in player:
            stats.append(player[col])
        else:
            stats.append(0)
    leaderboard_df.append(stats)
leaderboard_df = pd.DataFrame(leaderboard_df, columns=['id']+cols)

# Create a document of the top 20 players in each category and insert into courtside.nhl_leaderboard collection
for col in cols:
    top_100 = leaderboard_df.sort_values(by=col, ascending=False).head(100)
    db.nhl_leaderboards.insert_one({
        '_id': f"{col}_tot",
        'player_id': top_100['id'].values.tolist(),
        'value': top_100[col].values.tolist(),
        'per_mode': "Totals",
        'name': col,
    })

TypeError: '<' not supported between instances of 'int' and 'str'