In [None]:
from db import db
from nba_api.stats.endpoints import leagueleaders
from nba_api.stats.endpoints import teamplayerdashboard

import pandas as pd
import constants

from bs4 import BeautifulSoup
import requests
import json

In [None]:
# Collect leaderboards for each stat and upsert them into db.leaderboards.
# One document is written per (stat, per-mode) pair, keyed "<stat>_<code>",
# holding two parallel lists: player ids ordered by descending stat value and
# the matching values.
per_code = {
    "Totals": "tot",
    "PerGame": "pg",
    "Per48": "p48",
}

for per_mode, mode_code in per_code.items():

    # One API call per mode; every stat column below is sliced from this frame.
    leaders_df = leagueleaders.LeagueLeaders(per_mode48=per_mode, season="2022-23").get_data_frames()[0]

    for stat in constants.new_leaderboard_names:
        # Highest value first, so list position doubles as (rank - 1).
        stat_df = leaders_df[['PLAYER_ID', stat]].sort_values(stat, ascending=False)

        leaderboard_id = f"{stat}_{mode_code}"

        # Upsert rather than insert: the _id is deterministic, so a plain
        # insert_one raises DuplicateKeyError as soon as this cell is re-run.
        db.leaderboards.replace_one(
            {"_id": leaderboard_id},
            {
                "_id": leaderboard_id,
                "player_id": stat_df['PLAYER_ID'].tolist(),
                "value": stat_df[stat].tolist(),
                "per_mode": per_mode,
            },
            upsert=True,
        )

        print(f"{leaderboard_id} done")

In [None]:
# Collect team rosters and insert into db.
# For every team document: fetch its 2022-23 player dashboard, store the list
# of player ids on the team as "roster", and create a minimal players document
# (_id + name) for each rostered player that is not in db.players yet.
# A failure on one team is reported and does not stop the remaining teams.
for team in db.teams.find():
    try:
        # Frame [1] is the one the PLAYER_ID / PLAYER_NAME columns are read from.
        team_df = teamplayerdashboard.TeamPlayerDashboard(team_id=team['teamId'], season="2022-23").get_data_frames()[1]

        team_roster = team_df['PLAYER_ID'].to_list()

        db.teams.update_one({"_id": team["_id"]}, {"$set": {"roster": team_roster}})

        # Build the id -> name lookup once instead of re-filtering the frame
        # for every player. setdefault keeps the first row for a repeated id,
        # which is what the per-player row filter + values[0] returned.
        player_names = {}
        for roster_id, roster_name in zip(team_roster, team_df['PLAYER_NAME'].to_list()):
            player_names.setdefault(roster_id, roster_name)

        for player in team_roster:
            if db.players.find_one({"_id": player}) is None:
                db.players.insert_one({
                    "_id": player,
                    "name": player_names[player]
                })

        print(f"{team['_id']} done")
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C / kernel interrupt still
        # stops the loop, and the reason for the failure is visible.
        print(f"{team['_id']} failed: {exc!r}")

In [None]:
import copy

# Update team codes in schedule database.
# Game documents are keyed "<home>-<away>-<rest>". When either team code is
# one of the legacy abbreviations below, the document is re-created under the
# translated key with the matching schedule.home_code / schedule.away_code
# rewritten, and the old document is removed.
code_translate = {
    "BRK": "BKN",
    "CHO": "CHA",
    "PHO": "PHX",
}

# Snapshot the cursor first: the loop inserts into and deletes from the same
# collection it is reading.
for game in list(db.schedule.find()):
    game_code = game['_id']
    # maxsplit=2 keeps everything after the second dash intact, so a tail that
    # itself contains dashes is not truncated. For three-part ids this is the
    # same as a plain split.
    game_code_split = game_code.split('-', 2)
    home, away = game_code_split[0], game_code_split[1]

    new_home = code_translate.get(home, home)
    new_away = code_translate.get(away, away)
    if new_home == home and new_away == away:
        continue  # nothing to translate for this game

    new_game_code = f"{new_home}-{new_away}-{game_code_split[2]}"

    # Apply the new codes to the copy that is actually inserted. Updating the
    # old document in place is pointless because it is deleted right after,
    # and reusing the unmodified in-memory schedule would leave the new
    # document carrying the old codes.
    # NOTE(review): assumes "schedule" is an embedded document (the previous
    # "$set" on "schedule.home_code" implies this) - confirm.
    new_schedule = dict(game["schedule"])
    if new_home != home:
        new_schedule["home_code"] = new_home
    if new_away != away:
        new_schedule["away_code"] = new_away

    # Insert before delete so a failed insert never loses the game.
    db.schedule.insert_one({
        "_id": new_game_code,
        "schedule": new_schedule
    })
    db.schedule.delete_one({"_id": game_code})


In [None]:
# Iterate over each player and update bio info.
# For every document in db.players, the player's nba.com page is fetched and
# parsed, and headshot / team / number / position / bio fields are written
# back to the same document. A failure for one player is reported with its
# reason and the loop moves on to the next player.
# NOTE(review): the CSS class names below look build-generated and may change
# when nba.com is redeployed - confirm they still match before a full run.
REQUEST_TIMEOUT_S = 10  # without a timeout a single stalled connection hangs the whole loop

for player in db.players.find():
    player_id = player["_id"]
    # Fall back to the id so reporting cannot itself fail on a missing name.
    player_label = player.get("name", player_id)
    try:
        # Scrape player data from nba.com
        url = f"https://www.nba.com/player/{player_id}/"
        response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
        response.raise_for_status()  # report HTTP errors instead of parsing an error page
        soup = BeautifulSoup(response.text, 'html5lib')

        # Parse player data from html
        headshot = soup.find('img', {'class': 'PlayerSummary_playerImage__sysif'})['src']

        # The "team | #number | position" line can be absent; store None for
        # all three fields in that case rather than failing the player.
        info_tag = soup.find('p', {'class': 'PlayerSummary_mainInnerInfo__jv3LO'})
        if info_tag is None:
            team, number, position = None, None, None
        else:
            player_info = info_tag.text.split(" | ")
            team, number, position = player_info[0], int(player_info[1].replace('#','')), player_info[2]

        # First eight info values, in page order. For the first two (stored
        # below as height and weight) only the text before the first space is kept.
        player_bio_data = []
        for idx, x in enumerate(soup.find_all('p', {'class': 'PlayerSummary_playerInfoValue__JS8_v'})[:8]):
            if idx in [0, 1]:
                player_bio_data.append(x.text.split(" ")[0])
            else:
                player_bio_data.append(x.text)

        # Find db entry with same id and update
        db.players.update_one(
            {"_id": player_id},
            {"$set": {
                "headshot": headshot,
                "team": team,
                "number": number,
                "position": position,
                "height": player_bio_data[0],
                "weight": player_bio_data[1],
                "country": player_bio_data[2],
                "age": player_bio_data[4],
                "draft": player_bio_data[6],
                "experience": player_bio_data[7],
            }}
        )
    except Exception as exc:
        # Exception (not a bare except) so a kernel interrupt still stops the
        # loop; the reason is printed so failures can be diagnosed.
        print(f"{player_label} failed: {exc!r}")
    else:
        print(f"{player_label} done")