In [1]:
import requests
import numpy as np
import pandas as pd
from tqdm import tqdm
import sqlite3

In [2]:
response = requests.get("https://data.nba.net/10s/prod/v3/today.json")
today_json = response.json()
nba_data_links = today_json['links']

In [3]:
link_values = list(nba_data_links.values())
for link in link_values[2:-2]:
    print("https://data.nba.net{}".format(link))

https://data.nba.net/prod/v1/multi/calendar.json
https://data.nba.net/prod/v2/20220609/scoreboard.json
https://data.nba.net/prod/v2/20220609/scoreboard.json
https://data.nba.net/prod/v2/2021/teams.json
https://data.nba.net/prod/v2/{{gameDate}}/scoreboard.json
https://data.nba.net/prod/v1/2021/players.json
https://data.nba.net/prod/v1/allstar/2018/AS_roster.json
https://data.nba.net/prod/v1/2021/coaches.json
https://data.nba.net/prod/v1/multi/schedule.json
https://data.nba.net/prod/v1/current/standings_conference.json
https://data.nba.net/prod/v1/current/standings_division.json
https://data.nba.net/prod/v1/current/standings_all.json
https://data.nba.net/prod/v1/current/standings_all_no_sort_keys.json
https://data.nba.net/prod/v1/2021/team_stats_rankings.json
https://data.nba.net/prod/v1/2021/team_stats_last_five_games.json
https://data.nba.net/prod/v1/{{gameDate}}/{{gameId}}_preview_article.json
https://data.nba.net/prod/v1/{{gameDate}}/{{gameId}}_recap_article.json
https://data.nba.net

In [4]:
class DataNBANet:
    def __init__(self, year):
        self._year = year
    def get_teams(self):
        request_url = "https://data.nba.net/prod/v2/{}/teams.json".format(self._year)
        response = requests.get(request_url)
        teams_json = response.json()
        teams = [t for t in teams_json['league']['standard'] if t['isNBAFranchise']]
        teams_df = pd.DataFrame(teams)
        return teams_df
    def get_players(self):
        teams = self.get_teams()
        team_ids = teams['teamId'].values
        request_url = "https://data.nba.net/prod/v1/{}/players.json".format(self._year)
        response = requests.get(request_url)
        players_json = response.json()
        players_list = players_json['league']['standard']
        players_list_dict = []
        for p in tqdm(players_list):
            if p['teamId'] in team_ids:
                player_dict = {}
                for k, v in p.items():
                    if isinstance(v, str):
                        player_dict[k] = v
                players_list_dict.append(player_dict)
        players_df = pd.DataFrame(players_list_dict)
        return players_df
    def get_career_summaries(self):
        players = self.get_players()
        person_ids = players['personId']
        career_summaries = []
        for pid in tqdm(person_ids):
            request_url = "https://data.nba.net/prod/v1/{}/players/{}_profile.json".format(self._year, pid)
            response = requests.get(request_url)
            profile_json = response.json()
            career_summary = profile_json['league']['standard']['stats']['careerSummary']
            career_summaries.append(career_summary)
        career_summaries_df = pd.DataFrame(career_summaries)
        career_summaries_df.insert(0, 'personId', person_ids)
        career_summaries_df = career_summaries_df.replace('', np.nan)
        return career_summaries_df

In [5]:
dnn = DataNBANet(2021)
teams = dnn.get_teams()
players = dnn.get_players()
career_summaries = dnn.get_career_summaries()

100%|██████████████████████████████████████| 592/592 [00:00<00:00, 21840.91it/s]
100%|██████████████████████████████████████| 592/592 [00:00<00:00, 81579.26it/s]
100%|█████████████████████████████████████████| 506/506 [05:06<00:00,  1.65it/s]


In [6]:
career_summaries.head()

Unnamed: 0,personId,tpp,ftp,fgp,ppg,rpg,apg,bpg,mpg,spg,...,ftm,fta,pFouls,points,gamesPlayed,gamesStarted,plusMinus,min,dd2,td3
0,1630173,35.7,55.6,46.8,7.2,5.1,0.8,0.5,18.4,0.4,...,134,241,242,968,134,32,-58,2461,9,0
1,203500,7.1,54.7,58.7,9.3,8.0,1.5,1.0,26.8,0.9,...,1044,1907,1650,6157,664,599,1991,17811,139,1
2,1628389,14.0,74.1,55.8,13.5,8.3,3.5,0.9,28.2,1.0,...,1098,1481,839,4617,343,239,560,9665,118,5
3,1630583,12.5,62.5,40.2,4.1,2.7,0.7,0.3,11.2,0.2,...,20,32,36,132,32,0,16,359,2,0
4,200746,32.0,81.3,49.3,19.1,8.1,1.9,1.1,33.7,0.7,...,3709,4563,2598,20558,1076,997,2784,36242,354,0


In [7]:
con = sqlite3.connect('nba.db')
teams.to_sql('teams', con, if_exists='replace', index=False)
players.to_sql('players', con, if_exists='replace', index=False)
career_summaries.to_sql('career_summaries', con, if_exists='replace', index=False)
cur = con.cursor()

In [8]:
create_teams = """
PRAGMA foreign_keys=off;
BEGIN TRANSACTION;
ALTER TABLE teams RENAME TO teams_no_keys;
CREATE TABLE teams (
    city TEXT,
    fullName TEXT,
    isNBAFranchise TEXT,
    confName TEXT,
    tricode TEXT,
    teamShortName TEXT,
    divName TEXT,
    isAllStar TEXT,
    nickname TEXT,
    urlName TEXT,
    teamId INTEGER,
    altCityName TEXT,
    PRIMARY KEY (teamId)
);
INSERT INTO teams SELECT * FROM teams_no_keys;
COMMIT;
PRAGMA foreign_keys=on;
"""
cur.executescript(create_teams)
con.commit()

In [9]:
create_players = """
PRAGMA foreign_keys=off;
BEGIN TRANSACTION;
ALTER TABLE players RENAME TO players_no_keys;
CREATE TABLE players (
    firstName TEXT,
    lastName TEXT,
    temporaryDisplayName TEXT,
    personId INTEGER,
    teamId INTEGER,
    jersey INTEGER,
    pos TEXT,
    heightFeet INTEGER,
    heightInches INTEGER,
    heightMeters REAL,
    weightPounds REAL,
    weightKilograms REAL,
    dateOfBirthUTC TEXT,
    nbaDebutYear INTEGER,
    yearsPro INTEGER,
    collegeName TEXT,
    lastAffiliation TEXT,
    country TEXT,
    PRIMARY KEY (personId),
    FOREIGN KEY (teamId) REFERENCES teams (teamId) 
            ON DELETE CASCADE ON UPDATE NO ACTION
);
INSERT INTO players SELECT * FROM players_no_keys;
COMMIT;
PRAGMA foreign_keys=on;
"""
cur.executescript(create_players)
con.commit()

In [10]:
create_career_summaries = """
PRAGMA foreign_keys=off;
BEGIN TRANSACTION;
ALTER TABLE career_summaries RENAME TO career_summaries_no_keys;
CREATE TABLE career_summaries (
    personId INTEGER,
    tpp REAL,
    ftp REAL,
    fgp REAL,
    ppg REAL,
    rpg REAL,
    apg REAL,
    bpg REAL,
    mpg REAL,
    spg REAL,
    assists INTEGER,
    blocks INTEGER,
    steals INTEGER,
    turnovers INTEGER,
    offReb INTEGER,
    defReb INTEGER,
    totReb INTEGER,
    fgm INTEGER,
    fga INTEGER,
    tpm INTEGER,
    tpa INTEGER,
    ftm INTEGER,
    fta INTEGER,
    pFouls INTEGER,
    points INTEGER,
    gamesPlayed INTEGER,
    gamesStarted INTEGER,
    plusMinus INTEGER,
    min INTEGER,
    dd2 INTEGER,
    td3 INTEGER,
    PRIMARY KEY (personId),
    FOREIGN KEY (personId) REFERENCES players (personId) 
            ON DELETE CASCADE ON UPDATE NO ACTION
);
INSERT INTO career_summaries SELECT * FROM career_summaries_no_keys;
COMMIT;
PRAGMA foreign_keys=on;
"""
cur.executescript(create_career_summaries)
con.commit()

In [11]:
drop_tables = """
DROP TABLE teams_no_keys;
DROP TABLE players_no_keys;
DROP TABLE career_summaries_no_keys;
"""
cur.executescript(drop_tables)
con.commit()

In [12]:
con.close()