# Retrieving NBA Player Data
From NBA API

In [2]:
import json
import requests
import pandas as pd

In [73]:
# API links list
urlRequest = requests.get("http://data.nba.net/10s/prod/v1/today.json")
urlJson = url.json()

In [354]:
# All NBA players
playersUrl = "http://data.nba.net/10s/prod/v1/2016/players.json"
playersRequest = requests.get(playersUrl)
playersJson = playersRequest.json()
playersDf = pd.DataFrame(playersJson["league"]["standard"])

# Unnest "draft" column
draft = {
    "personId": [],
    "draftTeam": [],
    "draftPickNum": [],
    "draftRoundNum": [],
    "draftYear": []
}

for i, row  in playersDf[["personId","draft"]].iterrows():
    draft["personId"].append(row["personId"])
    draft["draftTeam"].append(row["draft"]["teamId"])
    draft["draftPickNum"].append(row["draft"]["pickNum"])
    draft["draftRoundNum"].append(row["draft"]["roundNum"])
    draft["draftYear"].append(row["draft"]["seasonYear"])
    
draftDf = pd.DataFrame(draft)

playersDfClean = playersDf.merge(draftDf, how="left", on="personId")
playersDfClean = playersDfClean.drop(["draft"], axis=1)

# drop "teams" column
playersDfClean = playersDfClean.drop(["teams"], axis=1)

#clean "teamId" column
playersDfClean["teamId"] = playersDfClean["teamId"].str.split().str[0]

In [355]:
playersDfClean.head()

Unnamed: 0,firstName,lastName,personId,teamId,jersey,pos,heightFeet,heightInches,heightMeters,weightPounds,...,dateOfBirthUTC,nbaDebutYear,yearsPro,collegeName,lastAffiliation,country,draftTeam,draftPickNum,draftRoundNum,draftYear
0,Alex,Abrines,203518,1610612760,8,G-F,6,6,1.98,190,...,1993-08-01,2016,0,,Spain/Spain,Spain,1610612760,32,2,2013
1,Quincy,Acy,203112,1610612751,13,F,6,7,2.01,240,...,1990-10-06,2012,4,Baylor,Baylor/USA,USA,1610612761,37,2,2012
2,Steven,Adams,203500,1610612760,12,C,7,0,2.13,255,...,1993-07-20,2013,3,Pittsburgh,Pittsburgh/New Zealand,New Zealand,1610612760,12,1,2013
3,Arron,Afflalo,201167,1610612758,40,G,6,5,1.96,210,...,1985-10-15,2007,9,UCLA,UCLA/USA,USA,1610612765,27,1,2007
4,Alexis,Ajinca,201582,1610612740,42,C,7,2,2.18,248,...,1988-05-06,2008,6,"Saint Etienne, France",St. Etienne/France,France,1610612766,20,1,2008


In [10]:
# All NBA teams
teamsUrl = "http://data.nba.net/prod/v2/2021/teams.json"
teamsRequest = requests.get(teamsUrl)
teamsJson = teamsRequest.json()
teamsDf = pd.DataFrame.from_dict(teamsJson["league"]["standard"])

In [11]:
teamsDf.head()

Unnamed: 0,city,fullName,isNBAFranchise,confName,tricode,teamShortName,divName,isAllStar,nickname,urlName,teamId,altCityName
0,Atlanta,Atlanta Hawks,True,East,ATL,Atlanta,Southeast,False,Hawks,hawks,1610612737,Atlanta
1,Boston,Boston Celtics,True,East,BOS,Boston,Atlantic,False,Celtics,celtics,1610612738,Boston
2,Brooklyn,Brooklyn Nets,True,East,BKN,Brooklyn,Atlantic,False,Nets,nets,1610612751,Brooklyn
3,Charlotte,Charlotte Hornets,True,East,CHA,Charlotte,Southeast,False,Hornets,hornets,1610612766,Charlotte
4,Chicago,Chicago Bulls,True,East,CHI,Chicago,Central,False,Bulls,bulls,1610612741,Chicago


In [305]:
# Stats for all NBA players
playerIds = playersDf["personId"]

statsDf = pd.DataFrame()

for playerId in playerIds:
    playerStatsUrl ="http://data.nba.net/prod/v1/2021/players/" + playerId + "_profile.json"
    playerStatsRequest = requests.get(playerStatsUrl)
    
    # If no stats data available for that player
    if playerStatsRequest.status_code == 404:
        continue
        
    playerStatsJson = playerStatsRequest.json()
    playerStatsDf = pd.json_normalize(playerStatsJson["league"]["standard"]["stats"])
    playerStatsDf["playerId"] = playerId

    statsDf = statsDf.append(playerStatsDf)

In [326]:
statsDf.head()

Unnamed: 0,latest.seasonYear,latest.seasonStageId,latest.ppg,latest.rpg,latest.apg,latest.mpg,latest.topg,latest.spg,latest.bpg,latest.tpp,...,careerSummary.pFouls,careerSummary.points,careerSummary.gamesPlayed,careerSummary.gamesStarted,careerSummary.plusMinus,careerSummary.min,careerSummary.dd2,careerSummary.td3,regularSeason.season,playerId
0,2021,2,6.7,7.8,2.7,24.5,1.5,0.8,0.7,0.0,...,1529,5755,606,542,1591,16260,126,1,"[{'seasonYear': 2021, 'teams': [{'teamId': '16...",203500
0,2021,2,13.6,5.9,0.9,21.8,0.7,0.3,1.2,39.1,...,2550,20196,1047,986,2830,35584,353,0,"[{'seasonYear': 2021, 'teams': [{'teamId': '16...",200746
0,2020,2,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,1302,5350,711,445,154,17698,56,0,"[{'seasonYear': 2020, 'teams': [{'teamId': '0'...",202329
0,2021,2,8.0,5.3,2.4,21.9,1.0,0.8,0.4,36.2,...,673,2997,454,239,247,9500,12,1,"[{'seasonYear': 2021, 'teams': [{'teamId': '16...",203937
0,2021,2,27.1,11.7,6.0,32.4,3.0,1.2,1.8,28.4,...,1813,12809,607,542,1786,19735,268,25,"[{'seasonYear': 2021, 'teams': [{'teamId': '16...",203507


# Merging

Merging dataframes together to fill in information on players' current teams, draft teams, and stats.

In [360]:
nbaData = playersDfClean.merge(teamsDf, how="left", on="teamId")
teamsDf["teamId"] = teamsDf["teamId"].astype("float")
nbaData = nbaData.merge(teamsDf, how="left", left_on="draftTeam", right_on="teamId", suffixes=["current", "draft"])
nbaData = nbaData.merge(statsDf, how="left", left_on="personId", right_on="playerId")

In [23]:
nbaData.head()

Unnamed: 0,firstName,lastName,personId,teamIdcurrent,jersey,pos,heightFeet,heightInches,heightMeters,weightPounds,...,isNBAFranchisedraft,confNamedraft,tricodedraft,teamShortNamedraft,divNamedraft,isAllStardraft,nicknamedraft,urlNamedraft,teamIddraft,altCityNamedraft
0,Alex,Abrines,203518,1610613000.0,8.0,G-F,6,6,1.98,190.0,...,True,West,OKC,Oklahoma City,Northwest,False,Thunder,thunder,1610613000.0,Oklahoma City
1,Quincy,Acy,203112,1610613000.0,13.0,F,6,7,2.01,240.0,...,True,East,TOR,Toronto,Atlantic,False,Raptors,raptors,1610613000.0,Toronto
2,Steven,Adams,203500,1610613000.0,12.0,C,7,0,2.13,255.0,...,True,West,OKC,Oklahoma City,Northwest,False,Thunder,thunder,1610613000.0,Oklahoma City
3,Arron,Afflalo,201167,1610613000.0,40.0,G,6,5,1.96,210.0,...,True,East,DET,Detroit,Central,False,Pistons,pistons,1610613000.0,Detroit
4,Alexis,Ajinca,201582,1610613000.0,42.0,C,7,2,2.18,248.0,...,True,East,CHA,Charlotte,Southeast,False,Hornets,hornets,1610613000.0,Charlotte


In [26]:
#statsDf.to_csv("stats.csv")
nbaData.to_csv("nbaData.csv")

# Scraping Player Salaries

From https://hoopshype.com/salaries/players/

In [1]:
from bs4 import BeautifulSoup

In [36]:
url = "https://hoopshype.com/salaries/players/"
request = requests.get(url)
soup = BeautifulSoup(request.text, "html.parser")
salaryTableHtml = soup.find_all("table")

salaryDf = pd.read_html(str(salaryTableHtml))[0]
salaryDf = salaryDf.drop(["Unnamed: 0"], axis=1)

In [37]:
salaryDf.head()

Unnamed: 0,Player,2021/22,2022/23,2023/24,2024/25,2025/26,2026/27
0,Stephen Curry,"$45,780,966","$48,070,014","$51,915,615","$55,761,217","$59,606,817",$0
1,James Harden,"$44,310,840","$47,366,760",$0,$0,$0,$0
2,John Wall,"$44,310,840","$47,366,760",$0,$0,$0,$0
3,Russell Westbrook,"$44,211,146","$47,063,478",$0,$0,$0,$0
4,LeBron James,"$41,180,544","$44,474,988",$0,$0,$0,$0


In [39]:
salaryDf.to_csv("salary.csv")

# Scraping Team LatLong

From https://en.wikipedia.org/wiki/National_Basketball_Association

In [39]:
url = "https://en.wikipedia.org/wiki/National_Basketball_Association"
request = requests.get(url)
soup = BeautifulSoup(request.text, "html.parser")
coords = soup.select(".geo-dec")
teams = soup.select(".wikitable b a")

coords_text = [coord.text for coord in coords]
teams_text = [team.text for team in teams]

latlongDf = pd.DataFrame({"team" : teams_text, "coordinates": coords_text})

latlongDf["latitude"] = latlongDf["coordinates"].str.replace("[NW]", "", regex=True).str.split("°").str[0]
latlongDf["longitude"] = latlongDf["coordinates"].str.replace("[NW]", "", regex=True).str.split("°").str[1]
latlongDf = latlongDf.drop(["coordinates"], axis=1)

latlongDf.head()


In [70]:
latlongDf.head()

Unnamed: 0,team,latitude,longitude
0,Boston Celtics,42.366303,71.062228
1,Brooklyn Nets,40.68265,73.974689
2,New York Knicks,40.750556,73.993611
3,Philadelphia 76ers,39.901111,75.171944
4,Toronto Raptors,43.643333,79.379167


In [71]:
latlongDf.to_csv("latlong.csv")