In [2]:
import bs4
from bs4 import BeautifulSoup
import datetime
import time
import pandas as pd
import lxml
import html
import pip._vendor.requests as requests
import html5lib

Web scraping Basketball stats. Namely the Standings table for the 22/23 regular season, Point per Game of Members of the Teams and Full RG Schedule of each team

In [3]:
url = "https://www.basketball-reference.com/leagues/NBA_2023_standings.html" #Website
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"}

In [4]:
page = requests.get(url, headers=headers) #Request access to the HTML of the site 
soup = BeautifulSoup(page.content, "html.parser") #Use BS4 to parse the HTML, making it understandable

Standings= pd.read_html(page.content, match='Table') #Read in the Standings table. There are many tables
Standings #I want just the first two tables which are Eastern and Western conference standings

[          Eastern Conference   W   L   W/L%    GB   PS/G   PA/G   SRS
 0       Milwaukee Bucks* (1)  58  24  0.707     —  116.9  113.3  3.61
 1        Boston Celtics* (2)  57  25  0.695   1.0  117.9  111.4  6.38
 2    Philadelphia 76ers* (3)  54  28  0.659   4.0  115.2  110.9  4.37
 3   Cleveland Cavaliers* (4)  51  31  0.622   7.0  112.3  106.9  5.23
 4       New York Knicks* (5)  47  35  0.573  11.0  116.0  113.1  2.99
 5         Brooklyn Nets* (6)  45  37  0.549  13.0  113.4  112.5  1.03
 6            Miami Heat* (7)  44  38  0.537  14.0  109.5  109.8 -0.13
 7         Atlanta Hawks* (8)  41  41  0.500  17.0  118.4  118.1  0.32
 8       Toronto Raptors* (9)  41  41  0.500  17.0  112.9  111.4  1.59
 9        Chicago Bulls* (10)  40  42  0.488  18.0  113.1  111.8  1.37
 10       Indiana Pacers (11)  35  47  0.427  23.0  116.3  119.5 -2.91
 11   Washington Wizards (12)  35  47  0.427  23.0  113.2  114.4 -1.06
 12        Orlando Magic (13)  34  48  0.415  24.0  111.4  114.0 -2.39
 13   

In [5]:
standings_east_conf = Standings[0] #First Table
standings_east_conf

Unnamed: 0,Eastern Conference,W,L,W/L%,GB,PS/G,PA/G,SRS
0,Milwaukee Bucks* (1),58,24,0.707,—,116.9,113.3,3.61
1,Boston Celtics* (2),57,25,0.695,1.0,117.9,111.4,6.38
2,Philadelphia 76ers* (3),54,28,0.659,4.0,115.2,110.9,4.37
3,Cleveland Cavaliers* (4),51,31,0.622,7.0,112.3,106.9,5.23
4,New York Knicks* (5),47,35,0.573,11.0,116.0,113.1,2.99
5,Brooklyn Nets* (6),45,37,0.549,13.0,113.4,112.5,1.03
6,Miami Heat* (7),44,38,0.537,14.0,109.5,109.8,-0.13
7,Atlanta Hawks* (8),41,41,0.5,17.0,118.4,118.1,0.32
8,Toronto Raptors* (9),41,41,0.5,17.0,112.9,111.4,1.59
9,Chicago Bulls* (10),40,42,0.488,18.0,113.1,111.8,1.37


In [6]:
standings_west_conf = Standings[1] #2nd Table
standings_west_conf


Unnamed: 0,Western Conference,W,L,W/L%,GB,PS/G,PA/G,SRS
0,Denver Nuggets* (1),53,29,0.646,—,115.8,112.5,3.04
1,Memphis Grizzlies* (2),51,31,0.622,2.0,116.9,113.0,3.6
2,Sacramento Kings* (3),48,34,0.585,5.0,120.7,118.1,2.3
3,Phoenix Suns* (4),45,37,0.549,8.0,113.6,111.6,2.08
4,Los Angeles Clippers* (5),44,38,0.537,9.0,113.6,113.1,0.31
5,Golden State Warriors* (6),44,38,0.537,9.0,118.9,117.1,1.66
6,Los Angeles Lakers* (7),43,39,0.524,10.0,117.2,116.6,0.43
7,Minnesota Timberwolves* (8),42,40,0.512,11.0,115.8,115.8,-0.22
8,New Orleans Pelicans* (9),42,40,0.512,11.0,114.4,112.5,1.63
9,Oklahoma City Thunder* (10),40,42,0.488,13.0,117.5,116.4,0.96


Lets Clean it Up a little before we continue Scraping

In [7]:
#Now lets rename the columns Eastern Conf, and Western Conf to 'Team Name' so we can concat both tables
standings_east_conf.rename(columns={'Eastern Conference':'Team Name'},inplace=True)

standings_west_conf.rename(columns={'Western Conference':'Team Name'},inplace=True)

#Join both tables as standings table
standings_table = pd.concat([standings_east_conf,standings_west_conf], ignore_index=True,sort=False)

In [8]:
#Clean standings table
standings_table=standings_table.drop(columns="GB") #Drop GB not needed
#Lets make the columns more understandable
standings_table = standings_table.rename(columns=
                    {"PS/G":"Points_Per_Game", "PA/G":"Opp PPG", "SRS":"Simple Rating System"})

In [93]:
#Now lets add a column to signify teams that made the Postseason
postseason_status = ["Yes" 
                  if "*" in p 
                  else "No" 
                  for p in standings_table["Team Name"]]
standings_table["Playoff Status"] = postseason_status


KeyError: 'Team Name'

In [10]:
standings_table["Team Name"] = standings_table["Team Name"].str.replace("*", "")#Remove astericks
standings_table[["Team_Name", "num"]] = standings_table["Team Name"].str.split(
    "(", expand=True)#split the Team name column to remove symbols.Name the split columns
standings_table = standings_table.drop(columns=["num","Team Name"])#Drop old Team name column and symbols column


In [11]:
standings_table = standings_table.sort_values(by="W/L%", ascending=False)#Arrange standings by W/%
#Rearrange Table columns
standings_table = standings_table[['Team_Name', 'W', 'L', 'W/L%',
                                  'Points_Per_Game', 'Opp PPG', 'Simple Rating System','Playoff Status']]
standings_table.reset_index(drop = True, inplace=True)#reset index
standings_table.index = standings_table.index + 1 #Let it start from 1
standings_table.index =standings_table.index.set_names(["Ranking"])#Name the index Rank
standings_table

Unnamed: 0_level_0,Team_Name,W,L,W/L%,Points_Per_Game,Opp PPG,Simple Rating System,Playoff Status
Ranking,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Milwaukee Bucks,58,24,0.707,116.9,113.3,3.61,Yes
2,Boston Celtics,57,25,0.695,117.9,111.4,6.38,Yes
3,Philadelphia 76ers,54,28,0.659,115.2,110.9,4.37,Yes
4,Denver Nuggets,53,29,0.646,115.8,112.5,3.04,Yes
5,Cleveland Cavaliers,51,31,0.622,112.3,106.9,5.23,Yes
6,Memphis Grizzlies,51,31,0.622,116.9,113.0,3.6,Yes
7,Sacramento Kings,48,34,0.585,120.7,118.1,2.3,Yes
8,New York Knicks,47,35,0.573,116.0,113.1,2.99,Yes
9,Phoenix Suns,45,37,0.549,113.6,111.6,2.08,Yes
10,Brooklyn Nets,45,37,0.549,113.4,112.5,1.03,Yes


In [None]:
standings_table.rename(columns={"Playoff Status":"Postseason_status"}, inplace=True)

Back To Scraping


Lets get all the Team URLs to access their rosters and Schedules


In [12]:

tableeast = soup.select('table.sortable')[0] #Lets select the Eastern Conference Table
tablewest = soup.select('table.sortable')[1] #Lets select the Western Conference Table
time.sleep(5)
tableeast

<table class="suppress_all sortable stats_table" data-cols-to-freeze=",1" id="confs_standings_E">
<caption>Conference Standings Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr>
<th aria-label="Eastern Conference" class="poptip sort_default_asc left" data-stat="team_name" scope="col">Eastern Conference</th>
<th aria-label="Wins" class="poptip right" data-stat="wins" data-tip="Wins" scope="col">W</th>
<th aria-label="Losses" class="poptip right" data-stat="losses" data-tip="Losses" scope="col">L</th>
<th aria-label="Win-Loss Percentage" class="poptip right" data-stat="win_loss_pct" data-tip="Win-Loss Percentage" scope="col">W/L%</th>
<th aria-label="GB" class="poptip sort_default_asc right" data-stat="gb" data-tip="Games Behind" scope="col">GB</th>
<th aria-label="Points Per Game" class="poptip right" data-stat="pts_per_g" data-tip="Points Per Game" scope="col">PS/G</th>
<th aria-label="Opponent Points Per Game" class="poptip right" data-

In [13]:
 #Lets find all the <a element in the Tables as they contain all the Team links
link1 =tableeast.find_all('a')
link2 =tablewest.find_all('a')

time.sleep(5)

In [14]:
#Lets get all the links

#Eastern Conference first
href1 = [l.get("href") for l in link1]

easturl = [f"https://www.basketball-reference.com{h}" for h in href1]

#Western Conference next
href2 = [l.get("href") for l in link2]
westurl = [f"https://www.basketball-reference.com{h}" for h in href2]

time.sleep(10)

In [15]:
#Add both east and west urls and make it all teams url
teamurl = easturl + westurl
teamurl

['https://www.basketball-reference.com/teams/MIL/2023.html',
 'https://www.basketball-reference.com/teams/BOS/2023.html',
 'https://www.basketball-reference.com/teams/PHI/2023.html',
 'https://www.basketball-reference.com/teams/CLE/2023.html',
 'https://www.basketball-reference.com/teams/NYK/2023.html',
 'https://www.basketball-reference.com/teams/BRK/2023.html',
 'https://www.basketball-reference.com/teams/MIA/2023.html',
 'https://www.basketball-reference.com/teams/ATL/2023.html',
 'https://www.basketball-reference.com/teams/TOR/2023.html',
 'https://www.basketball-reference.com/teams/CHI/2023.html',
 'https://www.basketball-reference.com/teams/IND/2023.html',
 'https://www.basketball-reference.com/teams/WAS/2023.html',
 'https://www.basketball-reference.com/teams/ORL/2023.html',
 'https://www.basketball-reference.com/teams/CHO/2023.html',
 'https://www.basketball-reference.com/teams/DET/2023.html',
 'https://www.basketball-reference.com/teams/DEN/2023.html',
 'https://www.basketball

Test on 1 team first


In [16]:
#Lets use the Team links and get Roster Details. Lets get from the bucks
easturlbucks = easturl[0]
data = requests.get(easturlbucks, headers=headers)

In [17]:
soup = BeautifulSoup(data.content, "html.parser")

In [18]:
#Lets get their schedule
soup = BeautifulSoup(data.content, "html.parser")
links = soup.select('a')
links = [l.get("href") for l in links]
links = [l for l in links if l and "_games" in l]
links = links[0]

scheduledata = requests.get(f"https://www.basketball-reference.com{links}") #Format and make a full URL

#Lets import the Schedule table
scheduletable = pd.read_html(scheduledata.content, match="Regular Season")
scheduletable=scheduletable[0] #To make it a pandas series


time.sleep(4)

In [19]:
#Lets also get the Regular season pergame stats of each member of the team
Rosterinfo = pd.read_html(data.content, match="Per Game") 
roster = Rosterinfo[0]  #To make it a pandas series
roster



Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Jrue Holiday,32,67,65,32.6,7.3,15.3,0.479,2.4,...,0.859,1.2,3.9,5.1,7.4,1.2,0.4,2.9,1.7,19.3
1,2,Giannis Antetokounmpo,28,63,63,32.1,11.2,20.3,0.553,0.7,...,0.645,2.2,9.6,11.8,5.7,0.8,0.8,3.9,3.1,31.1
2,3,Brook Lopez,34,78,78,30.4,6.1,11.5,0.531,1.7,...,0.784,2.0,4.7,6.7,1.3,0.5,2.5,1.4,2.6,15.9
3,4,Grayson Allen,27,72,70,27.4,3.4,7.7,0.44,2.0,...,0.905,0.8,2.4,3.3,2.3,0.9,0.2,1.0,1.6,10.4
4,5,Bobby Portis,27,70,22,26.0,5.7,11.5,0.496,1.4,...,0.768,2.2,7.4,9.6,1.5,0.4,0.2,1.2,1.6,14.1
5,6,Khris Middleton,31,33,19,24.3,5.4,12.3,0.436,1.5,...,0.902,0.8,3.4,4.2,4.9,0.7,0.2,2.2,2.1,15.1
6,7,Pat Connaughton,30,61,33,23.7,2.7,6.9,0.392,1.8,...,0.659,0.8,3.8,4.6,1.3,0.6,0.2,0.5,1.1,7.6
7,8,Joe Ingles,35,46,0,22.7,2.3,5.4,0.435,1.8,...,0.857,0.3,2.5,2.8,3.3,0.7,0.1,1.2,1.6,6.9
8,9,Jevon Carter,27,81,39,22.3,3.0,7.0,0.423,1.8,...,0.816,0.4,2.1,2.5,2.4,0.8,0.4,1.0,2.0,8.0
9,10,George Hill,36,35,0,19.1,1.7,3.8,0.447,0.7,...,0.739,0.4,1.5,1.9,2.5,0.5,0.1,0.8,1.2,5.0


IT WORKED!! SO LETS DO THAT FOR ALL TEAMS


In [20]:
nba_roster = []
team_schedule = []

In [21]:
#All Rosters

for i, s in enumerate(teamurl):
    teamname = [item.split('/')[-2] for item in teamurl]
    data = requests.get(s, headers=headers)
    soup = BeautifulSoup(data.content, "html.parser")
    all_roster = pd.read_html(data.content, match="Per Game")[0]

    
    soup = BeautifulSoup(data.content, "html.parser")
    links = [l.get("href") for l in soup.find_all('a')]
    links = [l for l in links if l and "_games" in l]
    scheduledata = requests.get(f"https://www.basketball-reference.com{links[0]}") #Format and make a full URL
    allscheduletable = pd.read_html(scheduledata.content, match="Regular Season")[0]


    allscheduletable['Team'] = teamname[i] #add team name so we can know what team
    all_roster['Team'] = teamname[i]

    team_schedule.append(allscheduletable)
    nba_roster.append(all_roster)

    time.sleep(8)

In [22]:
type(team_schedule)

list

In [23]:
schedule_df = pd.concat(team_schedule)
roster_df = pd.concat(nba_roster)

In [24]:
roster_df.sort_values(by = ["PTS"], ascending=False)

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Team
1,2,Joel Embiid,28,66,66,34.6,11.0,20.1,0.548,1.0,...,1.7,8.4,10.2,4.2,1.0,1.7,3.4,3.1,33.1,PHI
1,2,Luka Dončić,23,66,66,36.2,10.9,22.0,0.496,2.8,...,0.8,7.8,8.6,8.0,1.4,0.5,3.6,2.5,32.4,DAL
0,1,Damian Lillard,32,58,58,36.3,9.6,20.7,0.463,4.2,...,0.8,4.0,4.8,7.3,0.9,0.3,3.3,1.9,32.2,POR
0,1,Shai Gilgeous-Alexander,24,68,68,35.5,10.4,20.3,0.510,0.9,...,0.9,4.0,4.8,5.5,1.6,1.0,2.8,2.8,31.4,OKC
1,2,Giannis Antetokounmpo,28,63,63,32.1,11.2,20.3,0.553,0.7,...,2.2,9.6,11.8,5.7,0.8,0.8,3.9,3.1,31.1,MIL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20,21,Sterling Brown,27,4,0,6.0,0.0,1.0,0.000,0.0,...,0.8,1.3,2.0,0.5,0.8,0.0,0.0,1.0,0.0,LAL
23,24,Alondes Williams,23,1,0,5.0,0.0,0.0,,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,2.0,1.0,0.0,BRK
24,25,Moses Brown,23,2,0,3.0,0.0,0.0,,0.0,...,0.0,0.0,0.0,0.0,0.5,0.0,0.5,0.5,0.0,BRK
19,20,Julian Champagnie,21,2,0,3.5,0.0,1.0,0.000,0.0,...,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,PHI


In [26]:
schedule_df

Unnamed: 0,G,Date,Start (ET),Unnamed: 3,Unnamed: 4,Unnamed: 5,Opponent,Unnamed: 7,Unnamed: 8,Tm,Opp,W,L,Streak,Notes,Team
0,1,"Thu, Oct 20, 2022",7:30p,,Box Score,@,Philadelphia 76ers,W,,90,88,1,0,W 1,,MIL
1,2,"Sat, Oct 22, 2022",8:00p,,Box Score,,Houston Rockets,W,,125,105,2,0,W 2,,MIL
2,3,"Wed, Oct 26, 2022",7:30p,,Box Score,,Brooklyn Nets,W,,110,99,3,0,W 3,,MIL
3,4,"Fri, Oct 28, 2022",8:00p,,Box Score,,New York Knicks,W,,119,108,4,0,W 4,,MIL
4,5,"Sat, Oct 29, 2022",8:00p,,Box Score,,Atlanta Hawks,W,,123,115,5,0,W 5,,MIL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,79,"Tue, Apr 4, 2023",10:00p,,Box Score,@,Phoenix Suns,L,,94,115,20,59,L 1,,SAS
82,80,"Thu, Apr 6, 2023",8:00p,,Box Score,,Portland Trail Blazers,W,,129,127,21,59,W 1,,SAS
83,G,Date,Start (ET),,,,Opponent,,,Tm,Opp,W,L,Streak,Notes,SAS
84,81,"Sat, Apr 8, 2023",4:00p,,Box Score,,Minnesota Timberwolves,L,,131,151,21,60,L 1,,SAS


A LOT OF CLEANING NEEDED BEFORE VIZ



In [28]:
pd.set_option('display.max.columns', 29)
pd.set_option('display.max.rows', 2595)

In [36]:
#Index is all over the place. Fix
schedule_df.reset_index(drop=True)


Unnamed: 0,G,Date,Start (ET),Unnamed: 3,Unnamed: 4,Unnamed: 5,Opponent,Unnamed: 7,Unnamed: 8,Tm,Opp,W,L,Streak,Notes,Team
0,1,"Thu, Oct 20, 2022",7:30p,,Box Score,@,Philadelphia 76ers,W,,90,88,1,0,W 1,,MIL
1,2,"Sat, Oct 22, 2022",8:00p,,Box Score,,Houston Rockets,W,,125,105,2,0,W 2,,MIL
2,3,"Wed, Oct 26, 2022",7:30p,,Box Score,,Brooklyn Nets,W,,110,99,3,0,W 3,,MIL
3,4,"Fri, Oct 28, 2022",8:00p,,Box Score,,New York Knicks,W,,119,108,4,0,W 4,,MIL
4,5,"Sat, Oct 29, 2022",8:00p,,Box Score,,Atlanta Hawks,W,,123,115,5,0,W 5,,MIL
5,6,"Mon, Oct 31, 2022",8:00p,,Box Score,,Detroit Pistons,W,,110,108,6,0,W 6,,MIL
6,7,"Wed, Nov 2, 2022",8:00p,,Box Score,,Detroit Pistons,W,,116,91,7,0,W 7,,MIL
7,8,"Fri, Nov 4, 2022",10:00p,,Box Score,@,Minnesota Timberwolves,W,,115,102,8,0,W 8,,MIL
8,9,"Sat, Nov 5, 2022",8:00p,,Box Score,,Oklahoma City Thunder,W,,108,94,9,0,W 9,,MIL
9,10,"Mon, Nov 7, 2022",8:15p,,Box Score,@,Atlanta Hawks,L,,98,117,9,1,L 1,,MIL


In [47]:
schedule_df = schedule_df[schedule_df["Date"] != "Date"] #This drops the rows that are repeated headings

schedule_df

Unnamed: 0,G,Date,Start (ET),Unnamed: 3,Unnamed: 4,Unnamed: 5,Opponent,Unnamed: 7,Unnamed: 8,Tm,Opp,W,L,Streak,Notes,Team
0,1,"Thu, Oct 20, 2022",7:30p,,Box Score,@,Philadelphia 76ers,W,,90,88,1,0,W 1,,MIL
1,2,"Sat, Oct 22, 2022",8:00p,,Box Score,,Houston Rockets,W,,125,105,2,0,W 2,,MIL
2,3,"Wed, Oct 26, 2022",7:30p,,Box Score,,Brooklyn Nets,W,,110,99,3,0,W 3,,MIL
3,4,"Fri, Oct 28, 2022",8:00p,,Box Score,,New York Knicks,W,,119,108,4,0,W 4,,MIL
4,5,"Sat, Oct 29, 2022",8:00p,,Box Score,,Atlanta Hawks,W,,123,115,5,0,W 5,,MIL
5,6,"Mon, Oct 31, 2022",8:00p,,Box Score,,Detroit Pistons,W,,110,108,6,0,W 6,,MIL
6,7,"Wed, Nov 2, 2022",8:00p,,Box Score,,Detroit Pistons,W,,116,91,7,0,W 7,,MIL
7,8,"Fri, Nov 4, 2022",10:00p,,Box Score,@,Minnesota Timberwolves,W,,115,102,8,0,W 8,,MIL
8,9,"Sat, Nov 5, 2022",8:00p,,Box Score,,Oklahoma City Thunder,W,,108,94,9,0,W 9,,MIL
9,10,"Mon, Nov 7, 2022",8:15p,,Box Score,@,Atlanta Hawks,L,,98,117,9,1,L 1,,MIL


In [51]:
schedule_df.drop(columns=["Unnamed: 3"], inplace=True) #Drop column. Its filled with blanks

KeyError: "['Unnamed: 3'] not found in axis"

In [57]:
schedule_df['Date'] = pd.to_datetime(schedule_df['Date'], format="%a, %b %d, %Y") 
#make date column a date datatype

In [60]:
schedule_df["Day_of_the_week"]=schedule_df['Date'].dt.day_name()

In [61]:
schedule_df["Day_of_the_week"]

0      Thursday
1      Saturday
2     Wednesday
3        Friday
4      Saturday
5        Monday
6     Wednesday
7        Friday
8      Saturday
9        Monday
10    Wednesday
11       Friday
12       Monday
13    Wednesday
14       Friday
15       Monday
16    Wednesday
17       Friday
18       Sunday
19    Wednesday
21       Friday
22     Saturday
23       Monday
24    Wednesday
25       Friday
26       Sunday
27      Tuesday
28     Thursday
29     Saturday
30       Monday
31    Wednesday
32       Friday
33       Sunday
34    Wednesday
35       Friday
36       Sunday
37      Tuesday
38    Wednesday
39       Friday
40       Monday
42    Wednesday
43     Thursday
44     Saturday
45       Monday
46      Tuesday
47     Saturday
48       Monday
49    Wednesday
50       Friday
51       Sunday
52      Tuesday
53     Thursday
54     Saturday
55       Monday
56     Thursday
57       Friday
58      Tuesday
59     Thursday
60       Friday
61       Sunday
63      Tuesday
64    Wednesday
65     S

In [63]:
schedule_df.drop(columns=['Unnamed: 4', 'Notes'], inplace=True)

In [65]:
schedule_df.rename(columns={"Unnamed: 7":"Win or Loss",
                            "Unnamed: 8":"RegularTime/OT",
                            "Tm":"Team_points",
                            "Opp":"Opp_points"},inplace=True) #Adds more context to column names

In [71]:
#Unnamed 5 looks like a column that tells us if the team is Home or way. So lets make it clearer
schedule_df["Unnamed: 5"] = schedule_df["Unnamed: 5"].str.strip().fillna(
    'Home').replace("@", "Away") 

In [73]:
schedule_df.rename(columns={"Unnamed: 5":"Home/Away"},inplace=True)#Rename column

In [77]:
schedule_df.reset_index(drop=True)
schedule_df["RegularTime/OT"] = schedule_df["RegularTime/OT"].fillna("RT")

In [82]:
schedule_df["Start (ET)"] = pd.to_datetime(schedule_df["Start (ET)"],format="mixed").dt.strftime("%H:%M")

In [85]:
schedule_df.set_index(["G"],inplace=True)

In [90]:
roster_df

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Team
0,1,Jrue Holiday,32,67,65,32.6,7.3,15.3,0.479,2.4,6.1,0.384,5.0,9.1,0.542,0.556,2.3,2.6,0.859,1.2,3.9,5.1,7.4,1.2,0.4,2.9,1.7,19.3,MIL
1,2,Giannis Antetokounmpo,28,63,63,32.1,11.2,20.3,0.553,0.7,2.7,0.275,10.5,17.6,0.596,0.572,7.9,12.3,0.645,2.2,9.6,11.8,5.7,0.8,0.8,3.9,3.1,31.1,MIL
2,3,Brook Lopez,34,78,78,30.4,6.1,11.5,0.531,1.7,4.7,0.374,4.4,6.9,0.637,0.606,1.9,2.4,0.784,2.0,4.7,6.7,1.3,0.5,2.5,1.4,2.6,15.9,MIL
3,4,Grayson Allen,27,72,70,27.4,3.4,7.7,0.44,2.0,5.1,0.399,1.4,2.7,0.518,0.571,1.6,1.8,0.905,0.8,2.4,3.3,2.3,0.9,0.2,1.0,1.6,10.4,MIL
4,5,Bobby Portis,27,70,22,26.0,5.7,11.5,0.496,1.4,3.7,0.37,4.3,7.8,0.555,0.555,1.4,1.8,0.768,2.2,7.4,9.6,1.5,0.4,0.2,1.2,1.6,14.1,MIL
5,6,Khris Middleton,31,33,19,24.3,5.4,12.3,0.436,1.5,4.9,0.315,3.8,7.4,0.516,0.499,2.8,3.1,0.902,0.8,3.4,4.2,4.9,0.7,0.2,2.2,2.1,15.1,MIL
6,7,Pat Connaughton,30,61,33,23.7,2.7,6.9,0.392,1.8,5.3,0.339,0.9,1.6,0.566,0.521,0.4,0.7,0.659,0.8,3.8,4.6,1.3,0.6,0.2,0.5,1.1,7.6,MIL
7,8,Joe Ingles,35,46,0,22.7,2.3,5.4,0.435,1.8,4.4,0.409,0.5,1.0,0.556,0.603,0.4,0.5,0.857,0.3,2.5,2.8,3.3,0.7,0.1,1.2,1.6,6.9,MIL
8,9,Jevon Carter,27,81,39,22.3,3.0,7.0,0.423,1.8,4.2,0.421,1.2,2.8,0.425,0.549,0.4,0.5,0.816,0.4,2.1,2.5,2.4,0.8,0.4,1.0,2.0,8.0,MIL
9,10,George Hill,36,35,0,19.1,1.7,3.8,0.447,0.7,2.1,0.311,1.0,1.7,0.621,0.534,1.0,1.3,0.739,0.4,1.5,1.9,2.5,0.5,0.1,0.8,1.2,5.0,MIL


Get the tables in CSV format

In [105]:
import os
output_path = os.path.join(os.getcwd(), "output.csv")

In [106]:
standings_table.to_csv("22-23nbastandings.csv",index=False)
roster_df.to_csv("22-23nbaroster.csv",index=False)
schedule_df.to_csv("22-23nbaschedule.csv",index=False)