In [1]:
import pandas as pd
import warnings

warnings.filterwarnings('ignore')

In [2]:
url = 'https://www.basketball-reference.com/teams/'

teams = pd.read_html(url, attrs = {'id': 'teams_active'})[0]
teams = teams[teams['To'].str[-2:] == '21']  
teams = teams.drop_duplicates(subset = ['Franchise']).reset_index(drop = True)

# Add team abbreviations
abbrev_url = 'https://en.wikipedia.org/wiki/Wikipedia:WikiProject_National_Basketball_Association/National_Basketball_Association_team_abbreviations'
abbrev = pd.read_html(abbrev_url, header = 0)[0]
teams = pd.merge(teams, abbrev)

teams.index.rename('team_id', inplace = True)
teams[['City', 'Name']] = teams['Franchise'].str.rsplit(' ', 1, expand = True)
teams = teams[['City', 'Name', 'Abbreviation/Acronym', 'From', 'To', 'Yrs', 'G', 'W', 'L', 'W/L%', 'Plyfs', 'Div', 'Conf', 'Champ']]

teams.head()

Unnamed: 0_level_0,City,Name,Abbreviation/Acronym,From,To,Yrs,G,W,L,W/L%,Plyfs,Div,Conf,Champ
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,Atlanta,Hawks,ATL,1949-50,2020-21,72,5691,2807,2884,0.493,47,11,0,1
1,Boston,Celtics,BOS,1946-47,2020-21,75,5868,3462,2406,0.59,57,31,9,17
2,Brooklyn,Nets,BKN,1967-68,2020-21,54,4366,1907,2459,0.437,29,5,2,2
3,Charlotte,Hornets,CHA,1988-89,2020-21,31,2467,1083,1384,0.439,10,0,0,0
4,Chicago,Bulls,CHI,1966-67,2020-21,55,4434,2258,2176,0.509,35,9,6,6


In [3]:
from collections import defaultdict

teams_dict = dict()

for team_name in teams['Name']:
    current_team = teams[teams['Name'] == team_name]
    
    teams_dict[team_name] = {}
    teams_dict[team_name]['id'] = int(current_team.index.values)
    teams_dict[team_name]['city'] = current_team.City.values[0]
    teams_dict[team_name]['initials'] = current_team['Abbreviation/Acronym'].values[0]
teams_dict

{'Hawks': {'id': 0, 'city': 'Atlanta', 'initials': 'ATL'},
 'Celtics': {'id': 1, 'city': 'Boston', 'initials': 'BOS'},
 'Nets': {'id': 2, 'city': 'Brooklyn', 'initials': 'BKN'},
 'Hornets': {'id': 3, 'city': 'Charlotte', 'initials': 'CHA'},
 'Bulls': {'id': 4, 'city': 'Chicago', 'initials': 'CHI'},
 'Cavaliers': {'id': 5, 'city': 'Cleveland', 'initials': 'CLE'},
 'Mavericks': {'id': 6, 'city': 'Dallas', 'initials': 'DAL'},
 'Nuggets': {'id': 7, 'city': 'Denver', 'initials': 'DEN'},
 'Pistons': {'id': 8, 'city': 'Detroit', 'initials': 'DET'},
 'Warriors': {'id': 9, 'city': 'Golden State', 'initials': 'GSW'},
 'Rockets': {'id': 10, 'city': 'Houston', 'initials': 'HOU'},
 'Pacers': {'id': 11, 'city': 'Indiana', 'initials': 'IND'},
 'Clippers': {'id': 12, 'city': 'Los Angeles', 'initials': 'LAC'},
 'Lakers': {'id': 13, 'city': 'Los Angeles', 'initials': 'LAL'},
 'Grizzlies': {'id': 14, 'city': 'Memphis', 'initials': 'MEM'},
 'Heat': {'id': 15, 'city': 'Miami', 'initials': 'MIA'},
 'Bucks':

In [4]:
def add_team_id(team_name):
    return teams_dict[team_name]['id']

def get_current_standings():
    url = 'https://www.basketball-reference.com/leagues/NBA_2021.html'
    east = pd.read_html(url, attrs= {'id': 'confs_standings_E'})[0]
    west = pd.read_html(url, attrs= {'id': 'confs_standings_W'})[0]
    
    east[['City', 'Name']] = east['Eastern Conference'].str.replace('[^a-zA-Z ]', '').str.rsplit(' ', 1, expand = True)
    west[['City', 'Name']] = west['Western Conference'].str.replace('[^a-zA-Z ]', '').str.rsplit(' ', 1, expand = True)
    east.loc[(east.City == 'Philadelphia'), 'Name'] = '76ers'
    
    west['team_id'] = west['Name'].apply(add_team_id)
    east['team_id'] = east['Name'].apply(add_team_id)

    east = east[['team_id', 'City', 'Name', 'W', 'L', 'W/L%', 'GB', 'PS/G', 'PA/G', 'SRS']]
    west = west[['team_id', 'City', 'Name', 'W', 'L', 'W/L%', 'GB', 'PS/G', 'PA/G', 'SRS']]    
    
    return east, west

In [5]:
east_standings, west_standings = get_current_standings()
east_standings.head()

Unnamed: 0,team_id,City,Name,W,L,W/L%,GB,PS/G,PA/G,SRS
0,22,Philadelphia,76ers,49,23,0.681,—,113.6,108.1,5.28
1,2,Brooklyn,Nets,48,24,0.667,1.0,118.6,114.1,4.24
2,16,Milwaukee,Bucks,46,26,0.639,3.0,120.1,114.2,5.57
3,19,New York,Knicks,41,31,0.569,8.0,107.0,104.7,2.13
4,0,Atlanta,Hawks,41,31,0.569,8.0,113.7,111.4,2.14


In [6]:
import time
import requests
from bs4 import BeautifulSoup
from bs4 import Comment

def get_current_players():
    current_players = pd.DataFrame()
    for team in teams_dict.keys():
        initial = teams_dict[team]['initials']
        url = f'https://www.basketball-reference.com/teams/{initial}/2021.html'

        if team == 'Nets':
            url = 'https://www.basketball-reference.com/teams/BRK/2021.html'
        if team == 'Hornets':
            url = 'https://www.basketball-reference.com/teams/CHO/2021.html'
        if team == 'Suns':
            url = 'https://www.basketball-reference.com/teams/PHO/2021.html'

        df = pd.read_html(url)[0]
        df['team_id'] = teams_dict[team]['id']
        current_players = current_players.append(df, ignore_index = True)
        time.sleep(1)
        
    current_players.index.rename('player_id', inplace = True)
    current_players['Player'].str.replace(' (TW)', '')
    
    current_players = current_players[['team_id','No.', 'Player', 'Pos', 'Ht', 'Wt', 'Birth Date', 'Exp', 'College']]
    
    return current_players

current_players = get_current_players()

In [7]:
current_players

Unnamed: 0_level_0,team_id,No.,Player,Pos,Ht,Wt,Birth Date,Exp,College
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,0,18,Solomon Hill,PF,6-6,226,"March 18, 1991",7,Arizona
1,0,3,Kevin Huerter,SG,6-7,190,"August 27, 1998",2,Maryland
2,0,11,Trae Young,PG,6-1,180,"September 19, 1998",2,Oklahoma
3,0,20,John Collins,PF,6-9,235,"September 23, 1997",3,Wake Forest
4,0,15,Clint Capela,C,6-10,240,"May 18, 1994",6,
...,...,...,...,...,...,...,...,...,...
501,29,16,Anthony Gill,PF,6-7,230,"October 17, 1992",R,"South Carolina, Virginia"
502,29,21,Daniel Gafford,C,6-10,234,"October 1, 1998",1,Arkansas
503,29,5,Cassius Winston (TW),PG,6-1,185,"February 28, 1998",R,Michigan State
504,29,1,Chandler Hutchison,SF,6-6,210,"April 26, 1996",2,Boise State


In [8]:
import requests
from bs4 import BeautifulSoup
from bs4 import Comment

page = requests.get('https://www.basketball-reference.com/teams/WAS/2021.html#totals::22').text
soup = BeautifulSoup(page, 'html.parser')

df = pd.read_html(page)[0]

comments = soup.find_all(string=lambda text: isinstance(text, Comment))

tables = []
for each in comments:
    if 'table' in each:
        try:
            tables.append(pd.read_html(each)[0])
        except:
            continue
player_totals = tables[4]

df = df.merge(player_totals, left_on = 'Player', right_on = 'Unnamed: 1', copy = False)