In [1]:
import warnings

import numpy as np
import pandas as pd

import basketball_reference_web_scraper
from basketball_reference_web_scraper import client



In [2]:
# Upper-cased full team name -> three-letter team abbreviation.
# Historical names use the same codes that appear in TEAM_SETS, so every
# value here is unique and can be found in exactly one TEAM_SETS group
# (previously e.g. 'MILWAUKEE HAWKS' and 'MILWAUKEE BUCKS' both mapped to 'MIL').
TEAM_TO_TEAM_ABBR = {
    # Hawks
    'ATLANTA HAWKS': 'ATL',
    'ST. LOUIS HAWKS': 'STL',
    'MILWAUKEE HAWKS': 'MLH',
    'TRI-CITIES BLACKHAWKS': 'TRI',
    # Celtics
    'BOSTON CELTICS': 'BOS',
    # Nets
    'BROOKLYN NETS': 'BRK',
    'NEW JERSEY NETS': 'NJN',
    'NEW YORK NETS': 'NYN',
    # Bulls
    'CHICAGO BULLS': 'CHI',
    # Hornets / Bobcats
    'CHARLOTTE HORNETS': 'CHO',
    'CHARLOTTE BOBCATS': 'CHA',
    # Cavaliers, Mavericks, Nuggets
    'CLEVELAND CAVALIERS': 'CLE',
    'DALLAS MAVERICKS': 'DAL',
    'DENVER NUGGETS': 'DEN',
    # Pistons
    'DETROIT PISTONS': 'DET',
    'FORT WAYNE PISTONS': 'FTW',
    # Warriors
    'GOLDEN STATE WARRIORS': 'GSW',
    'SAN FRANCISCO WARRIORS': 'SFW',
    'PHILADELPHIA WARRIORS': 'PHW',
    # Rockets
    'HOUSTON ROCKETS': 'HOU',
    'SAN DIEGO ROCKETS': 'SDR',
    # Pacers
    'INDIANA PACERS': 'IND',
    # Clippers
    'LOS ANGELES CLIPPERS': 'LAC',
    'SAN DIEGO CLIPPERS': 'SDC',
    'BUFFALO BRAVES': 'BUF',
    # Lakers
    'LOS ANGELES LAKERS': 'LAL',
    'MINNEAPOLIS LAKERS': 'MNL',
    # Grizzlies
    'MEMPHIS GRIZZLIES': 'MEM',
    'VANCOUVER GRIZZLIES': 'VAN',
    # Heat, Bucks, Timberwolves
    'MIAMI HEAT': 'MIA',
    'MILWAUKEE BUCKS': 'MIL',
    'MINNESOTA TIMBERWOLVES': 'MIN',
    # Pelicans / Hornets
    'NEW ORLEANS PELICANS': 'NOP',
    'NEW ORLEANS/OKLAHOMA CITY HORNETS': 'NOK',
    'NEW ORLEANS HORNETS': 'NOH',
    # Knicks
    'NEW YORK KNICKS': 'NYK',
    # Thunder / SuperSonics
    'OKLAHOMA CITY THUNDER': 'OKC',
    'SEATTLE SUPERSONICS': 'SEA',
    # Magic
    'ORLANDO MAGIC': 'ORL',
    # 76ers
    'PHILADELPHIA 76ERS': 'PHI',
    'SYRACUSE NATIONALS': 'SYR',
    # Suns, Trail Blazers
    'PHOENIX SUNS': 'PHO',
    'PORTLAND TRAIL BLAZERS': 'POR',
    # Kings / Royals
    'SACRAMENTO KINGS': 'SAC',
    'KANSAS CITY KINGS': 'KCK',
    'KANSAS CITY-OMAHA KINGS': 'KCO',
    'CINCINNATI ROYALS': 'CIN',
    'ROCHESTER ROYALS': 'ROC',
    # Spurs, Raptors
    'SAN ANTONIO SPURS': 'SAS',
    'TORONTO RAPTORS': 'TOR',
    # Jazz
    'UTAH JAZZ': 'UTA',
    'NEW ORLEANS JAZZ': 'NOJ',
    # Wizards / Bullets
    'WASHINGTON WIZARDS': 'WAS',
    'CAPITAL BULLETS': 'CAP',
    'BALTIMORE BULLETS': 'BAL',
}

# Groups of team abbreviations; each inner list collects the codes that are
# treated as belonging together (e.g. 'SEA' with 'OKC', 'VAN' with 'MEM').
TEAM_SETS = [
    ['STL', 'TRI', 'MLH', 'ATL'],
    ['BOS'],
    ['NJN', 'BRK', 'NYN', 'NJA', 'NYA'],
    ['CHO', 'CHA', 'CHH'],
    ['CHI'],
    ['CLE'],
    ['DAL'],
    ['DEN', 'DNR', 'DNA'],
    ['DET', 'FTW'],
    ['GSW', 'SFW', 'PHW'],
    ['SDR', 'HOU'],
    ['INA', 'IND'],
    ['SDC', 'LAC', 'BUF'],
    ['LAL', 'MNL'],
    ['MEM', 'VAN'],
    ['MIA'],
    ['MIL'],
    ['MIN'],
    ['NOP', 'NOH', 'NOK'],
    ['NYK'],
    ['SEA', 'OKC'],
    ['ORL'],
    ['PHI', 'SYR'],
    ['PHO'],
    ['POR'],
    ['CIN', 'SAC', 'KCO', 'KCK', 'ROC'],
    ['DLC', 'SAA', 'SAS', 'TEX'],
    ['TOR'],
    ['NOJ', 'UTA'],
    ['WSB', 'CHP', 'CAP', 'BAL', 'WAS', 'CHZ'],
]

# Teams that have appeared since the 1999/2000 season:

# Western Conference team names (mixed case, as opposed to the upper-cased
# keys of TEAM_TO_TEAM_ABBR). 'New Orleans Hornets' is also listed in
# EAST_TEAMS.
WEST_TEAMS = [
    'Phoenix Suns',
    'New Orleans Pelicans',
    'San Antonio Spurs',
    'New Orleans Hornets',
    'Seattle SuperSonics',
    'New Orleans/Oklahoma City Hornets',
    'Oklahoma City Thunder',
    'Memphis Grizzlies',
    'Utah Jazz',
    'Houston Rockets',
    'Los Angeles Clippers',
    'Los Angeles Lakers',
    'Portland Trail Blazers',
    'Denver Nuggets',
    'Golden State Warriors',
    'Dallas Mavericks',
    'Minnesota Timberwolves',
    'Sacramento Kings',
    'Vancouver Grizzlies',
]

# Eastern Conference team names (mixed case). 'New Orleans Hornets' is also
# listed in WEST_TEAMS -- presumably because the franchise played in both
# conferences; confirm before relying on the two lists being disjoint.
EAST_TEAMS = [
    'Atlanta Hawks',
    'Brooklyn Nets',
    'New York Knicks',
    'New Orleans Hornets',
    'Milwaukee Bucks',
    'Philadelphia 76ers',
    'Charlotte Hornets',
    'Orlando Magic',
    'New Jersey Nets',
    'Charlotte Bobcats',
    'Miami Heat',
    'Toronto Raptors',
    'Chicago Bulls',
    'Detroit Pistons',
    'Cleveland Cavaliers',
    'Boston Celtics',
    'Indiana Pacers',
    'Washington Wizards',
]

# Every distinct team name from either conference. The set() removes the name
# that appears in both lists; sorted() then fixes the order, because iterating
# a set of strings yields a different order on each kernel restart and would
# make everything derived from these lists non-reproducible.
# None of the names currently contains 'Division', so that filter is a no-op
# kept only as a guard.
all_teams = sorted(
    team for team in set(WEST_TEAMS + EAST_TEAMS) if 'Division' not in team
)

# Abbreviation of each entry of all_teams, in the same order.
# NOTE(review): 'CHARLOTTE HORNETS' maps only to 'CHO', while TEAM_SETS also
# lists 'CHH' in that group -- confirm whether some seasons need 'CHH' instead.
abb_team = [TEAM_TO_TEAM_ABBR[team.upper()] for team in all_teams]

In [3]:
def ht_converter(x):
    """Convert a height written as 'feet-inches' (e.g. '6-10') to metres.

    Parameters
    ----------
    x : str or missing value
        Height in the form '<feet>-<inches>'. ``None``, NaN and blank strings
        are treated as "height unknown".

    Returns
    -------
    float
        Height in metres, or NaN when ``x`` is missing.

    Raises
    ------
    IndexError
        If ``x`` has no '-' separator.
    ValueError
        If the feet or inches part is not numeric.
    """
    # A single missing height used to raise here, which made the caller discard
    # the whole roster; propagate NaN for that one player instead.
    if pd.isna(x) or (isinstance(x, str) and not x.strip()):
        return float('nan')

    parts = x.split('-')
    feet = float(parts[0]) + float(parts[1]) / 12  # 12 inches per foot
    return feet * 0.3048  # 0.3048 metres per foot

def wt_converter(x):
    """Convert a weight from pounds to kilograms.

    ``x`` is multiplied by 0.453592 (kilograms per pound) and the product is
    returned; no validation is applied to ``x``.
    """
    kg_per_lb = 0.453592
    return x * kg_per_lb

In [5]:
def collect_attributes(year_list, set_abb_teams=sorted(set(abb_team))):
    """Fetch and lightly clean the roster of every requested team and season.

    Parameters
    ----------
    year_list : iterable
        Seasons to fetch; each value is passed unchanged to ``get_roster``.
    set_abb_teams : iterable of str, optional
        Team abbreviations to fetch. The default is the de-duplicated, sorted
        content of ``abb_team``, evaluated once when this function is defined.

    Returns
    -------
    dict
        ``{year: {team: DataFrame}}``. Every year in ``year_list`` gets a key;
        a team is absent from a year when its roster could not be fetched or
        cleaned. Each stored frame has PLAYER stripped of the ' (TW)' suffix,
        EXPERIENCE as an int ('R' becomes 0), missing COLLEGE filled with
        'International', WEIGHT and HEIGHT passed through ``wt_converter`` /
        ``ht_converter``, plus TEAM and YEAR columns.

    Notes
    -----
    ``get_roster`` is not defined in the visible cells; it must already exist
    in the kernel namespace. Failures stay non-fatal (many team/season pairs
    presumably do not exist), but they are no longer silent: one warning
    summarising them is emitted at the end.
    """
    rosters_essentials = {}
    failures = []  # (team, year, exception) for every pair that was skipped

    for year in year_list:
        rosters_essentials[year] = {}
        for team in set_abb_teams:
            try:
                df = get_roster(team, year)

                # Literal (non-regex) removal of the ' (TW)' suffix.
                df['PLAYER'] = df['PLAYER'].str.replace(' (TW)', '', regex=False)

                df['EXPERIENCE'] = df['EXPERIENCE'].map(
                    lambda exp: 0 if exp == 'R' else int(exp)
                )
                # Assign the result back: fillna(inplace=True) on an attribute
                # access does not update the frame under pandas Copy-on-Write.
                df['COLLEGE'] = df['COLLEGE'].fillna('International')

                df['WEIGHT'] = df['WEIGHT'].apply(wt_converter)
                df['HEIGHT'] = df['HEIGHT'].apply(ht_converter)

                df['TEAM'] = team
                df['YEAR'] = year

                rosters_essentials[year][team] = df
            except Exception as exc:
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit stop a long-running collection.
                failures.append((team, year, exc))

    if failures:
        first_team, first_year, first_exc = failures[0]
        warnings.warn(
            f"collect_attributes skipped {len(failures)} team/season "
            f"combination(s); first failure: team={first_team!r}, "
            f"year={first_year!r}: {first_exc!r}",
            stacklevel=2,
        )

    return rosters_essentials

In [None]:
# Seasons to collect: 1999 through 2020 (range's stop is exclusive).
# NOTE(review): the stop value was 20201, which requested ~18,000 non-existent
# seasons; 2021 is assumed to be what was meant -- adjust if 2020 should be
# excluded.
year_list = list(range(1999, 2021))

rosters = collect_attributes(year_list)

# Flatten {year: {team: DataFrame}} into one list of frames. The loop used to
# append to an undefined name (df_list), so roster_list stayed empty.
roster_list = [
    team_roster
    for rosters_by_team in rosters.values()
    for team_roster in rosters_by_team.values()
]

df = pd.concat(roster_list)

# '...' is a placeholder destination; point it at the real output path.
df.to_csv('...')