In [1]:
import time
import requests
from bs4 import BeautifulSoup
from pyNBA.Data.constants import LINEUP_TEAM_TO_NBA_TEAM, LINEUP_NAME_TO_NBA_NAME

import warnings
# NOTE(review): this silences every warning in the notebook, including
# deprecation notices from pandas; consider narrowing the filter.
warnings.filterwarnings('ignore')
# display/HTML are part of the public IPython.display API; importing them from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
# Widen the classic-notebook container so wide DataFrames are readable.
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
from nba_api.stats.static.teams import find_team_by_abbreviation
from nba_api.stats.endpoints import CommonTeamRoster

In [3]:
def prepare_team(team):
    """Translate a lineup-site team abbreviation to its NBA abbreviation.

    Abbreviations with no entry in LINEUP_TEAM_TO_NBA_TEAM are returned as-is.
    """
    return LINEUP_TEAM_TO_NBA_TEAM[team] if team in LINEUP_TEAM_TO_NBA_TEAM else team

def prepare_name(name, team):
    """Translate a lineup-site player name to the name used in NBA roster data.

    LINEUP_NAME_TO_NBA_NAME maps a lineup name either directly to an NBA name
    or, when the value is a dict, to a per-team mapping keyed by team.

    Parameters:
        name: player name as scraped from the lineup page.
        team: team abbreviation used to pick an entry from a per-team mapping.

    Returns:
        The mapped name, or ``name`` unchanged when no mapping applies.
    """
    if name not in LINEUP_NAME_TO_NBA_NAME:
        return name

    mapped = LINEUP_NAME_TO_NBA_NAME[name]
    if isinstance(mapped, dict):
        # A team missing from the per-team mapping used to raise KeyError and
        # abort the whole scrape; fall back to the raw name like any other
        # unmapped player so it simply shows up as unmatched.
        return mapped.get(team, name)
    return mapped

In [4]:
import pandas as pd
# Empty template frame for the scraped lineup rows, with a fixed column order.
_player_columns = ['TEAM', 'NAME', 'START', 'PLAYERSTATUS']
current_player_data = pd.DataFrame(columns=_player_columns)

In [5]:
def get_player_data(lineup):
    """Extract players and the lineup status from one team's lineup list.

    Parameters:
        lineup: element exposing ``find_all('li')`` whose rows carry a
            ``class`` attribute (e.g. the ``<ul>`` tag for one team).

    Returns:
        (player_data, lineup_status) where ``player_data`` is a DataFrame with
        columns NAME, START, PLAYERSTATUS (one row per distinct player name,
        rows with position 'BE' excluded) and ``lineup_status`` is
        'Expected', 'Confirmed', or '' when no recognised status row is found.
    """
    columns = ['NAME', 'START', 'PLAYERSTATUS']
    status_labels = {'is-expected': 'Expected', 'is-confirmed': 'Confirmed'}

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per row.
    records = []
    seen_names = set()
    lineup_status = ''
    start = 1  # players listed before the first 'lineup__title' row get START=1

    for row in lineup.find_all('li'):
        # Rows without a class attribute carry nothing we recognise; skip them
        # instead of raising KeyError.
        row_class = row.get('class') or []
        if not row_class:
            continue

        row_kind = row_class[0]
        if row_kind == 'lineup__status':
            modifier = row_class[1] if len(row_class) > 1 else ''
            # Unrecognised modifiers leave the previous status untouched.
            lineup_status = status_labels.get(modifier, lineup_status)
        elif row_kind == 'lineup__title':
            # Every player row after a title row is recorded with START=0.
            start = 0
        elif row_kind == 'lineup__player':
            player_position = row.find('div', class_='lineup__pos').text
            player_name = row.find('a').text
            status_data = row.find('span', class_='lineup__inj')
            player_status = 'Healthy' if status_data is None else status_data.text
            # Keep only the first occurrence of each name.
            if player_position != 'BE' and player_name not in seen_names:
                records.append((player_name, start, player_status))
                seen_names.add(player_name)

    player_data = pd.DataFrame(records, columns=columns)
    return player_data, lineup_status
    

In [6]:
# Lookup tables keyed by team abbreviation; both are filled while scraping.
team_to_opp_team = dict()  # team -> opponent in today's game
team_to_status = dict()    # team -> lineup status string

In [7]:
                                  
URL = 'https://www.rotowire.com/basketball/nba-lineups.php'
LINEUP_COLUMNS = ['TEAM', 'NAME', 'START', 'PLAYERSTATUS']


def _scrape_side(game, side):
    """Return (team, player_data, lineup_status) for one side of a game card.

    ``side`` is the CSS modifier used on the page: 'is-visit' or 'is-home'.
    """
    team = game.find('a', class_='lineup__team ' + side).find('div', class_='lineup__abbr').text
    team = prepare_team(team)
    lineup = game.find('ul', class_='lineup__list ' + side)
    player_data, lineup_status = get_player_data(lineup)
    player_data['TEAM'] = team
    player_data['NAME'] = player_data['NAME'].apply(lambda x: prepare_name(x, team))
    return team, player_data, lineup_status


# Fail fast on a hung connection or an HTTP error page instead of silently
# parsing it into zero games.
page = requests.get(URL, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

# Per-team frames are collected and concatenated once. DataFrame.append was
# removed in pandas 2.0, and rebuilding current_player_data from scratch makes
# this cell safe to re-run without duplicating rows.
lineup_frames = []
games = soup.find_all('div', class_='lineup is-nba')
for game in games:
    away_team, away_player_data, away_lineup_status = _scrape_side(game, 'is-visit')
    home_team, home_player_data, home_lineup_status = _scrape_side(game, 'is-home')

    team_to_opp_team[away_team] = home_team
    team_to_opp_team[home_team] = away_team
    team_to_status[away_team] = away_lineup_status
    team_to_status[home_team] = home_lineup_status

    lineup_frames.append(away_player_data)
    lineup_frames.append(home_player_data)

if lineup_frames:
    current_player_data = pd.concat(lineup_frames)[LINEUP_COLUMNS]
else:
    # No games on the slate: keep an empty frame with the expected columns.
    current_player_data = pd.DataFrame(columns=LINEUP_COLUMNS)

In [8]:
# Season and request throttle are named so they are easy to find and change.
NBA_SEASON = '2020-21'
ROSTER_REQUEST_DELAY_SECONDS = 0.500

# One roster frame per team, concatenated once after the loop: DataFrame.append
# was removed in pandas 2.0 and re-copied the accumulated frame every iteration.
roster_frames = []
for team_abbreviation in current_player_data['TEAM'].unique():
    print(team_abbreviation)
    team = find_team_by_abbreviation(team_abbreviation)
    if team is None:
        # Surface an unmapped abbreviation clearly rather than failing with
        # "'NoneType' object is not subscriptable" on the next line.
        raise ValueError('Unknown NBA team abbreviation: {!r}'.format(team_abbreviation))
    team_id = team['id']

    roster = CommonTeamRoster(season=NBA_SEASON, team_id=team_id).get_data_frames()[0]
    # Pause between requests to avoid hammering the stats endpoint.
    time.sleep(ROSTER_REQUEST_DELAY_SECONDS)

    roster['TEAM'] = team['abbreviation']
    roster_frames.append(roster)

roster_data = pd.concat(roster_frames) if roster_frames else pd.DataFrame()
roster_data = roster_data.rename(columns={'TeamID': 'TEAMID', 'PLAYER_ID': 'PLAYERID', 'PLAYER': 'NAME'})

# Expand position codes (e.g. 'G-F' -> 'Guard-Forward'). The replacements are
# literal, so regex=False keeps the behaviour the same across pandas versions.
for position_code, position_name in (('G', 'Guard'), ('F', 'Forward'), ('C', 'Center')):
    roster_data['POSITION'] = roster_data['POSITION'].str.replace(position_code, position_name, regex=False)

IND
CLE
OKC
SAS
TOR
CHA
DAL
MIL
MEM
MIN
DEN
GSW
PHX
UTA


In [9]:
from datetime import datetime

# Date stamp (YYYY-MM-DD) written to every row.
current_date = datetime.now().strftime("%Y-%m-%d")

# Left-join the scraped lineup rows onto the full rosters, then add the
# per-team lineup status, opponent and date. Roster players with no lineup row
# default to START=0 and PLAYERSTATUS='Healthy'.
current_data = (
    roster_data
    .merge(current_player_data, how='left', on=['NAME', 'TEAM'])
    .assign(
        LINEUPSTATUS=lambda frame: frame['TEAM'].apply(lambda abbr: team_to_status[abbr]),
        OPP_TEAM=lambda frame: frame['TEAM'].apply(lambda abbr: team_to_opp_team[abbr]),
        DATE=current_date,
        START=lambda frame: frame['START'].fillna(0),
        PLAYERSTATUS=lambda frame: frame['PLAYERSTATUS'].fillna('Healthy'),
    )
)

In [10]:
# Keep only the output columns, in their final order, then show the frame.
current_data = current_data.loc[:, [
    'LINEUPSTATUS', 'PLAYERID', 'SEASON', 'DATE', 'TEAM',
    'OPP_TEAM', 'NAME', 'POSITION', 'START', 'PLAYERSTATUS',
]]
display(current_data)

Unnamed: 0,LINEUPSTATUS,PLAYERID,SEASON,DATE,TEAM,OPP_TEAM,NAME,POSITION,START,PLAYERSTATUS
0,Expected,1629665,2020,2020-12-12,IND,CLE,Jalen Lecque,Guard,0,Healthy
1,Expected,1630262,2020,2020-12-12,IND,CLE,Rayshaun Hammonds,Forward,0,Healthy
2,Expected,1629103,2020,2020-12-12,IND,CLE,Kelan Martin,Forward,0,Healthy
3,Expected,203933,2020,2020-12-12,IND,CLE,T.J. Warren,Forward,0,OUT
4,Expected,1630199,2020,2020-12-12,IND,CLE,Cassius Stanley,Guard,0,Healthy
...,...,...,...,...,...,...,...,...,...,...
265,Expected,1630190,2020,2020-12-12,UTA,PHX,Elijah Hughes,Guard,0,Healthy
266,Expected,1629129,2020,2020-12-12,UTA,PHX,Trevon Bluiett,Guard,0,Healthy
267,Expected,202711,2020,2020-12-12,UTA,PHX,Bojan Bogdanovic,Forward,0,Healthy
268,Expected,1628378,2020,2020-12-12,UTA,PHX,Donovan Mitchell,Guard,0,Healthy


In [11]:
# Sanity check: number of rows flagged START == 1 and number of distinct teams.
starter_count = int((current_data['START'] == 1).sum())
team_count = int(current_data['TEAM'].nunique(dropna=False))
print(starter_count)
print(team_count)

40
14


In [12]:
# Scraped names with no match in the roster data; these rows are dropped by
# the left merge on NAME/TEAM.
roster_names = roster_data['NAME'].unique()
unmatched_rows = ~current_player_data['NAME'].isin(roster_names)
print(current_player_data.loc[unmatched_rows, 'NAME'].unique())

['M. Brogdon' 'V. Oladipo' 'J. Holiday' 'D. Sabonis' 'Brian Bowen'
 'D. Garland' 'A. Drummond' 'Kevin Porter' 'S. Gilgeous-Alexander'
 'H. Diallo' 'A. Schofield' 'A. Pokusevski' 'J. Jackson' 'D. Murray'
 'L. Aldridge' 'K. Johnson' 'Q. Weatherspoon' 'Lonnie Walker' 'D. Graham'
 'G. Hayward' 'PJ Washington' 'Tim Hardaway' 'J. Richardson'
 'K. Porzingis' 'D. DiVincenzo' 'K. Middleton' 'G. Antetokounmpo'
 'B. Clarke' 'J. Valanciunas' 'Jaren Jackson' 'K. Tillie' 'J. Winslow'
 'D. Melton' 'D. Russell' 'A. Edwards' 'J. Hernangomez' 'K. Towns'
 'M. Porter' 'A. Wiggins' 'Kelly Oubre' 'D. Green' 'D. Mitchell'
 'S. Harrison' 'Bogdan Bogdanovic']


In [57]:
# Inspect how the roster data spells names containing 'Bogdanovic'.
bogdanovic_rows = roster_data['NAME'].str.contains('Bogdanovic')
print(roster_data[bogdanovic_rows])

        TEAMID SEASON LeagueID              NAME       PLAYER_SLUG NUM  \
17  1610612762   2020       00  Bojan Bogdanovic  bojan-bogdanovic  44   

   POSITION HEIGHT WEIGHT    BIRTH_DATE   AGE EXP      SCHOOL  PLAYERID TEAM  
17  Forward    6-8    226  APR 18, 1989  31.0   6  Fenerbahce    202711  UTA  
