In [1]:
import pandas as pd
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder

In [2]:
# Season keys as four-digit strings, 2015 through 2023 inclusive.
SEASONS = [str(year) for year in range(2015, 2024)]

In [3]:
# Scrape one head-coach table per season from basketball-reference and stack them
# into `coaches_clean` with columns Tm, Coach, G (games coached that season), Season.
#
# basketball-reference names a season page after the year the season ENDS
# (NBA_2016_coaches.html is the 2015-16 season), while the keys in SEASONS are
# season START years (they are later matched against the last four characters of
# the game data's SEASON_ID). The page for `season` is therefore fetched with
# year + 1, and the row is labelled with the start year so both sides line up.
coaches_by_season = []
for season in SEASONS:
    bbref_year = int(season) + 1
    coaches_df = pd.read_html(f'https://www.basketball-reference.com/leagues/NBA_{bbref_year}_coaches.html')[0]
    # The scraped table has a three-level column header; pick team, coach and
    # current-season regular-season games by their full tuples.
    coaches_clean_season = pd.concat([coaches_df[('Unnamed: 1_level_0', 'Unnamed: 1_level_1', 'Tm')],
                                      coaches_df[('Unnamed: 0_level_0', 'Unnamed: 0_level_1', 'Coach')],
                                      coaches_df[('Regular Season', 'Current Season', 'G')]], axis=1)
    coaches_clean_season.columns = ['Tm', 'Coach', 'G']
    coaches_clean_season['Season'] = season
    coaches_by_season.append(coaches_clean_season)

# Concatenate once instead of re-copying the accumulated frame on every iteration.
coaches_clean = pd.concat(coaches_by_season, ignore_index=True)

In [4]:
# Download every team's game log and keep the rows belonging to the seasons in SEASONS.
teams_ids = [x['id'] for x in teams.get_teams()]
team_game_frames = []
for team in teams_ids:
    games = leaguegamefinder.LeagueGameFinder(team_id_nullable=team).get_data_frames()[0]
    # SEASON_YEAR is the last four characters of SEASON_ID.
    games['SEASON_YEAR'] = games['SEASON_ID'].str[-4:]
    # .copy() makes the filtered rows an independent frame, so the GAME_DATE
    # assignment below is not a write onto a slice of the unfiltered frame.
    games = games[games['SEASON_YEAR'].isin(SEASONS)].copy()
    games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])
    team_game_frames.append(games)

# Concatenate once instead of re-copying the accumulated frame on every iteration.
all_games_df = pd.concat(team_game_frames, ignore_index=True)


In [5]:
# Keep only rows whose SEASON_ID, read as text, begins with '2'
# (presumably the regular-season prefix -- confirm against the API docs).
season_id_text = all_games_df['SEASON_ID'].astype(str)
all_games_df = all_games_df[season_id_text.str.startswith('2')]

In [6]:
# One start date per entry of SEASONS, in the same order (zipped together below).
season_starts = [
    '2015-10-27',
    '2016-10-25',
    '2017-10-17',
    '2018-10-16',
    '2019-10-22',
    '2020-12-22',
    '2021-10-19',
    '2022-10-18',
    '2023-10-24',
]

In [7]:
# Map each season key to its start date by pairing the two lists positionally.
season_starts_dict = {season_key: start_date for season_key, start_date in zip(SEASONS, season_starts)}

In [8]:
# Echo the season -> start-date mapping as the cell output for a visual check.
season_starts_dict

{'2015': '2015-10-27',
 '2016': '2016-10-25',
 '2017': '2017-10-17',
 '2018': '2018-10-16',
 '2019': '2019-10-22',
 '2020': '2020-12-22',
 '2021': '2021-10-19',
 '2022': '2022-10-18',
 '2023': '2023-10-24'}

In [9]:
# Collect, per season, the rows with SEASON_ID '2<season>' dated on or after
# that season's start date.
regular_seasons = []

for season in SEASONS:
    in_this_season = all_games_df['SEASON_ID'] == f'2{season}'
    on_or_after_start = all_games_df['GAME_DATE'] >= season_starts_dict[season]
    regular_season_year = all_games_df[in_this_season & on_or_after_start]
    # Sanity check, skipped for 2015, 2019 and 2020. Note that it only looks at
    # the first team in TEAM_ID group order, not at every team.
    if season not in ['2015','2019','2020']:
        games_per_team = regular_season_year.groupby('TEAM_ID').size()
        assert games_per_team.iloc[0] == 82
    regular_seasons.append(regular_season_year)

In [10]:
# Stack the per-season frames into one table with a fresh 0..n-1 index.
regular_season_df = pd.concat(objs=regular_seasons, ignore_index=True)

In [11]:
# Make sure GAME_DATE is datetime-typed before it is used for sorting.
regular_season_df['GAME_DATE'] = regular_season_df['GAME_DATE'].pipe(pd.to_datetime)

In [12]:
# Work on an independent copy so adding the coach column leaves regular_season_df untouched.
regular_season_coaches = regular_season_df.copy(deep=True)

In [13]:
# Order rows by season, then team, then game date, and renumber the index 0..n-1
# so that row positions follow that ordering.
sort_keys = ['SEASON_YEAR','TEAM_ABBREVIATION','GAME_DATE']
regular_season_coaches = regular_season_coaches.sort_values(by=sort_keys)
regular_season_coaches = regular_season_coaches.reset_index(drop=True)

In [14]:
# Translate the three team codes that differ between the coach table and the
# TEAM_ABBREVIATION values used in the game data.
team_code_fixes = {'CHO': 'CHA', 'PHO': 'PHX', 'BRK': 'BKN'}
coaches_clean['Tm'] = coaches_clean['Tm'].replace(team_code_fixes)


In [16]:
# Label every game row with a coach. For each (season, team) the matching rows of
# `coaches_clean` are looked up; a single coach is applied to every game, and
# several coaches are applied to consecutive runs of games (in date order) whose
# lengths are the coaches' G values.
#
# Differences from a naive per-branch version:
#   * G values are used as-is (no de-duplication), so two stints of equal length
#     (e.g. 41 and 41) are still recognised as two stints.
#   * Any number of stints is handled by one running offset.
#   * Runs are sliced out of the team's own game rows, so a stint list that adds
#     up to more games than the team has rows cannot label another team's rows.
#   * Labels are collected in a separate Series and attached once at the end, so
#     the frame is not modified while groupby is iterating over it.
#
# NOTE(review): stints are consumed in the row order of `coaches_clean`; this
# assumes that order is chronological within a team-season -- confirm against
# the source table. Games not covered by any stint stay missing.
coach_by_game = pd.Series(index=regular_season_coaches.index, dtype=object)

for (season_yr, team_abr), group in regular_season_coaches.groupby(['SEASON_YEAR','TEAM_ABBREVIATION']):
    coaches_year = coaches_clean[(coaches_clean['Season'] == season_yr) &
                                 (coaches_clean['Tm'] == team_abr)]
    if coaches_year.empty:
        continue

    # Index labels of this team's games, earliest first.
    team_game_index = group.sort_values('GAME_DATE').index

    if coaches_year.shape[0] == 1:
        coach_by_game.loc[team_game_index] = coaches_year['Coach'].iloc[0]
        continue

    first_game = 0
    for coach, n_games in zip(coaches_year['Coach'], coaches_year['G']):
        last_game = first_game + int(n_games)
        coach_by_game.loc[team_game_index[first_game:last_game]] = coach
        first_game = last_game

regular_season_coaches['Coach'] = coach_by_game
            


In [20]:
# Hand-written coach labels for rows addressed by hard-coded row numbers.
# NOTE(review): these row numbers are only valid for one specific size and
# ordering of `regular_season_coaches`; re-check them whenever the upstream
# data or sorting changes.
#
# Except for the ATL entry at the bottom, the patches only fill rows that have
# no coach yet, so they cannot overwrite a label that is already set.
coach_missing = regular_season_coaches['Coach'].isna()
row_number = regular_season_coaches.index

# Two back-to-back 41-row stints starting at row 2870. Half-open bounds give
# each coach exactly 41 rows and stop after 82 rows; inclusive bounds would hand
# the first coach 42 rows and reach one row past the 82-row span.
blatt_start = 2870
lue_start = blatt_start + 41
lue_end = lue_start + 41
regular_season_coaches.loc[coach_missing &
                           (row_number >= blatt_start) &
                           (row_number < lue_start), 'Coach'] = 'David Blatt'
regular_season_coaches.loc[coach_missing &
                           (row_number >= lue_start) &
                           (row_number < lue_end), 'Coach'] = 'Tyronn Lue'

# (team, season, first row number, coach): fill the still-unlabelled rows of that
# team-season from the given row number onwards.
tail_fills = [
    ('CLE', '2020', 12383, 'J.B. Bickerstaff'),
    ('NYK', '2020', 13392, 'Mike Miller'),
    ('MIN', '2021', 15584, 'Chris Finch'),
]
for fill_team, fill_season, fill_first_row, fill_coach in tail_fills:
    regular_season_coaches.loc[coach_missing &
                               (regular_season_coaches['TEAM_ABBREVIATION'] == fill_team) &
                               (regular_season_coaches['SEASON_YEAR'] == fill_season) &
                               (row_number >= fill_first_row), 'Coach'] = fill_coach

# Kept as an unconditional overwrite.
# NOTE(review): the lower bound may be the first ATL row of that season, which
# would make this a whole-season override rather than a fill-in -- confirm intent.
regular_season_coaches.loc[(regular_season_coaches['TEAM_ABBREVIATION']=='ATL')&
                       (regular_season_coaches['SEASON_YEAR']=='2021')&
                       (row_number >= 14118),'Coach'] = 'Nate McMillan'

In [22]:
# Persist the coach-labelled game table without the row index column.
output_csv = 'data/regular_season_coaches.csv'
regular_season_coaches.to_csv(output_csv, index=False)