In [9]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests
from enum import Enum
from datetime import datetime
from tqdm import tqdm
import time
from tabulate import tabulate

from basketball_reference_web_scraper.data import TEAM_TO_TEAM_ABBREVIATION
from basketball_reference_web_scraper.errors import InvalidDate
from basketball_reference_web_scraper.parsers.box_scores.games import parse_game_url_paths
from basketball_reference_web_scraper.parsers.play_by_play import parse_play_by_plays
from basketball_reference_web_scraper.parsers.box_scores.players import parse_player_box_scores
from basketball_reference_web_scraper.parsers.box_scores.teams import parse_team_totals
from basketball_reference_web_scraper.parsers.players_advanced_season_totals import parse_players_advanced_season_totals
from basketball_reference_web_scraper.parsers.players_season_totals import parse_players_season_totals
from basketball_reference_web_scraper.parsers.schedule import parse_schedule, parse_schedule_for_month_url_paths
from basketball_reference_web_scraper import client

# requests.get() fetches the page's HTML and returns a Response object
# stats_page = requests.get('https://www.basketball-reference.com/leagues/NBA_2018_per_game.html')

# access the web page HTML with the content attribute
# content = stats_page.content
# soup = BeautifulSoup(content, 'html.parser')
# table = soup.find(name='table', attrs={'id':'per_game_stats'})

In [10]:
class Team(Enum):
    """Enumeration of NBA franchises.

    Each member name is the upper-case team name joined with underscores and
    each member value is the same name written with spaces (the
    New Orleans/Oklahoma City Hornets value additionally contains a ``/``).
    Current teams are listed first, followed by franchises that no longer
    exist under that name.
    """
    ATLANTA_HAWKS = "ATLANTA HAWKS"
    BOSTON_CELTICS = "BOSTON CELTICS"
    BROOKLYN_NETS = "BROOKLYN NETS"
    CHARLOTTE_HORNETS = "CHARLOTTE HORNETS"
    CHICAGO_BULLS = "CHICAGO BULLS"
    CLEVELAND_CAVALIERS = "CLEVELAND CAVALIERS"
    DALLAS_MAVERICKS = "DALLAS MAVERICKS"
    DENVER_NUGGETS = "DENVER NUGGETS"
    DETROIT_PISTONS = "DETROIT PISTONS"
    GOLDEN_STATE_WARRIORS = "GOLDEN STATE WARRIORS"
    HOUSTON_ROCKETS = "HOUSTON ROCKETS"
    INDIANA_PACERS = "INDIANA PACERS"
    LOS_ANGELES_CLIPPERS = "LOS ANGELES CLIPPERS"
    LOS_ANGELES_LAKERS = "LOS ANGELES LAKERS"
    MEMPHIS_GRIZZLIES = "MEMPHIS GRIZZLIES"
    MIAMI_HEAT = "MIAMI HEAT"
    MILWAUKEE_BUCKS = "MILWAUKEE BUCKS"
    MINNESOTA_TIMBERWOLVES = "MINNESOTA TIMBERWOLVES"
    NEW_ORLEANS_PELICANS = "NEW ORLEANS PELICANS"
    NEW_YORK_KNICKS = "NEW YORK KNICKS"
    OKLAHOMA_CITY_THUNDER = "OKLAHOMA CITY THUNDER"
    ORLANDO_MAGIC = "ORLANDO MAGIC"
    PHILADELPHIA_76ERS = "PHILADELPHIA 76ERS"
    PHOENIX_SUNS = "PHOENIX SUNS"
    PORTLAND_TRAIL_BLAZERS = "PORTLAND TRAIL BLAZERS"
    SACRAMENTO_KINGS = "SACRAMENTO KINGS"
    SAN_ANTONIO_SPURS = "SAN ANTONIO SPURS"
    TORONTO_RAPTORS = "TORONTO RAPTORS"
    UTAH_JAZZ = "UTAH JAZZ"
    WASHINGTON_WIZARDS = "WASHINGTON WIZARDS"

    # DEPRECATED TEAMS
    CHARLOTTE_BOBCATS = "CHARLOTTE BOBCATS"
    NEW_JERSEY_NETS = "NEW JERSEY NETS"
    NEW_ORLEANS_HORNETS = "NEW ORLEANS HORNETS"
    NEW_ORLEANS_OKLAHOMA_CITY_HORNETS = "NEW ORLEANS/OKLAHOMA CITY HORNETS"
    SEATTLE_SUPERSONICS = "SEATTLE SUPERSONICS"
    VANCOUVER_GRIZZLIES = "VANCOUVER GRIZZLIES"

    


In [11]:
class Club:
    """A named team object that records itself in a class-wide registry.

    Attributes
    ----------
    instances : list
        Class-level list holding every ``Club`` ever constructed, in
        creation order. Nothing is ever removed from it.
    name : str
        The identifier passed to the constructor; also used as the repr.
    """

    # Shared by all instances: every constructor call appends to this list.
    instances = []

    def __init__(self, name):
        """Store ``name`` and append the new object to ``Club.instances``."""
        self.name = name
        Club.instances.append(self)

    def __repr__(self):
        """Show the club simply as its name."""
        return self.name
        
# One module-level Club per franchise; constructing each one also registers it
# in Club.instances, which is what team_list is derived from below.
ATLANTA_HAWKS = Club("ATLANTA_HAWKS")
BOSTON_CELTICS = Club("BOSTON_CELTICS")
BROOKLYN_NETS = Club("BROOKLYN_NETS")
CHARLOTTE_HORNETS = Club("CHARLOTTE_HORNETS")
CHICAGO_BULLS = Club("CHICAGO_BULLS")
CLEVELAND_CAVALIERS = Club("CLEVELAND_CAVALIERS")
DALLAS_MAVERICKS = Club("DALLAS_MAVERICKS")
DENVER_NUGGETS = Club("DENVER_NUGGETS")
DETROIT_PISTONS = Club("DETROIT_PISTONS")
GOLDEN_STATE_WARRIORS = Club("GOLDEN_STATE_WARRIORS")
HOUSTON_ROCKETS = Club("HOUSTON_ROCKETS")
INDIANA_PACERS = Club("INDIANA_PACERS")
LOS_ANGELES_CLIPPERS = Club("LOS_ANGELES_CLIPPERS")
LOS_ANGELES_LAKERS = Club("LOS_ANGELES_LAKERS")
MEMPHIS_GRIZZLIES = Club("MEMPHIS_GRIZZLIES")
MIAMI_HEAT = Club("MIAMI_HEAT")
MILWAUKEE_BUCKS = Club("MILWAUKEE_BUCKS")
MINNESOTA_TIMBERWOLVES = Club("MINNESOTA_TIMBERWOLVES")
NEW_ORLEANS_PELICANS = Club("NEW_ORLEANS_PELICANS")
NEW_YORK_KNICKS = Club("NEW_YORK_KNICKS")
OKLAHOMA_CITY_THUNDER = Club("OKLAHOMA_CITY_THUNDER")
ORLANDO_MAGIC = Club("ORLANDO_MAGIC")
PHILADELPHIA_76ERS = Club("PHILADELPHIA_76ERS")
PHOENIX_SUNS = Club("PHOENIX_SUNS")
PORTLAND_TRAIL_BLAZERS = Club("PORTLAND_TRAIL_BLAZERS")
SACRAMENTO_KINGS = Club("SACRAMENTO_KINGS")
SAN_ANTONIO_SPURS = Club("SAN_ANTONIO_SPURS")
TORONTO_RAPTORS = Club("TORONTO_RAPTORS")
UTAH_JAZZ = Club("UTAH_JAZZ")
WASHINGTON_WIZARDS = Club("WASHINGTON_WIZARDS")

# DEPRECATED TEAMS
CHARLOTTE_BOBCATS = Club("CHARLOTTE_BOBCATS")
NEW_JERSEY_NETS = Club("NEW_JERSEY_NETS")
NEW_ORLEANS_HORNETS = Club("NEW_ORLEANS_HORNETS")
# NOTE(review): this name contains "/" while the Team enum member is named
# NEW_ORLEANS_OKLAHOMA_CITY_HORNETS; enum_to_class builds Clubs from the
# member name, so those Clubs will not match this team_list entry - confirm
# which spelling is intended.
NEW_ORLEANS_OKLAHOMA_CITY_HORNETS = Club("NEW_ORLEANS/OKLAHOMA_CITY_HORNETS")
SEATTLE_SUPERSONICS = Club("SEATTLE_SUPERSONICS")
VANCOUVER_GRIZZLIES = Club("VANCOUVER_GRIZZLIES")

# Unique club names in first-seen order. The previous version keyed a dict on
# id(instance), which can never collapse anything because every live object
# has a distinct id; keying on the name gives real de-duplication, so extra
# Club objects registered under an existing name do not repeat it here.
team_list = list(dict.fromkeys(instance.name for instance in Club.instances))

In [3]:
team_list

['ATLANTA_HAWKS',
 'BOSTON_CELTICS',
 'BROOKLYN_NETS',
 'CHARLOTTE_HORNETS',
 'CHICAGO_BULLS',
 'CLEVELAND_CAVALIERS',
 'DALLAS_MAVERICKS',
 'DENVER_NUGGETS',
 'DETROIT_PISTONS',
 'GOLDEN_STATE_WARRIORS',
 'HOUSTON_ROCKETS',
 'INDIANA_PACERS',
 'LOS_ANGELES_CLIPPERS',
 'LOS_ANGELES_LAKERS',
 'MEMPHIS_GRIZZLIES',
 'MIAMI_HEAT',
 'MILWAUKEE_BUCKS',
 'MINNESOTA_TIMBERWOLVES',
 'NEW_ORLEANS_PELICANS',
 'NEW_YORK_KNICKS',
 'OKLAHOMA_CITY_THUNDER',
 'ORLANDO_MAGIC',
 'PHILADELPHIA_76ERS',
 'PHOENIX_SUNS',
 'PORTLAND_TRAIL_BLAZERS',
 'SACRAMENTO_KINGS',
 'SAN_ANTONIO_SPURS',
 'TORONTO_RAPTORS',
 'UTAH_JAZZ',
 'WASHINGTON_WIZARDS',
 'CHARLOTTE_BOBCATS',
 'NEW_JERSEY_NETS',
 'NEW_ORLEANS_HORNETS',
 'NEW_ORLEANS/OKLAHOMA_CITY_HORNETS',
 'SEATTLE_SUPERSONICS',
 'VANCOUVER_GRIZZLIES']

In [7]:
# Pair each team's display form (underscores replaced by spaces) with its identifier.
[(team.replace('_',' '),team) for team in team_list]

[('ATLANTA HAWKS', 'ATLANTA_HAWKS'),
 ('BOSTON CELTICS', 'BOSTON_CELTICS'),
 ('BROOKLYN NETS', 'BROOKLYN_NETS'),
 ('CHARLOTTE HORNETS', 'CHARLOTTE_HORNETS'),
 ('CHICAGO BULLS', 'CHICAGO_BULLS'),
 ('CLEVELAND CAVALIERS', 'CLEVELAND_CAVALIERS'),
 ('DALLAS MAVERICKS', 'DALLAS_MAVERICKS'),
 ('DENVER NUGGETS', 'DENVER_NUGGETS'),
 ('DETROIT PISTONS', 'DETROIT_PISTONS'),
 ('GOLDEN STATE WARRIORS', 'GOLDEN_STATE_WARRIORS'),
 ('HOUSTON ROCKETS', 'HOUSTON_ROCKETS'),
 ('INDIANA PACERS', 'INDIANA_PACERS'),
 ('LOS ANGELES CLIPPERS', 'LOS_ANGELES_CLIPPERS'),
 ('LOS ANGELES LAKERS', 'LOS_ANGELES_LAKERS'),
 ('MEMPHIS GRIZZLIES', 'MEMPHIS_GRIZZLIES'),
 ('MIAMI HEAT', 'MIAMI_HEAT'),
 ('MILWAUKEE BUCKS', 'MILWAUKEE_BUCKS'),
 ('MINNESOTA TIMBERWOLVES', 'MINNESOTA_TIMBERWOLVES'),
 ('NEW ORLEANS PELICANS', 'NEW_ORLEANS_PELICANS'),
 ('NEW YORK KNICKS', 'NEW_YORK_KNICKS'),
 ('OKLAHOMA CITY THUNDER', 'OKLAHOMA_CITY_THUNDER'),
 ('ORLANDO MAGIC', 'ORLANDO_MAGIC'),
 ('PHILADELPHIA 76ERS', 'PHILADELPHIA_76ERS'),
 

In [14]:
def get_team_name(series):
    """Return the ``value`` attribute of ``series``.

    Intended for enum members coming back from the scraper, whose ``value``
    holds the team name.
    """
    team_name = series.value
    return team_name


def get_date(series):
    """Return the result of calling ``series.date()``.

    For a ``datetime`` this is the calendar date with the time of day dropped.
    """
    calendar_date = series.date()
    return calendar_date

def enum_to_class(series):
    """Build a new ``Club`` whose name is ``series.name``.

    Every call constructs a fresh ``Club`` (which also registers itself in
    ``Club.instances``).
    """
    member_name = series.name
    return Club(member_name)

def get_season_number(series):
    """Return the season label (for example ``'18-19'``) a date belongs to.

    Parameters
    ----------
    series : datetime-like
        Any object exposing ``year`` and ``month`` attributes.

    Returns
    -------
    str
        ``'<start yy>-<end yy>'``. Months 1-7 are assigned to the season that
        started in the previous calendar year; months 8-12 to the season that
        ends in the following calendar year. If ``month`` is outside 1-12 the
        string ``'there was an error'`` is returned, as before.
    """
    year = series.year
    month = series.month

    if 1 <= month <= 7:
        start_year = year - 1
    elif 7 < month <= 12:
        start_year = year
    else:
        return 'there was an error'

    # Both halves of a season must produce the same label. Previously the
    # January-July branch joined with '_' and the August-December branch with
    # '-', so a single season was split into two different labels.
    return f'{start_year % 100:02d}-{(start_year + 1) % 100:02d}'
    

def get_nba_schedules(num_seasons, latest_season_end_year=None):
    """Download and combine the schedules of the most recent seasons.

    Parameters
    ----------
    num_seasons : int
        How many consecutive seasons to fetch, counting backwards from
        ``latest_season_end_year``. Must be at least 1.
    latest_season_end_year : int, optional
        End year of the newest season to fetch. Defaults to the current
        calendar year.

    Returns
    -------
    pandas.DataFrame
        One row per game, with the columns returned by
        ``client.season_schedule`` plus:

        * ``away_team`` / ``home_team`` converted with ``enum_to_class``
        * ``season`` - label computed by ``get_season_number``
        * ``start_time`` - reduced to a date via ``.date()``
        * ``home_win`` - 1 if the home score is greater than the away score, else 0

        The per-season row indices are kept as returned (not renumbered), so
        index values repeat across seasons.

    Raises
    ------
    ValueError
        If ``num_seasons`` is smaller than 1.
    """
    if num_seasons < 1:
        raise ValueError('num_seasons must be at least 1')
    if latest_season_end_year is None:
        latest_season_end_year = datetime.now().year

    season_frames = []
    for i in tqdm(range(num_seasons)):
        print(f'getting schedule {i}')
        # The original body never assigned `season` (both assignments were
        # commented out), so the call below raised NameError. Step back one
        # year per iteration so each pass fetches a different season.
        season = latest_season_end_year - i
        season_frames.append(pd.DataFrame.from_dict(client.season_schedule(season_end_year=season)))
        time.sleep(.5)  # pause between requests to the remote site

    # Concatenate once instead of growing a frame with DataFrame.append
    # (quadratic, and removed in pandas 2.x).
    df = pd.concat(season_frames, sort=False)

    df['away_team'] = df['away_team'].apply(enum_to_class)
    df['home_team'] = df['home_team'].apply(enum_to_class)
    # The season label must be derived before start_time is reduced to a date.
    df['season'] = df['start_time'].apply(get_season_number)
    df['start_time'] = df['start_time'].apply(lambda x: x.date())
    df['home_win'] = (df['home_team_score'] > df['away_team_score']).astype(int)

    return df

def get_team_box_scores(df, teams=None):
    """Split a schedule frame into one frame of home games per team.

    Parameters
    ----------
    df : pandas.DataFrame
        Schedule frame with a ``home_team`` column whose values expose a
        ``name`` attribute.
    teams : iterable of str, optional
        Team names to build entries for. Defaults to the module-level
        ``team_list``.

    Returns
    -------
    dict
        Maps each team name to the rows of ``df`` where that team is the home
        team. Teams without home games map to an empty frame.
    """
    if teams is None:
        teams = team_list

    # Extract the home-team names once rather than once per team.
    home_team_names = df['home_team'].apply(lambda club: club.name)

    return {team: df[home_team_names == team] for team in teams}

def get_full_box_scores(box_scores):
    """Attach each home game's team box score to its schedule row.

    Parameters
    ----------
    box_scores : dict
        Maps every name in ``team_list`` to a frame of that team's home games;
        each row needs a ``start_time`` exposing ``year``, ``month`` and ``day``.

    Returns
    -------
    dict
        Maps team name to a frame in which every schedule row is placed side
        by side with that team's box-score row for the same date. As in the
        original implementation, teams with no home games get no entry.

    Notes
    -----
    ``client.team_box_scores`` has been seen to raise ``IndexError`` for some
    dates (for example 25 Dec 2018 in this notebook's recorded output). That
    error is not handled here and will propagate to the caller.
    """
    full_box_scores = dict()
    # Several teams host games on the same date, so keep each date's download
    # and reuse it instead of requesting the same page once per home game.
    box_scores_by_date = dict()

    for team in tqdm(team_list):
        print(f'Working on team {team}')
        df = box_scores[team]
        merged_frames = []

        for _, row in tqdm(df.iterrows(), total=len(df)):
            print(f'Working on row {row}')
            game_date = row['start_time']
            date_key = (game_date.year, game_date.month, game_date.day)

            if date_key not in box_scores_by_date:
                time.sleep(.5)  # pause only before an actual request
                year, month, day = date_key
                fetched = pd.DataFrame.from_dict(client.team_box_scores(day=day, month=month, year=year))
                fetched['team'] = fetched['team'].apply(enum_to_class)
                box_scores_by_date[date_key] = fetched

            day_box_scores = box_scores_by_date[date_key]
            team_box_scores = day_box_scores[day_box_scores['team'].apply(lambda x: x.name) == team]
            merged_frames.append(pd.concat([row.to_frame().T.reset_index(), team_box_scores.reset_index()], axis=1))

        # Build the team's frame once, replacing the per-row DataFrame.append
        # (quadratic, and removed in pandas 2.x).
        if merged_frames:
            full_box_scores[team] = pd.concat(merged_frames)

    return full_box_scores
        

In [23]:
# Fetch the schedule for the season ending in 2020 and save it to disk as a pickle.
# NOTE(review): the destination is a hard-coded path under the user's home directory.
new_schedule = pd.DataFrame.from_dict(client.season_schedule(season_end_year=2020))
new_schedule.to_pickle('~/Insight/NBA_GameFinder/data/upcoming_season.pkl')

In [24]:
new_schedule

Unnamed: 0,start_time,away_team,home_team,away_team_score,home_team_score
0,2019-10-23 00:00:00+00:00,Team.NEW_ORLEANS_PELICANS,Team.TORONTO_RAPTORS,,
1,2019-10-23 02:30:00+00:00,Team.LOS_ANGELES_LAKERS,Team.LOS_ANGELES_CLIPPERS,,
2,2019-10-23 23:00:00+00:00,Team.CLEVELAND_CAVALIERS,Team.ORLANDO_MAGIC,,
3,2019-10-23 23:00:00+00:00,Team.DETROIT_PISTONS,Team.INDIANA_PACERS,,
4,2019-10-23 23:00:00+00:00,Team.CHICAGO_BULLS,Team.CHARLOTTE_HORNETS,,
5,2019-10-23 23:30:00+00:00,Team.MEMPHIS_GRIZZLIES,Team.MIAMI_HEAT,,
6,2019-10-23 23:30:00+00:00,Team.MINNESOTA_TIMBERWOLVES,Team.BROOKLYN_NETS,,
7,2019-10-23 23:30:00+00:00,Team.BOSTON_CELTICS,Team.PHILADELPHIA_76ERS,,
8,2019-10-24 00:30:00+00:00,Team.WASHINGTON_WIZARDS,Team.DALLAS_MAVERICKS,,
9,2019-10-24 00:30:00+00:00,Team.NEW_YORK_KNICKS,Team.SAN_ANTONIO_SPURS,,


In [15]:
# Build the combined schedule frame, then split it into per-team home-game frames.
# NOTE(review): the recorded progress output shows a single iteration (0/1), which
# does not match num_seasons=5 - it looks like output from an earlier run; confirm by re-running.
all_schedules = get_nba_schedules(num_seasons=5)
box_scores = get_team_box_scores(all_schedules)


  0%|          | 0/1 [00:00<?, ?it/s][A

getting schedule 0



100%|██████████| 1/1 [00:22<00:00, 22.60s/it][A


In [21]:
all_schedules

Unnamed: 0,start_time,away_team,home_team,away_team_score,home_team_score,season,home_win
0,2018-10-17,PHILADELPHIA_76ERS,BOSTON_CELTICS,87,105,18-19,1
1,2018-10-17,OKLAHOMA_CITY_THUNDER,GOLDEN_STATE_WARRIORS,100,108,18-19,1
2,2018-10-17,MILWAUKEE_BUCKS,CHARLOTTE_HORNETS,113,112,18-19,0
3,2018-10-17,BROOKLYN_NETS,DETROIT_PISTONS,100,103,18-19,1
4,2018-10-17,MEMPHIS_GRIZZLIES,INDIANA_PACERS,83,111,18-19,1
5,2018-10-17,MIAMI_HEAT,ORLANDO_MAGIC,101,104,18-19,1
6,2018-10-17,ATLANTA_HAWKS,NEW_YORK_KNICKS,107,126,18-19,1
7,2018-10-17,CLEVELAND_CAVALIERS,TORONTO_RAPTORS,104,116,18-19,1
8,2018-10-18,NEW_ORLEANS_PELICANS,HOUSTON_ROCKETS,131,112,18-19,0
9,2018-10-18,MINNESOTA_TIMBERWOLVES,SAN_ANTONIO_SPURS,108,112,18-19,1


In [7]:
# Attach box scores to every home game and display the result.
# The recorded run of this cell stopped with "IndexError: list index out of range"
# while processing the first ATLANTA_HAWKS row.
full_box_scores = get_full_box_scores(box_scores)
full_box_scores

  0%|          | 0/36 [00:00<?, ?it/s]
0it [00:00, ?it/s][A

Working on team ATLANTA_HAWKS
Working on row start_time               2018-10-24
away_team          DALLAS_MAVERICKS
home_team             ATLANTA_HAWKS
away_team_score                 104
home_team_score                 111
season                        18-19
home_win                          1
Name: 51, dtype: object


0it [00:01, ?it/s]
  0%|          | 0/36 [00:01<?, ?it/s]


IndexError: list index out of range

In [19]:
# Spot-check the client for a single date: the result is a list with one dict of totals per team.
client.team_box_scores(day=1, month=1, year=2019)

[{'team': <Team.NEW_YORK_KNICKS: 'NEW YORK KNICKS'>,
  'minutes_played': 240,
  'made_field_goals': 42,
  'attempted_field_goals': 93,
  'made_three_point_field_goals': 11,
  'attempted_three_point_field_goals': 27,
  'made_free_throws': 13,
  'attempted_free_throws': 18,
  'offensive_rebounds': 15,
  'defensive_rebounds': 28,
  'assists': 28,
  'steals': 4,
  'blocks': 3,
  'turnovers': 8,
  'personal_fouls': 21},
 {'team': <Team.DENVER_NUGGETS: 'DENVER NUGGETS'>,
  'minutes_played': 240,
  'made_field_goals': 44,
  'attempted_field_goals': 95,
  'made_three_point_field_goals': 12,
  'attempted_three_point_field_goals': 34,
  'made_free_throws': 15,
  'attempted_free_throws': 23,
  'offensive_rebounds': 16,
  'defensive_rebounds': 37,
  'assists': 36,
  'steals': 7,
  'blocks': 6,
  'turnovers': 8,
  'personal_fouls': 19},
 {'team': <Team.PHILADELPHIA_76ERS: 'PHILADELPHIA 76ERS'>,
  'minutes_played': 240,
  'made_field_goals': 42,
  'attempted_field_goals': 87,
  'made_three_point_fie

In [15]:
# Reproduce the failure in isolation: this date raises IndexError inside the client call.
client.team_box_scores(day=25, month=12, year=2018)

IndexError: list index out of range