Author: Aydin Najl Hossaini  
Date: 24/08/2024

In [1]:
# Imports
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType, Team

In [2]:
from tqdm.notebook import  tqdm
import requests
from bs4 import BeautifulSoup

In [3]:
# Test
# Get play-by-play for Boston Celtics game on October 16th, 2018
print("Getting play-by-play for Boston Celtics game on October 16th, 2018")
client.play_by_play(home_team=Team.BOSTON_CELTICS, year=2018, month=10, day=16)


Getting play-by-play for Boston Celtics game on October 16th, 2018


[{'period': 1,
  'period_type': <PeriodType.QUARTER: 'QUARTER'>,
  'remaining_seconds_in_period': 700.0,
  'relevant_team': <Team.PHILADELPHIA_76ERS: 'PHILADELPHIA 76ERS'>,
  'away_team': <Team.PHILADELPHIA_76ERS: 'PHILADELPHIA 76ERS'>,
  'home_team': <Team.BOSTON_CELTICS: 'BOSTON CELTICS'>,
  'away_score': 0,
  'home_score': 0,
  'description': 'R. Covington misses 3-pt jump shot from 27 ft'},
 {'period': 1,
  'period_type': <PeriodType.QUARTER: 'QUARTER'>,
  'remaining_seconds_in_period': 700.0,
  'relevant_team': <Team.BOSTON_CELTICS: 'BOSTON CELTICS'>,
  'away_team': <Team.PHILADELPHIA_76ERS: 'PHILADELPHIA 76ERS'>,
  'home_team': <Team.BOSTON_CELTICS: 'BOSTON CELTICS'>,
  'away_score': 0,
  'home_score': 0,
  'description': 'Defensive rebound by Team'},
 {'period': 1,
  'period_type': <PeriodType.QUARTER: 'QUARTER'>,
  'remaining_seconds_in_period': 675.0,
  'relevant_team': <Team.BOSTON_CELTICS: 'BOSTON CELTICS'>,
  'away_team': <Team.PHILADELPHIA_76ERS: 'PHILADELPHIA 76ERS'>,
  '

In [4]:
# Get all the games of Lebron in the 2018 season

url = "https://www.basketball-reference.com/players/j/jamesle01/gamelog/2018/"
soup = BeautifulSoup(requests.get(url).content, "html.parser") # Gets full page from URL and parses it with HTML parser

date_game_elements = soup.find_all('td', {'data-stat': 'date_game'}) # Gets the td HTML string of the dates

dates_list = []
for elem in date_game_elements: # Parses the date string to get the individual date elements
    date = elem.get_text(strip=True)
    dates_list.append(date.split("-"))

dates_list

[['2017', '10', '17'],
 ['2017', '10', '20'],
 ['2017', '10', '21'],
 ['2017', '10', '24'],
 ['2017', '10', '25'],
 ['2017', '10', '28'],
 ['2017', '10', '29'],
 ['2017', '11', '01'],
 ['2017', '11', '03'],
 ['2017', '11', '05'],
 ['2017', '11', '07'],
 ['2017', '11', '09'],
 ['2017', '11', '11'],
 ['2017', '11', '13'],
 ['2017', '11', '15'],
 ['2017', '11', '17'],
 ['2017', '11', '20'],
 ['2017', '11', '22'],
 ['2017', '11', '24'],
 ['2017', '11', '27'],
 ['2017', '11', '28'],
 ['2017', '11', '30'],
 ['2017', '12', '02'],
 ['2017', '12', '04'],
 ['2017', '12', '06'],
 ['2017', '12', '08'],
 ['2017', '12', '09'],
 ['2017', '12', '12'],
 ['2017', '12', '14'],
 ['2017', '12', '16'],
 ['2017', '12', '17'],
 ['2017', '12', '19'],
 ['2017', '12', '21'],
 ['2017', '12', '25'],
 ['2017', '12', '27'],
 ['2017', '12', '30'],
 ['2018', '01', '02'],
 ['2018', '01', '03'],
 ['2018', '01', '06'],
 ['2018', '01', '08'],
 ['2018', '01', '11'],
 ['2018', '01', '12'],
 ['2018', '01', '15'],
 ['2018', '

In [7]:
# TODO change all Team.name in away games to the host teamname
# Output all advanced player season totals for the 2017-2018 season in CSV format to 2018_10_06_BOS_PBP.csv
for date in dates_list:
    year = date[0]
    month = date[1]
    day = date[2]
    print(f"Writing play-by-play for Cavs game on {year}-{month}-{day} to CSV file")
    try:
        client.play_by_play(home_team=Team.CLEVELAND_CAVALIERS, year=year, month=month, day=day, output_type=OutputType.CSV, output_file_path=f"pbp_games/{year}_{month}_{day}_CLE_PBP.csv") # Stores all PBP as CSV's in folder
    except Exception:
        print("Away game")
        pass

Writing play-by-play for Cavs game on 2017-10-17 to CSV file
Away game
Writing play-by-play for Cavs game on 2017-10-20 to CSV file
Away game
Writing play-by-play for Cavs game on 2017-10-21 to CSV file
Writing play-by-play for Cavs game on 2017-10-24 to CSV file
Writing play-by-play for Cavs game on 2017-10-25 to CSV file
Away game
Writing play-by-play for Cavs game on 2017-10-28 to CSV file
Away game
Writing play-by-play for Cavs game on 2017-10-29 to CSV file
Writing play-by-play for Cavs game on 2017-11-01 to CSV file
Writing play-by-play for Cavs game on 2017-11-03 to CSV file
Away game
Writing play-by-play for Cavs game on 2017-11-05 to CSV file
Writing play-by-play for Cavs game on 2017-11-07 to CSV file
Writing play-by-play for Cavs game on 2017-11-09 to CSV file
Away game
Writing play-by-play for Cavs game on 2017-11-11 to CSV file
Away game
Writing play-by-play for Cavs game on 2017-11-13 to CSV file
Away game
Writing play-by-play for Cavs game on 2017-11-15 to CSV file
Away 

TODO  
1. Change teamnname in the home_team in the away games (get the away name from the same page (check if @=True then away))
2. Cut csv to contain only LBJ free throws 
    Maybe in the future use FG's as well 
3. Use prediction model


Below code is a hassle because im unable to select team and therefore it will scrape all games from a season

In [28]:
# import nba_scraper.nba_scraper as ns

# #scrape a season
# nba_df = ns.scrape_season(2019)

# if you want a csv if you don't pass a file path the default is home
# directory
# ns.scrape_game([21800001], data_format='csv', data_dir=r"C:\Users\aydin\playground\python\bball\pbp_games")

Scraping game id: 0021800001
[1610612738]


  (clean_df["homedescription"].str.contains("3PT")).fillna(False)
  | (clean_df["visitordescription"].str.contains("3PT")).fillna(False),
  (clean_df["homedescription"].str.contains("Turnover")).fillna(False)
  | (clean_df["visitordescription"].str.contains("Turnover")).fillna(False),
  (clean_df["homedescription"].str.contains("STEAL")).fillna(False)
  | (clean_df["visitordescription"].str.contains("STEAL")).fillna(False),
