# NBA stats scraper and analyzer

In [1]:
from bs4 import BeautifulSoup, Comment
import pathlib
import json
import time
from scraper.utils import find_text_of_p_with
from scraper.scraper import scrap_functions
from scraper.url import get_seasons_url, get_players_url, get_player_soup
from tqdm import tqdm

In [2]:
# --- Scrape range -----------------------------------------------------------
# Both bounds are handed to get_seasons_url() by the download cell.
START_YEAR, END_YEAR = 2010, 2020

# --- Output -----------------------------------------------------------------
# One '<season_year>.json' file per season is written into this directory
# (relative to the notebook's working directory).
OUTPUT_DIR = pathlib.Path('data')
# NOTE(review): presumably controls whether existing output files are
# replaced -- confirm where this flag is read.
OVERWRITE = True

# --- basketball-reference.com endpoints -------------------------------------
BASE_URL = 'https://www.basketball-reference.com'
# '{}' is the placeholder for the season year.
SEASONS_URL = 'https://www.basketball-reference.com/leagues/NBA_{}_per_game.html'
# Same root as BASE_URL.
PLAYER_URL = BASE_URL


In [3]:
# Download each player's page for every season in [START_YEAR, END_YEAR],
# run every registered scrape function on it, and write one JSON file per
# season into OUTPUT_DIR.

# The output directory may not exist on a fresh checkout; create it up front
# so the write at the end of a long scrape cannot fail for that reason.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

seasons = get_seasons_url(START_YEAR, END_YEAR)
for season in seasons:
    # The season identifier carries the year as its second '_'-separated field.
    season_year = int(season.split('_')[1])
    destination = OUTPUT_DIR / f'{season_year}.json'

    # Honour the OVERWRITE flag: when it is off, keep an existing season file
    # and skip the (slow) download of that season altogether.
    if destination.exists() and not OVERWRITE:
        print(f'Skipping season {season_year}, {destination} already exists')
        continue

    # Zero-pad the two-digit suffix so that e.g. 2000 yields '2000-01'
    # rather than '2000-1' (and 1999 yields '1999-00' rather than '1999-0').
    # NOTE(review): confirm this '<year>-<year+1>' label is the one the scrape
    # functions expect for the season identified by `season_year`.
    season_str = f'{season_year}-{(season_year + 1) % 100:02d}'

    # Get the players
    player_urls = get_players_url(season)
    print(f'Processing season {season_year}, found {len(player_urls)} players')

    players_data = {}
    for url in tqdm(player_urls):
        soup_file = get_player_soup(url)

        # Identification fields first, then one entry per scrape function.
        record = {'season': season_year, 'player': url}
        for key, function in scrap_functions.items():
            record[key] = function(soup_file, season=season_str)
            time.sleep(0.01)
        players_data[url] = record

        # Short pause between players to avoid hammering the site.
        time.sleep(0.1)

    # Save the data to a JSON file; default=str stringifies any value that
    # json cannot serialise natively.
    with destination.open('w', encoding='utf-8') as f:
        print(f'Saving data to {destination}')
        json.dump(players_data, f, indent=4, default=str)



Processing season 2010, found 401 players


  0%|          | 1/261 [00:01<04:56,  1.14s/it]

Born: May 6, 1984 (Age: 38-153d) in Baton Rouge, Louisiana us


  1%|          | 2/261 [00:02<06:34,  1.52s/it]

Born: October 26, 1985 (Age: 36-345d) in Rome, Italy it


  1%|          | 3/261 [00:04<06:47,  1.58s/it]

Born: November 3, 1985 (Age: 36-337d) in Poplar Bluff, Missouri us
Born: October 27, 1987 (Age: 34-344d) in Plainsboro, New Jersey us


  2%|▏         | 5/261 [00:07<05:57,  1.40s/it]

Born: February 23, 1987 (Age: 35-225d) in Detroit, Michigan us


  2%|▏         | 6/261 [00:09<06:34,  1.55s/it]

Born: April 29, 1987 (Age: 35-160d) in Ontario, California us
Born: June 12, 1976 (Age: 46-116d) in Shreveport, Louisiana us


  3%|▎         | 8/261 [00:12<06:58,  1.66s/it]

Born: December 8, 1982 (Age: 39-302d) in Portland, Oregon us
Born: February 10, 1985 (Age: 37-238d) in Monroe, Louisiana us


  4%|▍         | 10/261 [00:16<07:43,  1.85s/it]

Born: November 30, 1986 (Age: 35-310d) in Los Angeles, California us
Born: May 7, 1978 (Age: 44-152d) in Waukegan, Illinois us


  4%|▍         | 11/261 [00:19<08:13,  1.98s/it]

Born: January 9, 1989 (Age: 33-270d) in Cheverly, Maryland us


  5%|▍         | 12/261 [00:21<08:11,  1.97s/it]

Born: December 2, 1985 (Age: 36-308d) in Los Angeles, California us


  5%|▌         | 14/261 [00:24<07:16,  1.77s/it]

Born: May 19, 1985 (Age: 37-140d) in Ahvaz, Iran ir


  6%|▌         | 15/261 [00:26<07:25,  1.81s/it]

Born: May 7, 1979 (Age: 43-152d) in Milwaukee, Wisconsin us


  6%|▌         | 16/261 [00:28<07:22,  1.81s/it]

Born: November 20, 1975 (Age: 46-320d) in Fort Madison, Iowa us


  7%|▋         | 17/261 [00:29<06:52,  1.69s/it]

Born: April 18, 1986 (Age: 36-171d) in Jamaica, New York us


  7%|▋         | 18/261 [00:30<06:07,  1.51s/it]

Born: October 10, 1986 (Age: 35-361d) in Sydney, Australia au
Born: March 25, 1986 (Age: 36-195d) in San Giovanni in Persiceto, Italy it


  7%|▋         | 19/261 [00:32<06:41,  1.66s/it]

Born: November 20, 1981 (Age: 40-320d) in Aschaffenburg, Germany de


  8%|▊         | 20/261 [00:34<07:05,  1.76s/it]

Born: August 23, 1987 (Age: 35-044d) in Rancho Cucamonga, California us


  8%|▊         | 21/261 [00:36<07:08,  1.79s/it]

Born: November 10, 1987 (Age: 34-330d) in New Orleans, Louisiana us


  8%|▊         | 22/261 [00:38<08:01,  2.01s/it]

Born: March 19, 1976 (Age: 46-201d) in Los Angeles, California us


  9%|▉         | 23/261 [00:41<08:55,  2.25s/it]

Born: May 17, 1982 (Age: 40-142d) in Bruges, Belgium be


  9%|▉         | 24/261 [00:44<09:02,  2.29s/it]

Born: December 30, 1980 (Age: 41-280d) in Kinshasa, Democratic Republic of the Congo cd


 10%|▉         | 25/261 [00:46<08:40,  2.21s/it]

Born: August 9, 1974 (Age: 48-058d) in Little Rock, Arkansas us


 10%|▉         | 26/261 [00:49<09:37,  2.46s/it]