## Getting Up-to-Date Data
This web scraper collects NBA stat data for every team's most recent game and its current three-game average.  
Website used for web scraping: https://www.teamrankings.com/nba/stat/


In [1]:
import pandas as pd
import json

In [2]:
# reports whether a team's most recent listed game was played at home
def get_last_game_location(team):
    """Return 1.0 if the team's last fully populated schedule row is a home game, else 0.0.

    Args:
        team: Team slug substituted into the teamrankings.com team page URL.

    Returns:
        1.0 when the "Location" value of the selected row equals 'Home',
        otherwise 0.0.
    """
    team_page = f'https://www.teamrankings.com/nba/team/{team}/'

    # the second table parsed from the team page is used as the schedule
    tables = pd.read_html(team_page)
    # rows containing any missing value are discarded before picking the last
    # one (presumably these are games not yet played -- confirm against site)
    complete_rows = tables[1].dropna()
    last_location = complete_rows.iloc[-1]["Location"]

    if last_location == 'Home':
        return 1.0
    return 0.0

# fetches the 'Last 3' and 'Last 1' columns of one stat table for every team
def get_recent_stats(stat, URL):
    """Return a two-column frame of recent values for a single stat.

    Args:
        stat: Label used to name the output columns.
        URL: Path suffix appended to the teamrankings.com stat base URL.

    Returns:
        DataFrame indexed by the page table's second column (sorted), with
        the 'Last 3' column renamed to '<stat>_AVG' and the 'Last 1' column
        renamed to '<stat>'.
    """
    stat_page = f'https://www.teamrankings.com/nba/stat/{URL}'

    # first table on the page, indexed by its second column and sorted so
    # every stat frame shares the same row order
    full_table = pd.read_html(stat_page, index_col=1)[0].sort_index()

    recent = full_table[['Last 3', 'Last 1']]
    recent.columns = [f'{stat}_AVG', stat]
    return recent

# retrieves the stat-name -> url-suffix pairs from the json file
# (the context manager guarantees the file handle is closed after parsing)
with open('NBA Stat Data.json', encoding='utf-8') as file:
    stats_urls = json.load(file)
stats_urls = list(stats_urls.items())

# finds and combines all recent stat data for every team in the nba
NBA_current_stats = [get_recent_stats(stat, stat_url) for (stat, stat_url) in stats_urls]
NBA_current_stats = pd.concat(NBA_current_stats, axis=1)

# define total rebounds (TRB) as a sum of offensive rebound (ORB) and defensive rebound (DRB)
# (column-wise addition on the same frame; no Python-level loop needed)
NBA_current_stats['TRB'] = NBA_current_stats['ORB'] + NBA_current_stats['DRB']
NBA_current_stats['TRB_AVG'] = NBA_current_stats['ORB_AVG'] + NBA_current_stats['DRB_AVG']
NBA_current_stats['TRB_OPP'] = NBA_current_stats['ORB_OPP'] + NBA_current_stats['DRB_OPP']
NBA_current_stats['TRB_OPP_AVG'] = NBA_current_stats['ORB_OPP_AVG'] + NBA_current_stats['DRB_OPP_AVG']

# determines the location of the last game played by a team
URL = 'https://www.teamrankings.com/nba/teams/'
teams = list(pd.read_html(URL)[0]['Team'])
# NOTE(review): the list is assigned positionally, so this assumes the teams
# page lists teams in the same order as the sorted stats index -- confirm
NBA_current_stats['HOME/AWAY'] = [get_last_game_location(team.lower().replace(' ', '-')) for team in teams]

# converts all percent data (stored as strings such as '45.3%') into float fractions
pct_cols = NBA_current_stats.columns[NBA_current_stats.dtypes == "object"]
for col in pct_cols:
    NBA_current_stats[col] = NBA_current_stats[col].str.rstrip('%').astype('float') / 100.0

# converts all data points to numeric data
NBA_current_stats = NBA_current_stats.apply(pd.to_numeric)

# saves data as a csv
NBA_current_stats.to_csv('../Data/Current NBA Game Data.csv')
# bare expression so the notebook cell displays the resulting frame
NBA_current_stats


Unnamed: 0_level_0,PTS_AVG,PTS,PTS_OPP_AVG,PTS_OPP,WIN/LOSS_AVG,WIN/LOSS,FG_AVG,FG,FGA_AVG,FGA,...,BLK_OPP,TOV_OPP_AVG,TOV_OPP,PF_OPP_AVG,PF_OPP,TRB,TRB_AVG,TRB_OPP,TRB_OPP_AVG,HOME/AWAY
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Atlanta,115.0,113.0,121.7,118.0,0.0,0.0,42.7,41.0,93.7,89.0,...,2.0,13.0,10.0,17.0,17.0,40.0,41.3,48.0,49.0,0.0
Boston,138.3,126.0,114.0,115.0,1.0,1.0,48.7,46.0,92.3,91.0,...,1.0,10.0,12.0,19.3,19.0,44.0,48.3,43.0,38.0,0.0
Brooklyn,122.0,122.0,123.7,144.0,0.667,0.0,43.7,45.0,89.7,101.0,...,8.0,12.7,10.0,23.3,18.0,47.0,42.3,41.0,43.0,1.0
Charlotte,104.0,104.0,119.7,113.0,0.0,0.0,40.7,43.0,92.0,93.0,...,3.0,14.3,13.0,18.3,14.0,42.0,42.0,45.0,43.6,0.0
Chicago,109.0,118.0,105.7,113.0,0.667,1.0,42.0,42.0,87.3,92.0,...,3.0,16.0,16.0,16.0,21.0,48.0,40.3,40.0,43.0,1.0
Cleveland,108.7,113.0,109.3,110.0,0.667,1.0,40.0,40.0,89.0,92.0,...,7.0,10.3,10.0,19.3,21.0,57.0,46.7,37.0,38.0,0.0
Dallas,127.3,110.0,115.3,113.0,0.667,0.0,45.3,39.0,90.7,88.0,...,4.0,14.0,16.0,19.3,21.0,37.0,42.4,57.0,48.6,1.0
Denver,114.7,120.0,108.7,114.0,1.0,1.0,41.3,40.0,86.3,82.0,...,3.0,14.0,13.0,20.3,23.0,43.0,44.0,39.0,43.7,1.0
Detroit,112.7,112.0,121.0,118.0,0.0,0.0,40.7,40.0,85.0,90.0,...,7.0,11.3,9.0,20.7,22.0,52.0,43.0,42.0,41.4,1.0
Golden State,123.0,114.0,114.7,120.0,0.667,0.0,45.0,40.0,89.0,87.0,...,4.0,11.7,14.0,17.7,17.0,39.0,48.3,43.0,37.3,0.0
