In [1]:
from __future__ import print_function, division
%reload_ext autoreload
%autoreload 2


import pandas as pd
import seaborn as sns

import numpy as np

from tensorflow import keras

import matplotlib as mpl
import matplotlib.pyplot as plt

from tensorflow.keras import layers, regularizers

from tqdm import tqdm
from glob import glob
import datetime
import os, sys

from collections import OrderedDict

## some useful team data in here (converters from short to long):
from basketball_reference_web_scraper import data

## custom printing for my Keras training:
class PrintCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if epoch % 25 == 0:
            print("epoch {}:  loss = {:.2f}, test loss = {:.2f}".format(
                epoch, logs['loss'], logs['val_loss']))
    def on_train_begin(self, logs={}):
        print("Beginning training...")
    
    def on_train_end(self, logs):
        print("Training completed")
        
def create_unique_id(row):
    return row['player_id'] + '-' + data.Team(row['team']).name

In [39]:
year = 2018

In [44]:
filter?

In [54]:
tot_df = pd.read_csv(f'scraped/stats_by_year/{year}_totals.csv')
adv_df = pd.read_csv(f'scraped/stats_by_year/{year}_advanced.csv')

## set the index to the player id to match up later and avoid dropping them
tot_df.set_index('player_id', inplace=True)
adv_df.set_index('player_id', inplace=True)

## drop column in common other than team:
columns_to_drop = filter(lambda x:  x != 'team', 
                         np.intersect1d(list(tot_df.keys()), list(adv_df.keys())))

adv_df.drop(columns=columns_to_drop, inplace=True)

## drop rows corresponding to a single team in a multi-team season
pids_with_totals = tot_df.index[tot_df['team'] == 'TOTAL']
for pid in pids_with_totals:
    for df in [tot_df, adv_df]:
        rows = df.loc[(df.index == pid) & (df['team'] != 'TOTAL')]
        df.drop(index=rows.index, inplace=True)


## concatenate the dataframes alongside each other
out = pd.concat((tot_df, adv_df), axis=1)    

In [56]:
out.shape

(481, 50)

In [7]:
directory = 'scraped/playoffs_by_series/2018/'
files = glob(directory+'/*_basic.csv')
basic_files = list(filter(lambda fname: True not in [f'_{x}_' in fname for x in range(10)], 
                     files))
playoff_series_files = {}

In [8]:
basic_files

['scraped/playoffs_by_series/2018/Eastern Conference Semifinals_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference Semifinals_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference First Round_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference First Round_basic.csv',
 'scraped/playoffs_by_series/2018/Finals_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference Finals_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference Finals_basic.csv']

In [78]:
basic_files

['scraped/playoffs_by_series/2018/Eastern Conference First Round_1_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference Semifinals_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference Semifinals_0_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference First Round_0_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference First Round_0_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference Semifinals_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference Semifinals_1_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference Finals_0_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference First Round_1_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference First Round_3_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference Semifinals_1_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference First Round_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference First Round_2_b

In [66]:
fname = 'scraped/playoffs_by_series/2018/Eastern Conference First Round_1_basic.csv'

In [69]:
True in 

True

In [62]:
basic_files

['scraped/playoffs_by_series/2018/Eastern Conference First Round_1_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference Semifinals_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference Semifinals_0_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference First Round_0_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference First Round_0_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference Semifinals_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference Semifinals_1_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference Finals_0_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference First Round_1_basic.csv',
 'scraped/playoffs_by_series/2018/Eastern Conference First Round_3_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference Semifinals_1_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference First Round_basic.csv',
 'scraped/playoffs_by_series/2018/Western Conference First Round_2_b

In [59]:
ls 'scraped/playoffs_by_series/2018/'

'Eastern Conference Finals_0_advanced.csv'
'Eastern Conference Finals_0_basic.csv'
'Eastern Conference First Round_0_advanced.csv'
'Eastern Conference First Round_0_basic.csv'
'Eastern Conference First Round_1_advanced.csv'
'Eastern Conference First Round_1_basic.csv'
'Eastern Conference First Round_2_advanced.csv'
'Eastern Conference First Round_2_basic.csv'
'Eastern Conference First Round_3_advanced.csv'
'Eastern Conference First Round_3_basic.csv'
'Eastern Conference First Round_advanced.csv'
'Eastern Conference First Round_basic.csv'
'Eastern Conference Semifinals_0_advanced.csv'
'Eastern Conference Semifinals_0_basic.csv'
'Eastern Conference Semifinals_1_advanced.csv'
'Eastern Conference Semifinals_1_basic.csv'
'Eastern Conference Semifinals_advanced.csv'
'Eastern Conference Semifinals_basic.csv'
 Finals_advanced.csv
 Finals_basic.csv
'Western Conference Finals_0_advanced.csv'
'Western Conference Finals_0_basic.csv'
'Western Conference First Round_0_advanced.

In [33]:
def parse_year(year):
    tot_df = pd.read_csv(f'scraped/stats_by_year/{year}_totals.csv')
    adv_df = pd.read_csv(f'scraped/stats_by_year/{year}_advanced.csv')

    ## drop column in common:
    adv_df.drop(columns=np.intersect1d(list(tot_df.keys()), list(adv_df.keys())), inplace=True)

    ## drop rows corresponding to a single team in a multi-team season
    pids_with_totals = tot_df['player_id'][tot_df['team'] == 'TOTAL']
    for pid in pids_with_totals:
        for df in [tot_df, adv_df]:
            rows = df.loc[(df['player_id'] == pid) & (df['team'] != 'TOTAL')]
            df.drop(index=rows.index, inplace=True)

    ## set the index to the player id to match up, then concatenate:
    tot_df.set_index('player_id', inplace=True)
    adv_df.set_index('player_id', inplace=True)
    
    out = pd.concat((tot_df, adv_df), axis=1)    

    ### now add on the playoff stats
    

In [None]:
for year in range(1970, 2019):
    yearly_totals_df = pd.read_csv(f'scraped/stats_by_year/{year}_totals.csv')
    yearly_advanced_df = pd.read_csv(f'scraped/stat_by_year/{year}_advanced.csv')

In [7]:
ls

LICENSE		      download_stats.py		  scraped/
README.md	      initial_explorations.ipynb  train_for_future_vorp.ipynb
all_players.csv       initial_explorations.py	  yearly_player_stats/
awards/		      list_of_nba_finals.csv
download_stats.ipynb  playoff_player_stats/


In [5]:
year = 2016

In [14]:
tot_df = pd.read_csv(f'scraped/stats_by_year/{year}_totals.csv')
adv_df = pd.read_csv(f'scraped/stats_by_year/{year}_advanced.csv')

## any columns that are in common (other than player ID) -- make sure they're the same, 
## then drop them from one of the df's

## now stack the two alongside each other:

## now combine any duplicated (traded) players

In [17]:
tot_df['unique_id'] = tot_df.apply(create_unique_id, axis=1)

In [10]:
yearly_advanced_df

Unnamed: 0,player_id,player_name,positions,age,team,games_played,minutes_played,player_efficiency_rating,true_shooting_percent,three_point_attempt_rate,...,blocks_pct,turnover_pct,usage_pct,offensive_win_shares,defensive_win_shares,total_win_shares,offensive_bpm,defensive_bpm,total_bpm,vorp
0,acyqu01,Quincy Acy,POWER FORWARD,25,SACRAMENTO KINGS,59,876,14.7,0.629,0.229,...,2.2,10.0,13.1,1.8,0.7,2.5,0.2,0.0,0.2,0.5
1,adamsjo01,Jordan Adams,SHOOTING GUARD,21,MEMPHIS GRIZZLIES,2,15,17.3,0.427,0.167,...,0.0,19.6,30.5,0.0,0.0,0.0,-2.9,4.8,1.9,0.0
2,adamsst01,Steven Adams,CENTER,22,OKLAHOMA CITY THUNDER,80,2014,15.5,0.621,0.000,...,3.3,14.1,12.6,4.2,2.3,6.5,0.8,1.3,2.1,2.1
3,afflaar01,Arron Afflalo,SHOOTING GUARD,30,NEW YORK KNICKS,71,2371,10.9,0.531,0.298,...,0.3,8.7,17.9,1.8,0.9,2.7,-0.6,-1.8,-2.4,-0.2
4,ajincal01,Alexis Ajinca,CENTER,27,NEW ORLEANS PELICANS,59,861,13.8,0.514,0.003,...,3.4,13.6,20.4,0.2,0.9,1.0,-4.3,-0.2,-4.5,-0.5
5,aldrico01,Cole Aldrich,CENTER,27,LOS ANGELES CLIPPERS,60,800,21.3,0.626,0.000,...,6.7,19.6,18.4,1.4,2.0,3.5,-1.0,5.8,4.8,1.4
6,aldrila01,LaMarcus Aldridge,POWER FORWARD,30,SAN ANTONIO SPURS,74,2261,22.4,0.565,0.015,...,2.8,7.8,25.9,5.6,4.5,10.1,0.7,1.1,1.8,2.2
7,alexacl01,Cliff Alexander,POWER FORWARD,20,PORTLAND TRAIL BLAZERS,8,36,11.6,0.500,0.000,...,4.4,9.1,13.3,0.0,0.0,0.0,-5.7,-1.3,-7.0,0.0
8,allenla01,Lavoy Allen,POWER FORWARD,26,INDIANA PACERS,79,1599,12.4,0.532,0.000,...,2.1,14.6,13.0,1.6,2.1,3.7,-2.1,1.4,-0.6,0.6
9,allento01,Tony Allen,SHOOTING GUARD,34,MEMPHIS GRIZZLIES,64,1620,12.9,0.505,0.090,...,1.0,12.8,16.8,0.5,1.9,2.4,-1.5,1.4,-0.1,0.8


In [9]:
yearly_totals_df

Unnamed: 0,player_id,player_name,positions,age,team,games_played,games_started,minutes_played,made_field_goals,attempted_field_goals,...,attempted_three_point_field_goals,made_free_throws,attempted_free_throws,offensive_rebounds,defensive_rebounds,assists,steals,blocks,turnovers,personal_fouls
0,acyqu01,Quincy Acy,POWER FORWARD,25,SACRAMENTO KINGS,59,29,876,119,214,...,49,50,68,65,123,27,29,24,27,103
1,adamsjo01,Jordan Adams,SHOOTING GUARD,21,MEMPHIS GRIZZLIES,2,0,15,2,6,...,1,3,5,0,2,3,3,0,2,2
2,adamsst01,Steven Adams,CENTER,22,OKLAHOMA CITY THUNDER,80,80,2014,261,426,...,0,114,196,219,314,62,42,89,84,223
3,afflaar01,Arron Afflalo,SHOOTING GUARD,30,NEW YORK KNICKS,71,57,2371,354,799,...,238,110,131,23,243,144,25,10,82,142
4,ajincal01,Alexis Ajinca,CENTER,27,NEW ORLEANS PELICANS,59,17,861,150,315,...,1,52,62,75,194,31,19,36,54,134
5,aldrico01,Cole Aldrich,CENTER,27,LOS ANGELES CLIPPERS,60,5,800,134,225,...,0,60,84,86,202,50,47,68,64,139
6,aldrila01,LaMarcus Aldridge,POWER FORWARD,30,SAN ANTONIO SPURS,74,74,2261,536,1045,...,16,259,302,176,456,110,38,81,99,151
7,alexacl01,Cliff Alexander,POWER FORWARD,20,PORTLAND TRAIL BLAZERS,8,0,36,5,10,...,0,0,0,2,4,0,1,2,1,1
8,allenla01,Lavoy Allen,POWER FORWARD,26,INDIANA PACERS,79,28,1599,191,370,...,0,46,73,162,262,76,26,42,69,147
9,allento01,Tony Allen,SHOOTING GUARD,34,MEMPHIS GRIZZLIES,64,57,1620,215,469,...,42,90,138,104,192,70,110,18,78,175
