In [11]:
import pandas as pd
import requests
from datetime import datetime

# Deal with Injurty data

In [None]:
df = pd.read_csv('injury.csv', header=0)

In [None]:
df

In [None]:
df['Date'] = df['Date'].apply(lambda x:'0'+str(x) if len(str(x)) == 1 else str(x))


In [None]:
for n in df['Content'].unique():
    if 'rest' in n or 'returned' in n or 'sore' in n:
        print(n)

In [None]:
delete = ['sore left foot', 'sore right knee','sore left shin','rest','general soreness / rest','returned to lineup','sore right shoulder']
for n in delete:
    df = df.drop(df.loc[df.Content == n].index)

In [None]:
df.set_index('Date').to_csv('injury.csv',index='Date')

In [None]:
df.drop_duplicates().set_index('Date').to_csv('injury.csv')

# Get data from NBA stats

In [None]:
HEADERS = {'user-agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/45.0.2454.101 Safari/537.36'),
           'referer': 'http://stats.nba.com/scores/'
          }

In [None]:
# Get all Kobe shot data from 1996 to 2016 and put it into an array
# This player ID comes from stats.nba.com (http://stats.nba.com/player/#!/977/stats/)
playerID = "977"
seasons = []
for season in range(1996,2016):
    # The stats.nba.com API wants season as "1996-97"
    seasonString = str(season) + '-' + str(season+1)[2:]

    # The stats.nba.com endpoint we are using is http://stats.nba.com/stats/shotchartdetail
    # More info on endpoints: https://github.com/seemethere/nba_py/wiki/stats.nba.com-Endpoint-Documentation
    shot_chart_url = 'http://stats.nba.com/stats/shotchartdetail?Period=0&VsConference=&LeagueID=00&LastNGames=0&TeamID=0&Position=&Location=&Outcome=&ContextMeasure=FGA&DateFrom=&StartPeriod=&DateTo=&OpponentTeamID=0&ContextFilter=&RangeType=&Season=' + seasonString + '&AheadBehind=&EndRange=&VsDivision=&PointDiff=&RookieYear=&GameSegment=&Month=0&ClutchTime=&StartRange=&EndPeriod=&SeasonType=Regular+Season&SeasonSegment=&GameID=&PlayerID=' + playerID + '&CFID=&PlayerPosition='
    response = requests.get(shot_chart_url, headers=HEADERS)
    # Split response into headers and content
    headers = response.json()['resultSets'][0]['headers']
    shots = response.json()['resultSets'][0]['rowSet']

    # Create pandas dataframe to hold the data
    shot_df = pd.DataFrame(shots, columns=headers)

    # add extra column for season
    shot_df['SEASON'] = seasonString

    # add extra column for playoff flag
    shot_df['playoffs'] = 0
    
    seasons.append(shot_df)
    

# Do the same thing for all the playoff shots
for season in range(1996,2016):
    seasonString = str(season) + '-' + str(season+1)[2:]

    # This URL is the same except for the parameter SeasonType=Playoffs
    shot_chart_url = 'http://stats.nba.com/stats/shotchartdetail?Period=0&VsConference=&LeagueID=00&LastNGames=0&TeamID=0&Position=&Location=&Outcome=&ContextMeasure=FGA&DateFrom=&StartPeriod=&DateTo=&OpponentTeamID=0&ContextFilter=&RangeType=&Season=' + seasonString + '&AheadBehind=&EndRange=&VsDivision=&PointDiff=&RookieYear=&GameSegment=&Month=0&ClutchTime=&StartRange=&EndPeriod=&SeasonType=Playoffs&SeasonSegment=&GameID=&PlayerID=' + playerID + '&CFID=&PlayerPosition='
    response = requests.get(shot_chart_url, headers=HEADERS)
    
    headers = response.json()['resultSets'][0]['headers']
    shots = response.json()['resultSets'][0]['rowSet']
    
    shot_df = pd.DataFrame(shots, columns=headers)
    shot_df['SEASON'] = str(season) + '-' + str(season+1)[2:]
    shot_df['playoffs'] = 1
    
    seasons.append(shot_df)

In [None]:
# combine all season and playoffs dataframes into one dataframe
kobe_all_shots = pd.concat(seasons)
kobe_all_shots.columns = [n.lower() for n in kobe_all_shots.columns]
# dump a csv file
kobe_all_shots.to_csv("kobe_all_shots.csv")

# Add Score column for each shot

In [None]:
df = pd.read_csv('kobe.csv', index_col=0)

In [None]:
df['score'] = df['shot_made_flag'] * 1

In [None]:
df['score'] = df['score'] * df['shot_type'].apply(lambda x:2 if '2' in x else 3)

In [None]:
df.to_csv("kobe.csv")

# Add injury column

In [6]:
data = pd.read_csv('kobe.csv', index_col=0)
injury = pd.read_csv('complete_injury.csv')

In [15]:
injury['Date'] = injury['Date'].apply(lambda x:datetime.strptime(x, '%m/%d/%y').strftime("%Y%m%d"))

In [18]:
injury = injury.rename(columns={'Date':'game_date'})

In [23]:
data.merge(injury, on='game_date', how='left')['Notes'].unique()

array([nan], dtype=object)

In [26]:
data['game_date']

0      19961103
1      19961105
2      19961106
3      19961106
4      19961106
5      19961108
6      19961108
7      19961108
8      19961108
9      19961108
10     19961108
11     19961108
12     19961108
13     19961110
14     19961110
15     19961110
16     19961112
17     19961112
18     19961113
19     19961113
20     19961115
21     19961115
22     19961115
23     19961115
24     19961117
25     19961117
26     19961117
27     19961117
28     19961117
29     19961117
         ...   
271    20120521
272    20120521
273    20120521
274    20120521
275    20120521
276    20120521
277    20120521
278    20120521
279    20120521
280    20120521
281    20120521
282    20120521
283    20120521
284    20120521
285    20120521
286    20120521
287    20120521
288    20120521
289    20120521
290    20120521
291    20120521
292    20120521
293    20120521
294    20120521
295    20120521
296    20120521
297    20120521
298    20120521
299    20120521
300    20120521
Name: game_date, Length: