In [2]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import cross_validation
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression, LogisticRegressionCV



In [5]:
# Shot-chart endpoint query for PlayerID=201939, Season=2015-16, regular season (ContextMeasure=FGA).
shot_chart_url = 'http://stats.nba.com/stats/shotchartdetail?Period=0&VsConference=&LeagueID=00&LastNGames=0&TeamID=0&Position=&Location=&Outcome=&ContextMeasure=FGA&DateFrom=&StartPeriod=&DateTo=&OpponentTeamID=0&ContextFilter=&RangeType=&Season=2015-16&AheadBehind=&PlayerID=201939&EndRange=&VsDivision=&PointDiff=&RookieYear=&GameSegment=&Month=0&ClutchTime=&StartRange=&EndPeriod=&SeasonType=Regular+Season&SeasonSegment=&GameID='
# Get the webpage containing the data
response = requests.get(shot_chart_url)
print(response)
# Decode the JSON body once and reuse the first result set for both lookups
result_set = response.json()['resultSets'][0]
# Grab the headers to be used as column headers for our DataFrame
headers = result_set['headers']
print(headers)
# Grab the shot chart data
shots = result_set['rowSet']

<Response [200]>
[u'GRID_TYPE', u'GAME_ID', u'GAME_EVENT_ID', u'PLAYER_ID', u'PLAYER_NAME', u'TEAM_ID', u'TEAM_NAME', u'PERIOD', u'MINUTES_REMAINING', u'SECONDS_REMAINING', u'EVENT_TYPE', u'ACTION_TYPE', u'SHOT_TYPE', u'SHOT_ZONE_BASIC', u'SHOT_ZONE_AREA', u'SHOT_ZONE_RANGE', u'SHOT_DISTANCE', u'LOC_X', u'LOC_Y', u'SHOT_ATTEMPTED_FLAG', u'SHOT_MADE_FLAG']


In [6]:
# Build the shot table from the raw rows, using the API-provided header names as columns.
shot_df = pd.DataFrame(shots, columns=headers)

In [7]:
def create_dataFrame(shot_chart_url):
    """Download a shot-chart JSON payload and return it as a DataFrame.

    Parameters
    ----------
    shot_chart_url : str
        URL whose JSON response contains a ``resultSets`` list; its first
        entry must provide ``headers`` (column names) and ``rowSet`` (rows).

    Returns
    -------
    pandas.DataFrame
        One row per ``rowSet`` entry, with columns named after ``headers``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    response = requests.get(shot_chart_url)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    # Decode the body once; headers and rows both come from the same result set.
    result_set = response.json()['resultSets'][0]
    return pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])
    

In [8]:
# Fetch the same URL again through the helper (rebinding shot_df) and show the column names.
shot_df = create_dataFrame(shot_chart_url)
shot_df.columns

Index([u'GRID_TYPE', u'GAME_ID', u'GAME_EVENT_ID', u'PLAYER_ID',
       u'PLAYER_NAME', u'TEAM_ID', u'TEAM_NAME', u'PERIOD',
       u'MINUTES_REMAINING', u'SECONDS_REMAINING', u'EVENT_TYPE',
       u'ACTION_TYPE', u'SHOT_TYPE', u'SHOT_ZONE_BASIC', u'SHOT_ZONE_AREA',
       u'SHOT_ZONE_RANGE', u'SHOT_DISTANCE', u'LOC_X', u'LOC_Y',
       u'SHOT_ATTEMPTED_FLAG', u'SHOT_MADE_FLAG'],
      dtype='object')

In [9]:
def transform(data):
    """One-hot encode the categorical shot descriptors and append the target.

    Each of ACTION_TYPE, PERIOD, SHOT_TYPE, SHOT_ZONE_BASIC, SHOT_ZONE_AREA
    and SHOT_ZONE_RANGE is expanded with ``pd.get_dummies``; the indicator
    blocks are placed side by side in that order, followed by the
    SHOT_MADE_FLAG column as the last column.
    """
    categorical_fields = [
        "ACTION_TYPE",
        "PERIOD",
        "SHOT_TYPE",
        "SHOT_ZONE_BASIC",
        "SHOT_ZONE_AREA",
        "SHOT_ZONE_RANGE",
    ]
    blocks = [pd.get_dummies(data[field]) for field in categorical_fields]
    # Target goes last so callers can take columns[:-1] as the predictors.
    blocks.append(data["SHOT_MADE_FLAG"])
    return pd.concat(blocks, axis=1)

In [10]:
# Model matrix for the current shot_df: indicator columns plus SHOT_MADE_FLAG as the last column.
new_shot_chart= transform(shot_df)

In [11]:
# Single shared estimator: every train_test_splitter(logistic, ...) call refits this same object.
logistic = LogisticRegression()
def train_test_splitter(model, X, y, train_size=0.5, random_state=None):
    """Randomly split ``X``/``y``, fit ``model`` on the training part, return all pieces.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)``
        Fitted in place; the same object is returned.
    X, y : array-likes accepted by ``train_test_split``
        Predictors and target.
    train_size : float, default 0.5
        Forwarded to ``train_test_split``.
    random_state : int or None, default None
        Forwarded to ``train_test_split``. Pass a fixed value to get the same
        split (and therefore the same score) on every run; ``None`` keeps the
        previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, model)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, random_state=random_state)
    model.fit(X_train, y_train)
    return X_train, X_test, y_train, y_test, model

In [13]:
# Every column except the last is a feature; transform() places SHOT_MADE_FLAG last.
predictors = new_shot_chart.columns[:-1]
# Single-argument print(...) gives the same output under Python 2 and also parses under Python 3.
print(predictors)
X_train, X_test, y_train, y_test, model = train_test_splitter(logistic, new_shot_chart[predictors], new_shot_chart.SHOT_MADE_FLAG)

# Score the fitted model on the held-out split.
model.score(X_test, y_test)

Index([              u'Alley Oop Layup shot',
           u'Cutting Finger Roll Layup Shot',
                       u'Cutting Layup Shot',
                        u'Driving Bank shot',
                        u'Driving Dunk Shot',
           u'Driving Finger Roll Layup Shot',
          u'Driving Floating Bank Jump Shot',
               u'Driving Floating Jump Shot',
                        u'Driving Hook Shot',
                       u'Driving Layup Shot',
               u'Driving Reverse Layup Shot',
                       u'Fadeaway Bank shot',
                       u'Fadeaway Jump Shot',
                   u'Finger Roll Layup Shot',
                       u'Floating Jump shot',
                                u'Hook Shot',
                           u'Jump Bank Shot',
                                u'Jump Shot',
                               u'Layup Shot',
                         u'Pullup Bank shot',
                         u'Pullup Jump shot',
                       u'Putback L

0.69109947643979053

In [14]:
def test(url):
    """Fetch the shots at ``url``, refit the shared ``logistic`` model and score it.

    The frame from ``create_dataFrame(url)`` is passed through ``transform``;
    every column but the last is used as a predictor and ``SHOT_MADE_FLAG``
    as the target. Returns ``model.score`` evaluated on the test split
    produced by ``train_test_splitter``.
    """
    frame = transform(create_dataFrame(url))
    feature_cols = frame.columns[:-1]
    split = train_test_splitter(logistic, frame[feature_cols], frame.SHOT_MADE_FLAG)
    # split is (X_train, X_test, y_train, y_test, model); only the test parts and the model are needed.
    held_out_X, held_out_y, fitted = split[1], split[3], split[4]
    return fitted.score(held_out_X, held_out_y)

In [16]:
# Run the whole fetch -> transform -> fit -> score pipeline on whatever shot_chart_url currently points to.
test(shot_chart_url)

0.68062827225130895

In [22]:
# Same query as before but with Season=2014-15.
# NOTE(review): this rebinds shot_chart_url, so any earlier cell re-run after this one will fetch 2014-15 data.
shot_chart_url = 'http://stats.nba.com/stats/shotchartdetail?Period=0&VsConference=&LeagueID=00&LastNGames=0&TeamID=0&Position=&Location=&Outcome=&ContextMeasure=FGA&DateFrom=&StartPeriod=&DateTo=&OpponentTeamID=0&ContextFilter=&RangeType=&Season=2014-15&AheadBehind=&PlayerID=201939&EndRange=&VsDivision=&PointDiff=&RookieYear=&GameSegment=&Month=0&ClutchTime=&StartRange=&EndPeriod=&SeasonType=Regular+Season&SeasonSegment=&GameID='
test(shot_chart_url)

0.63338301043219081

In [28]:
# Keep the frame for the URL currently bound to shot_chart_url (the Season=2014-15 query at this point).
shots_2014 = create_dataFrame(shot_chart_url)

In [55]:
# Stack the two shot frames. The 2014-15 frame is bound to `shots_2014` in this notebook;
# `shot_df_2014` is never assigned in any cell, so using it fails with NameError on a fresh kernel.
# ignore_index=True renumbers the rows so the stacked frame has no duplicate row labels.
combined = pd.concat([shot_df, shots_2014], ignore_index=True)
new_shot_chart_combined = transform(combined)
# Every column except the last (SHOT_MADE_FLAG) is a predictor.
predictors = new_shot_chart_combined.columns[:-1]

In [79]:
# Refit the shared logistic model on the combined matrix, then score it on the held-out split.
X_train, X_test, y_train, y_test, model = train_test_splitter(logistic, new_shot_chart_combined[predictors], new_shot_chart_combined.SHOT_MADE_FLAG)

model.score(X_test, y_test)

0.6604850213980028

In [24]:
# NOTE(review): `webscrape` is not defined in this notebook — presumably a project-local module; confirm it is importable.
import webscrape

# Load the shot log for the named player; judging by the cell output, getData also prints an ID and a preview.
df = webscrape.getData("stephen curry")

ID:  201939
    Game Date         Player               Opp.  Q   Time          Shot Type  \
0  2014-12-13  Stephen Curry   Dallas Mavericks  3   2:25  Driving Bank shot   
1  2015-02-20  Stephen Curry  San Antonio Spurs  3  10:41  Driving Bank shot   
2  2014-12-13  Stephen Curry   Dallas Mavericks  4   7:05  Driving Bank shot   
3  2015-03-21  Stephen Curry          Utah Jazz  2   0:34  Driving Bank shot   
4  2015-04-04  Stephen Curry   Dallas Mavericks  1  11:23  Driving Bank shot   

  Shot Dist. Made? Drib. Shot Clock Touch Time         Defender Def Dist.  \
0    5.20ft.   Yes     8      16.00       6.30   Nowitzki, Dirk      3.20   
1   13.60ft.   Yes     5       2.90       4.50     Parker, Tony      1.70   
2    6.00ft.   Yes     0       9.30       0.80  Chandler, Tyson      3.70   
3    9.00ft.   Yes    10      15.30       8.70  Favors, Derrick      2.30   
4    4.10ft.   Yes     0      20.80       0.80   Nowitzki, Dirk      2.50   

               
0  \n\n\n  \n  
1  \n\n\n  \

In [56]:
# Preview the first rows only instead of rendering the whole scraped frame.
df.head(10)

Unnamed: 0,Game Date,Player,Opp.,Q,Time,Shot Type,Shot Dist.,Made?,Drib.,Shot Clock,Touch Time,Defender,Def Dist.,Unnamed: 14,Unnamed: 15
0,2014-12-13,Stephen Curry,Dallas Mavericks,3,2:25,Driving Bank shot,5.20ft.,Yes,8,16.00,6.30,"Nowitzki, Dirk",3.20,\n\n\n,\n
1,2015-02-20,Stephen Curry,San Antonio Spurs,3,10:41,Driving Bank shot,13.60ft.,Yes,5,2.90,4.50,"Parker, Tony",1.70,\n\n\n,\n
2,2014-12-13,Stephen Curry,Dallas Mavericks,4,7:05,Driving Bank shot,6.00ft.,Yes,0,9.30,0.80,"Chandler, Tyson",3.70,\n\n\n,\n
3,2015-03-21,Stephen Curry,Utah Jazz,2,0:34,Driving Bank shot,9.00ft.,Yes,10,15.30,8.70,"Favors, Derrick",2.30,\n\n\n,\n
4,2015-04-04,Stephen Curry,Dallas Mavericks,1,11:23,Driving Bank shot,4.10ft.,Yes,0,20.80,0.80,"Nowitzki, Dirk",2.50,\n\n\n,\n
5,2015-03-21,Stephen Curry,Utah Jazz,4,1:56,Driving Finger Roll Layup Shot,4.40ft.,No,1,20.90,1.80,"Burke, Trey",3.80,\n\n\n,\n
6,2015-03-06,Stephen Curry,Dallas Mavericks,1,4:54,Driving Finger Roll Layup Shot,6.10ft.,No,3,21.00,2.90,"Chandler, Tyson",4.10,\n\n\n,\n
7,2015-03-23,Stephen Curry,Washington Wizards,2,2:07,Driving Finger Roll Layup Shot,6.20ft.,Yes,2,14.50,3.10,"Wall, John",0.30,\n\n\n,\n
8,2014-11-02,Stephen Curry,Portland Trail Blazers,2,2:47,Driving Finger Roll Layup Shot,6.10ft.,No,5,18.30,4.40,"Blake, Steve",2.70,\n\n\n,\n
9,2014-12-10,Stephen Curry,Houston Rockets,4,7:05,Driving Finger Roll Layup Shot,4.60ft.,Yes,14,13.60,10.50,"Harden, James",4.30,\n\n\n,\n


In [57]:
# Preview the first rows only instead of rendering the whole shot-chart frame.
shot_df.head(10)

Unnamed: 0,GRID_TYPE,GAME_ID,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,...,ACTION_TYPE,SHOT_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG
0,Shot Chart Detail,0021500003,6,201939,Stephen Curry,1610612744,Golden State Warriors,1,10,56,...,Cutting Finger Roll Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,3,12,31,1,1
1,Shot Chart Detail,0021500003,9,201939,Stephen Curry,1610612744,Golden State Warriors,1,10,29,...,Jump Shot,3PT Field Goal,Above the Break 3,Left Side Center(LC),24+ ft.,26,-176,195,1,0
2,Shot Chart Detail,0021500003,14,201939,Stephen Curry,1610612744,Golden State Warriors,1,9,34,...,Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,2,20,9,1,0
3,Shot Chart Detail,0021500003,19,201939,Stephen Curry,1610612744,Golden State Warriors,1,9,13,...,Jump Shot,3PT Field Goal,Above the Break 3,Left Side Center(LC),24+ ft.,27,-197,193,1,1
4,Shot Chart Detail,0021500003,36,201939,Stephen Curry,1610612744,Golden State Warriors,1,7,11,...,Running Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,-4,8,1,1
5,Shot Chart Detail,0021500003,38,201939,Stephen Curry,1610612744,Golden State Warriors,1,6,45,...,Jump Shot,3PT Field Goal,Above the Break 3,Center(C),24+ ft.,25,-11,259,1,1
6,Shot Chart Detail,0021500003,46,201939,Stephen Curry,1610612744,Golden State Warriors,1,5,50,...,Jump Shot,2PT Field Goal,Mid-Range,Left Side Center(LC),16-24 ft.,22,-117,188,1,1
7,Shot Chart Detail,0021500003,55,201939,Stephen Curry,1610612744,Golden State Warriors,1,5,3,...,Jump Shot,3PT Field Goal,Above the Break 3,Left Side Center(LC),24+ ft.,28,-179,228,1,1
8,Shot Chart Detail,0021500003,68,201939,Stephen Curry,1610612744,Golden State Warriors,1,3,57,...,Step Back Jump shot,3PT Field Goal,Above the Break 3,Right Side Center(RC),24+ ft.,25,89,239,1,1
9,Shot Chart Detail,0021500003,82,201939,Stephen Curry,1610612744,Golden State Warriors,1,3,6,...,Jump Shot,3PT Field Goal,Above the Break 3,Left Side Center(LC),24+ ft.,27,-156,232,1,0


In [69]:
# Inspect the scraped columns and the raw labels of the target column.
# Single-argument print(...) gives the same output under Python 2 and also parses under Python 3.
print(df.columns)
print(df["Made?"].unique())

Index([u'Game Date', u'Player', u'Opp.', u'Q', u'Time', u'Shot Type',
       u'Shot Dist.', u'Made?', u'Drib.', u'Shot Clock', u'Touch Time',
       u'Defender', u'Def Dist.', u'', u''],
      dtype='object')
[u'Yes' u'No']


In [71]:
# Team name -> rating, looked up by transform1 via the "Opp." column.
# NOTE(review): the season and source of these figures are not recorded here — confirm.
defensive_rating = {
    "Golden State Warriors": 101.67,
    "Los Angeles Clippers": 106.14,
    "San Antonio Spurs": 102.30,
    "Atlanta Hawks": 104.56,
    "Portland Trail Blazers": 104.03,
    "Cleveland Cavaliers": 107.58,
    "Houston Rockets": 103.85,
    "Memphis Grizzlies": 102.41,
    "Dallas Mavericks": 106.87,
    "Chicago Bulls": 105.53,
    "Toronto Raptors": 109.35,
    "Oklahoma City Thunder": 105.89,
    "New Orleans Pelicans": 107.88,
    "Utah Jazz": 105.43,
    "Washington Wizards": 104.13,
    "Indiana Pacers": 104.48,
    "Milwaukee Bucks": 103.49,
    "Phoenix Suns": 106.34,
    "Boston Celtics": 105.58,
    "Detroit Pistons": 107.77,
    "Denver Nuggets": 108.22,
    "Sacramento Kings": 109.51,
    "Brooklyn Nets": 108.48,
    "Miami Heat": 108.36,
    "Charlotte Hornets": 104.55,
    "Orlando Magic": 108.54,
    "Los Angeles Lakers": 110.90,
    "Minnesota Timberwolves": 112.50,
    "Philadelphia 76ers": 105.67,
    "New York Knicks": 111.36,
}

In [89]:
def transform1(data):
    """Build the model matrix for the scraped shot log.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns "Opp.", "Shot Type", "Q", "Shot Dist.",
        "Shot Clock", "Touch Time", "Drib.", "Def Dist." and "Made?".
        "Shot Dist." values are strings carrying an "ft." suffix.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: the opponent's rating (still named "Opp."), the
        "Shot Type" indicator columns, the "Q" indicator columns,
        "Shot Dist." as float, "Shot Clock", "Touch Time", "Drib.",
        "Def Dist." passed through unchanged, and "Made?" as 0/1 (last).

    Raises
    ------
    KeyError
        If an opponent name is missing from ``defensive_rating``.
    ValueError
        If a "Shot Dist." value is not numeric once "ft." is removed.
    """
    # Replace the opponent name with its rating; the Series keeps the name "Opp.".
    d_rating = data["Opp."].apply(lambda team: defensive_rating[team])
    shot_type = pd.get_dummies(data["Shot Type"])
    q = pd.get_dummies(data["Q"])
    # "5.20ft." -> 5.2: strip the unit and convert, so the feature is numeric
    # rather than a string that the estimator would have to coerce implicitly.
    shot_dist = data["Shot Dist."].apply(lambda text: text.replace("ft.", "")).astype(float)
    made = (data["Made?"] == "Yes").astype(int)

    # NOTE(review): the four pass-through columns are not converted here —
    # confirm the scraper already yields numeric values for them.
    new_shot_chart = pd.concat(
        [d_rating, shot_type, q, shot_dist,
         data["Shot Clock"], data["Touch Time"], data["Drib."], data["Def Dist."],
         made],
        axis=1)
    return new_shot_chart

In [94]:
# Build the model matrix from the scraped shot log; "Made?" (0/1) is the last column.
transformed = transform1(df)
# Single-argument print(...) gives the same output under Python 2 and also parses under Python 3.
print(transformed.columns)
predictors = transformed.columns[:-1]
X_train, X_test, y_train, y_test, model = train_test_splitter(logistic, transformed[predictors], transformed["Made?"])
print(model.score(X_test, y_test))

Index([u'Opp.', u'Driving Bank shot', u'Driving Finger Roll Layup Shot',
       u'Driving Jump shot', u'Driving Layup Shot',
       u'Driving Reverse Layup Shot', u'Driving Slam Dunk Shot', u'Dunk Shot',
       u'Fadeaway Bank shot', u'Fadeaway Jump Shot', u'Finger Roll Layup Shot',
       u'Floating Jump shot', u'Jump Bank Shot', u'Jump Shot', u'Layup Shot',
       u'Pullup Bank shot', u'Pullup Jump shot', u'Putback Layup Shot',
       u'Reverse Layup Shot', u'Running Bank shot',
       u'Running Finger Roll Layup Shot', u'Running Hook Shot',
       u'Running Jump Shot', u'Running Layup Shot',
       u'Running Reverse Layup Shot', u'Slam Dunk Shot',
       u'Step Back Jump shot', u'Turnaround Bank shot',
       u'Turnaround Fadeaway shot', u'Turnaround Hook Shot',
       u'Turnaround Jump Shot', u'1', u'2', u'3', u'4', u'5', u'Shot Dist.',
       u'Shot Clock', u'Touch Time', u'Drib.', u'Def Dist.', u'Made?'],
      dtype='object')
0.637853949329
