In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import utils
%matplotlib inline

In [2]:
# Load the shot-level dataset (going by the file name, shots already joined with
# player data). The path is relative to the notebook's working directory.
# NOTE(review): the column listing printed in this notebook includes
# 'Unnamed: 0', which suggests the CSV was written with its index -- confirm
# whether that column should be dropped or read as the index.
SHOTS_CSV_PATH = 'shots_with_player_data.csv'
df = pd.read_csv(SHOTS_CSV_PATH)

In [13]:
# List the column names of the loaded frame so feature names can be checked
# against what is actually present.
print(df.columns)

Index([u'Unnamed: 0', u'GAME_ID', u'MATCHUP', u'LOCATION', u'W',
       u'FINAL_MARGIN', u'SHOT_NUMBER', u'PERIOD', u'GAME_CLOCK',
       u'SHOT_CLOCK', u'DRIBBLES', u'TOUCH_TIME', u'SHOT_DIST', u'PTS_TYPE',
       u'SHOT_RESULT', u'CLOSEST_DEFENDER', u'CLOSEST_DEFENDER_PLAYER_ID',
       u'CLOSE_DEF_DIST', u'FGM', u'PTS', u'player_name', u'player_id',
       u'FG3A', u'FG_PCT', u'FG3M', u'FG3_PCT', u'FGA'],
      dtype='object')


In [3]:
# Model inputs, grouped by where they come from. The concatenation order below
# is the column order handed to the classifiers.

# Per-shot context columns.
shot_context_features = [
    "SHOT_NUMBER",
    "PERIOD",
    "SHOT_CLOCK",
    "DRIBBLES",
    "TOUCH_TIME",
    "SHOT_DIST",
    "PTS_TYPE",
    "CLOSE_DEF_DIST",
]

# Shooter statistics ("advanced" features).
# NOTE(review): FGA/FGM are described by the author as per-game attempted/made
# field goals. In the printed column listing, FGM sits next to PTS among the
# shot-level columns -- confirm it is the per-game stat and not a per-shot
# made flag, which would leak the target.
shooter_stat_features = [
    "FG3A",
    "FG3M",
    "FG3_PCT",
    "FGA",
    "FGM",
    "FG_PCT",
]

features = shot_context_features + shooter_stat_features

# Prediction target: points scored on the shot. Kept as a one-element list, so
# df[target] selects a single-column DataFrame rather than a Series.
target = ['PTS']

In [4]:
# Normalize raw columns, then rescale the model inputs.
# NOTE(review): this cell overwrites `df` columns in place, so running it twice
# on the same frame applies the helpers twice -- reload `df` before re-running
# unless the utils helpers are known to be idempotent.
df['SHOT_CLOCK'] = utils.normalize_shotclock(df['SHOT_CLOCK'])
df['LOCATION'] = utils.normalize_location(df['LOCATION'])
# Cast the target column to float64 via the pandas API.
df['PTS'] = df['PTS'].astype(np.float64)

# PTS_TYPE is withheld from rescaling so it keeps its raw values; the same
# `features` list object is passed to the helper, exactly as before.
# The try/finally guarantees PTS_TYPE is put back even if rescaling raises;
# otherwise `features` would silently lose it and a re-run of this cell would
# fail on `.remove`.
# NOTE(review): the meaning of the third argument ([]) is not visible from
# this notebook -- confirm against utils.rescale_features.
features.remove("PTS_TYPE")
try:
    df = utils.rescale_features(features, df, [])
finally:
    features.append("PTS_TYPE")

In [5]:
from sklearn.neural_network import MLPClassifier

In [29]:
# MLP on all shots: two hidden layers as wide as the feature list, then a
# 5-unit layer, with tanh activations. No random_state is passed, so the score
# is expected to vary between runs.
n_inputs = len(features)
clf = MLPClassifier(hidden_layer_sizes=(n_inputs, n_inputs, 5), activation='tanh')
cv_result = utils.get_cross_validated_score(df[features], df[target], clf)
print("Score %.4f (+/- %.4f)" % cv_result)
# Author's recorded result: with the advanced features, Score 0.6179 (+/- 0.0042),
# better than the previous best of 0.6141 (+/- 0.0061).

Score 0.6179 (+/- 0.0042)


In [23]:
# Partition the shots by shot value so each kind gets its own classifier.
# The comparison is against the raw 3.0 / 2.0 values of PTS_TYPE.
is_three_pointer = df['PTS_TYPE'] == 3.0
is_two_pointer = df['PTS_TYPE'] == 2.0
df_3pts = df[is_three_pointer]
df_2pts = df[is_two_pointer]

In [37]:
# MLP restricted to 3-point attempts: two feature-width hidden layers followed
# by three 5-unit layers, tanh activations.
n_inputs = len(features)
clf = MLPClassifier(hidden_layer_sizes=(n_inputs, n_inputs, 5, 5, 5), activation='tanh')
cv_result = utils.get_cross_validated_score(df_3pts[features], df_3pts[target], clf)
print("Score %.4f (+/- %.4f)" % cv_result)
# Author's recorded result for 3pts: with advanced features Score 0.6486 (+/- 0.0002)
# vs Score 0.6485 (+/- 0.0003) without them.

Score 0.6487 (+/- 0.0006)


In [39]:
# MLP restricted to 2-point attempts, same architecture as the 3-point model:
# two feature-width hidden layers followed by three 5-unit layers, tanh.
n_inputs = len(features)
clf = MLPClassifier(hidden_layer_sizes=(n_inputs, n_inputs, 5, 5, 5), activation='tanh')
cv_result = utils.get_cross_validated_score(df_2pts[features], df_2pts[target], clf)
print("Score %.4f (+/- %.4f)" % cv_result)
# Author's recorded result for 2pts: with advanced features Score 0.6076 (+/- 0.0055)
# vs Score 0.6072 (+/- 0.0057).

Score 0.6076 (+/- 0.0055)


In [7]:
# One-hot encode the shooter: one 0/1 indicator column per distinct
# player_name, named after the player, and register each as a model feature.
#
# pd.get_dummies builds every indicator column in one vectorized pass; the
# previous nested Python loop did rows x players list appends and then
# inserted the columns one at a time.
# `players` now holds the indicator DataFrame (player name -> 0/1 column).
players = pd.get_dummies(df['player_name']).astype(int)

# Only add what is not already there, so re-running this cell neither
# duplicates columns in `df` nor repeats names in `features`.
new_player_columns = [name for name in players.columns if name not in df.columns]
if new_player_columns:
    df = pd.concat([df, players[new_player_columns]], axis=1)
features.extend(name for name in players.columns if name not in features)

In [8]:
import time

In [9]:
# Time a cross-validated fit of a single-hidden-layer MLP whose width equals
# the current length of the feature list (including any player indicator
# columns that have been appended to `features`).
start = time.time()
# hidden_layer_sizes is documented as a tuple; the trailing comma makes this a
# real one-element tuple instead of a parenthesised int.
clf = MLPClassifier(hidden_layer_sizes=(len(features),), activation='tanh')
print("Score %.4f (+/- %.4f)" % utils.get_cross_validated_score(df[features], df[target], clf))
end = time.time()
# Elapsed wall-clock time in seconds.
print(end - start)

Score 0.5329 (+/- 0.0150)
834.833921909
