## Kobe Bryant Shot Selection Project

### Team Members: Kit Ha, Vinh Ngo, Johnathan Tahod, Anthony Collado

#### Credit to Kaggle for data and idea. Link to webpage below
#### https://www.kaggle.com/c/kobe-bryant-shot-selection
#### Github: https://github.com/johntahod/KobeBryant_ShotSelection/tree/master

##### *Markdown conventions just to be consistent
# Title
## Headers
### Sections
#### Subpoints? idk
##### Notes/Comments

### Imports and Setting up the data frame + first look at data

In [35]:
import numpy as np
import pandas as pd

kobe_data = pd.read_csv('data.csv')
# dislpay(kobe_data.head(10))

full_features = [kobe_data.columns]
print(full_features)

[Index(['action_type', 'combined_shot_type', 'game_event_id', 'game_id', 'lat',
       'loc_x', 'loc_y', 'lon', 'minutes_remaining', 'period', 'playoffs',
       'season', 'seconds_remaining', 'shot_distance', 'shot_made_flag',
       'shot_type', 'shot_zone_area', 'shot_zone_basic', 'shot_zone_range',
       'team_id', 'team_name', 'game_date', 'matchup', 'opponent', 'shot_id'],
      dtype='object')]


### Setting up Lists

In [36]:
# Target List (this is our y)
target = ['shot_made_flag']

#### These are features we will not be using.
team_name: This feature will not be used because Kobe has only ever played for the Lakers so this provides no new data.

team_id: This is the same as team_name

matchup: This tells the same information as opponent (eg. Lakers vs Portland)

game_event_id: Don't need to id the game

game_id: Same reason as above

shot_id: Don't need to id the shot

In [37]:
# Features that we will not be using.
unused_features = ['team_name', 'team_id', 'matchup', 'game_event_id', 'game_id', 'shot_id']

#### Removing unused features

In [38]:
removed_features = unused_features
# print(removed_features)

feature_cols = [feature for feature in kobe_data.columns if feature not in removed_features]
# print(feature_cols)

#### Remove spaces from data for fit training

In [39]:
# Not sure who did this but I commented it out. (Maybe this is me. ¯\_(ツ)_/¯ ) -Kit

# from sklearn.preprocessing import LabelEncoder
# y2 = ["A","1","4","F","A","1","4","F"]
# lb = LabelEncoder()
# y = lb.fit_transform(y2)

### Implement One-Hot-Encoding

In [40]:
# Features for One-Hot-Encoding
OHE_features = ['action_type', 'combined_shot_type', 'season', 'shot_type', 'shot_zone_area', 'shot_zone_basic', 'shot_zone_range']

In [41]:
# import for one hot encoding
from sklearn.preprocessing import OneHotEncoder
dummies = pd.DataFrame()

for feature in OHE_features:
    dummies = pd.concat([dummies, pd.get_dummies(kobe_data[feature])], axis = 'columns')
    dummies.drop(dummies.columns[[-1,]], axis=1, inplace=True)
    
display(dummies.head(10))

Unnamed: 0,Alley Oop Dunk Shot,Alley Oop Layup shot,Cutting Finger Roll Layup Shot,Cutting Layup Shot,Driving Bank shot,Driving Dunk Shot,Driving Finger Roll Layup Shot,Driving Finger Roll Shot,Driving Floating Bank Jump Shot,Driving Floating Jump Shot,...,Above the Break 3,Backcourt,In The Paint (Non-RA),Left Corner 3,Mid-Range,Restricted Area,16-24 ft.,24+ ft.,8-16 ft.,Back Court Shot
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,0
4,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


In [42]:
merged = pd.concat([kobe_data, dummies], axis='columns')

display(merged.head(10))

Unnamed: 0,action_type,combined_shot_type,game_event_id,game_id,lat,loc_x,loc_y,lon,minutes_remaining,period,...,Above the Break 3,Backcourt,In The Paint (Non-RA),Left Corner 3,Mid-Range,Restricted Area,16-24 ft.,24+ ft.,8-16 ft.,Back Court Shot
0,Jump Shot,Jump Shot,10,20000012,33.9723,167,72,-118.1028,10,1,...,0,0,0,0,1,0,1,0,0,0
1,Jump Shot,Jump Shot,12,20000012,34.0443,-157,0,-118.4268,10,1,...,0,0,0,0,1,0,0,0,1,0
2,Jump Shot,Jump Shot,35,20000012,33.9093,-101,135,-118.3708,7,1,...,0,0,0,0,1,0,1,0,0,0
3,Jump Shot,Jump Shot,43,20000012,33.8693,138,175,-118.1318,6,1,...,0,0,0,0,1,0,1,0,0,0
4,Driving Dunk Shot,Dunk,155,20000012,34.0443,0,0,-118.2698,6,2,...,0,0,0,0,0,1,0,0,0,0
5,Jump Shot,Jump Shot,244,20000012,34.0553,-145,-11,-118.4148,9,3,...,0,0,0,0,1,0,0,0,1,0
6,Layup Shot,Layup,251,20000012,34.0443,0,0,-118.2698,8,3,...,0,0,0,0,0,1,0,0,0,0
7,Jump Shot,Jump Shot,254,20000012,34.0163,1,28,-118.2688,8,3,...,0,0,0,0,0,1,0,0,0,0
8,Jump Shot,Jump Shot,265,20000012,33.9363,-65,108,-118.3348,6,3,...,0,0,1,0,0,0,0,0,1,0
9,Running Jump Shot,Jump Shot,294,20000012,33.9193,-33,125,-118.3028,3,3,...,0,0,1,0,0,0,0,0,1,0


In [43]:
final = merged.drop(OHE_features, axis='columns')
final_features = [final.columns]
display(final.head(10))

Unnamed: 0,game_event_id,game_id,lat,loc_x,loc_y,lon,minutes_remaining,period,playoffs,seconds_remaining,...,Above the Break 3,Backcourt,In The Paint (Non-RA),Left Corner 3,Mid-Range,Restricted Area,16-24 ft.,24+ ft.,8-16 ft.,Back Court Shot
0,10,20000012,33.9723,167,72,-118.1028,10,1,0,27,...,0,0,0,0,1,0,1,0,0,0
1,12,20000012,34.0443,-157,0,-118.4268,10,1,0,22,...,0,0,0,0,1,0,0,0,1,0
2,35,20000012,33.9093,-101,135,-118.3708,7,1,0,45,...,0,0,0,0,1,0,1,0,0,0
3,43,20000012,33.8693,138,175,-118.1318,6,1,0,52,...,0,0,0,0,1,0,1,0,0,0
4,155,20000012,34.0443,0,0,-118.2698,6,2,0,19,...,0,0,0,0,0,1,0,0,0,0
5,244,20000012,34.0553,-145,-11,-118.4148,9,3,0,32,...,0,0,0,0,1,0,0,0,1,0
6,251,20000012,34.0443,0,0,-118.2698,8,3,0,52,...,0,0,0,0,0,1,0,0,0,0
7,254,20000012,34.0163,1,28,-118.2688,8,3,0,5,...,0,0,0,0,0,1,0,0,0,0
8,265,20000012,33.9363,-65,108,-118.3348,6,3,0,12,...,0,0,1,0,0,0,0,0,1,0
9,294,20000012,33.9193,-33,125,-118.3028,3,3,0,36,...,0,0,1,0,0,0,0,0,1,0


#### Y value splits

In [44]:
# Data frame but with the prediction rwos removed (rows with a Nan value in the target column)
y = kobe_data[target].dropna()
display(y.head(10))


Unnamed: 0,shot_made_flag
1,0.0
2,1.0
3,0.0
4,1.0
5,0.0
6,1.0
8,1.0
9,0.0
10,0.0
11,1.0


#### X value splits

In [45]:
# Data with all prediction data dropped (so that we don't learn from it)
X = final.dropna(subset=target)[final_features]
display(X.head(10))


KeyError: "None of [Index([('game_event_id', 'game_id', 'lat', 'loc_x', 'loc_y', 'lon', 'minutes_remaining', 'period', 'playoffs', 'seconds_remaining', 'shot_distance', 'shot_made_flag', 'team_id', 'team_name', 'game_date', 'matchup', 'opponent', 'shot_id', 'Alley Oop Dunk Shot', 'Alley Oop Layup shot', 'Cutting Finger Roll Layup Shot', 'Cutting Layup Shot', 'Driving Bank shot', 'Driving Dunk Shot', 'Driving Finger Roll Layup Shot', 'Driving Finger Roll Shot', 'Driving Floating Bank Jump Shot', 'Driving Floating Jump Shot', 'Driving Hook Shot', 'Driving Jump shot', 'Driving Layup Shot', 'Driving Reverse Layup Shot', 'Driving Slam Dunk Shot', 'Dunk Shot', 'Fadeaway Bank shot', 'Fadeaway Jump Shot', 'Finger Roll Layup Shot', 'Finger Roll Shot', 'Floating Jump shot', 'Follow Up Dunk Shot', 'Hook Bank Shot', 'Hook Shot', 'Jump Bank Shot', 'Jump Hook Shot', 'Jump Shot', 'Layup Shot', 'Pullup Bank shot', 'Pullup Jump shot', 'Putback Dunk Shot', 'Putback Layup Shot', 'Putback Slam Dunk Shot', 'Reverse Dunk Shot', 'Reverse Layup Shot', 'Reverse Slam Dunk Shot', 'Running Bank shot', 'Running Dunk Shot', 'Running Finger Roll Layup Shot', 'Running Finger Roll Shot', 'Running Hook Shot', 'Running Jump Shot', 'Running Layup Shot', 'Running Pull-Up Jump Shot', 'Running Reverse Layup Shot', 'Running Slam Dunk Shot', 'Running Tip Shot', 'Slam Dunk Shot', 'Step Back Jump shot', 'Tip Layup Shot', 'Turnaround Bank shot', 'Turnaround Fadeaway Bank Jump Shot', 'Turnaround Fadeaway shot', 'Turnaround Finger Roll Shot', 'Turnaround Hook Shot', 'Bank Shot', 'Dunk', 'Hook Shot', 'Jump Shot', 'Layup', '1996-97', '1997-98', '1998-99', '1999-00', '2000-01', '2001-02', '2002-03', '2003-04', '2004-05', '2005-06', '2006-07', '2007-08', '2008-09', '2009-10', '2010-11', '2011-12', '2012-13', '2013-14', '2014-15', '2PT Field Goal', 'Back Court(BC)', 'Center(C)', ...)], dtype='object')] are in the [columns]"

#### Prediction data.
#### DO NOT USE IN TRAINING!

In [51]:
final_prediction_data = kobe_data[kobe_data['shot_made_flag'].isnull()]
# print(final_prediction_data)


# Second way to write this
# nan_rows = kobe_data[kobe_data[target].isnull().any(1)]
# print(nan_rows)

### Finding the best single feature for predicting shot made using KNN

In [52]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

In [58]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=10)

In [59]:
k = 3
knn = KNeighborsClassifier(n_neighbors=k)

In [60]:
knn.fit(X_train, y_train)

ValueError: could not convert string to float: 'Jump Shot'

In [None]:
y_predict = knn.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_predict)

print(accuracy)

[1, 2, 3]


[4, 1]


[1, 2, 3, 4, 1]
