In [1]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
%matplotlib inline
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

import shotpredictor

import random

In [2]:
import json
from pprint import pprint

def getGameJSON(path):
    """Read the JSON document stored at ``path`` and return it decoded.

    Parameters
    ----------
    path : str
        Location of the JSON file to open.

    Returns
    -------
    Whatever ``json.load`` produces for the file's contents.
    """
    with open(path) as game_file:
        return json.load(game_file)
        
# Decode the game file once; later cells read data['events'] from it.
data = getGameJSON('GSWvsCLE.json')


In [3]:
# Column names applied to each entry of moment[5] when building DataFrames.
headers = ["team_id", "player_id", "x_loc", "y_loc", "radius"]

def playerDFtoList(frame):
    """Return the frame's positions as a numpy array of [x_loc, y_loc] pairs.

    One pair is produced per row, in row order. Despite the name, the
    result is an ``np.array``, not a list.
    """
    xs = frame['x_loc'].tolist()
    ys = frame['y_loc'].tolist()
    return np.array([[x, y] for x, y in zip(xs, ys)])

def getMoments(data):
    """Flatten every moment of every event into a single DataFrame.

    Each entry of ``moment[5]`` becomes one row; the columns are named
    after the module-level ``headers`` list.

    Parameters
    ----------
    data : dict
        Decoded game file with an ``'events'`` list whose items each
        carry a ``'moments'`` list.
    """
    rows = [
        entry
        for event in data['events']
        for moment in event['moments']
        for entry in moment[5]
    ]
    return pd.DataFrame(rows, columns=headers)

In [57]:
# Number of tracking rows kept from the start of the game.
# NOTE(review): reads as 25 samples/s * 60 s * 6 * 11 rows per moment
# (presumably ten players plus the ball). The name says five minutes but
# the factor is 6 -- confirm which one is intended.
min_5 = 25*60*6*11
# One integer label per retained moment. Floor division keeps the labels
# integral no matter how the interpreter treats `/` between two ints.
index = np.arange(0, min_5 // 11, 1)

def getDistBetween(a,b):
    """Euclidean distance between the (x_loc, y_loc) positions of ``a`` and ``b``.

    Both arguments only need to support ``["x_loc"]`` / ``["y_loc"]``
    lookups; the arithmetic is applied to whatever those return.
    """
    dx = a["x_loc"] - b["x_loc"]
    dy = a["y_loc"] - b["y_loc"]
    return np.sqrt(dx**2 + dy**2)

def dist(data, ball):
    """Add a ``distanceToBall`` column to ``data``, in place.

    The column holds ``getDistBetween(data, ball)``. Nothing is returned.
    """
    ball_distance = getDistBetween(data, ball)
    data["distanceToBall"] = ball_distance
    
def defDist(l):
    """Add a ``distToNearestDef`` column to every player frame in ``l``, in place.

    For each frame, the distance to each of the five opposing players is
    computed with ``getDistBetween`` and the row-wise minimum is stored as
    ``distToNearestDef``. The five per-opponent columns are scratch space
    and are removed again before the next frame is processed.

    Parameters
    ----------
    l : list of DataFrame
        Player frames in the fixed order
        [steph, klay, bogut, green, barnes, lebron, smith, love, irving, mozgov].
        Positions 0-4 are used as the GSW players and 5-9 as the CLE
        players, so the order matters.

    Frames that are empty, or whose first ``team_id`` is neither team,
    are left untouched. Nothing is returned.
    """
    # team_id -> (scratch column name, position in `l`) for each opponent.
    opponents_by_team = {
        # CLE player: measure against all GSW players
        1610612739: [
            ('distToCurry', 0),
            ('distToThompson', 1),
            ('distToBogut', 2),
            ('distToGreen', 3),
            ('distToBarnes', 4),
        ],
        # GSW player: measure against all CLE players
        1610612744: [
            ('distToJames', 5),
            ('distToSmith', 6),
            ('distToLove', 7),
            ('distToIrving', 8),
            ('distToMozgov', 9),
        ],
    }

    for player in l:
        if len(player) == 0:
            # No rows, so there is no team to read and nothing to measure.
            continue

        # Positional lookup: works whether or not the index starts at label 0.
        opponents = opponents_by_team.get(player['team_id'].iloc[0])
        if opponents is None:
            continue

        scratch_columns = []
        for column, position in opponents:
            player[column] = getDistBetween(player, l[position])
            scratch_columns.append(column)

        player['distToNearestDef'] = player[scratch_columns].min(axis=1)

        # Drop by name rather than by position. A positional drop of columns
        # 7-11 removes the wrong columns once the frame already carries
        # distToNearestDef, e.g. when this runs a second time on the same frames.
        player.drop(scratch_columns, axis=1, inplace=True)
            
def hasBall(data):
    """Add a ``hasBall`` column to ``data``, in place.

    A row gets 1 when its ``distanceToBall`` is strictly below 2.5 and
    0 otherwise. Nothing is returned.
    """
    # NOTE(review): 2.5 is in the same units as x_loc / y_loc -- presumably feet; confirm.
    possession_radius = 2.5

    def _flag(distance):
        if distance < possession_radius:
            return 1
        return 0

    data["hasBall"] = data["distanceToBall"].apply(_flag)
    
def transform(l, ball):
    """Work out, for every frame label in ``index``, which player has the ball.

    Side effects (all in place):
    * the index of ``ball`` and of every frame in ``l`` is reset to 0..n-1;
    * every frame in ``l`` gains ``distanceToBall`` and ``hasBall``
      (via ``dist`` / ``hasBall``) and ``distToNearestDef`` (via ``defDist``).

    Parameters
    ----------
    l : list of DataFrame
        Player frames, in the order ``defDist`` expects.
    ball : DataFrame
        The ball's rows.

    Returns
    -------
    DataFrame
        Columns ``player_id``, ``team_id``, ``x_loc``, ``y_loc``, one row per
        label of the module-level ``index``. Labels where no player was
        flagged as having the ball are filled with 0.
    """
    ball.reset_index(drop=True, inplace=True)

    result = []
    for player in l:
        player.reset_index(drop=True, inplace=True)
        dist(player, ball)
        hasBall(player)
        # Taken before defDist() runs; only these four columns are kept anyway.
        result.append(player[(player.hasBall == 1)][["player_id", "team_id", "x_loc", "y_loc"]])
    defDist(l)

    df_res = pd.concat(result)
    # Several players can be flagged on the same label. A stable sort keeps
    # them in the order of `l`, so the "first occurrence" picked below is
    # always the earliest-listed player instead of whichever row an
    # unstable sort happened to put first.
    pos = df_res.sort_index(kind='mergesort')
    idx = np.unique(pos.index, return_index=True)[1]
    pos = pos.iloc[idx]

    pos = pos.reindex(index, fill_value=0)
    return pos

def removeRepeats(seq):
    """Return the items of ``seq`` as a list with later duplicates removed.

    The first occurrence of each item is kept and the original order is
    preserved. Items must be hashable.
    """
    seen = set()
    unique_items = []
    for item in seq:
        if item in seen:
            continue
        seen.add(item)
        unique_items.append(item)
    return unique_items

In [58]:
# Flatten the moments of every event into player rows, and record the
# running index of the last moment seen when each event finishes.
player_moments = []
endsOfEvents = []
counter = -1
for event in data['events']:
    moments = event['moments']
    for moment in moments:
        player_moments.extend(moment[5])
    counter += len(moments)
    endsOfEvents.append(counter)

# Keep only the first min_5 rows.
df = pd.DataFrame(player_moments, columns=headers).head(min_5)
# An event without moments repeats the previous end index; keep each value once.
endsOfEvents = removeRepeats(endsOfEvents)

In [59]:
# Rows of the ball (player_id -1) and of each of the ten tracked players.
ball = df[df.player_id == -1]

gsw_ids = (201939, 202691, 101106, 203110, 203084)
cle_ids = (2544, 2747, 201567, 202681, 202389)

steph, klay, bogut, green, barnes = [df[df.player_id == pid] for pid in gsw_ids]
lebron, smith, love, irving, mozgov = [df[df.player_id == pid] for pid in cle_ids]

# The same positions as numpy arrays of [x_loc, y_loc] pairs.
ballA = playerDFtoList(ball)

stephA, klayA, bogutA, greenA, barnesA = [
    playerDFtoList(frame) for frame in (steph, klay, bogut, green, barnes)
]
lebronA, smithA, loveA, irvingA, mozgovA = [
    playerDFtoList(frame) for frame in (lebron, smith, love, irving, mozgov)
]

# Radius value of every ball row.
radii = ball['radius'].tolist()

In [60]:
# defDist() (called by transform) looks players up by position in this
# list, so the order is fixed: the five GSW frames, then the five CLE frames.
l = [
    steph, klay, bogut, green, barnes,
    lebron, smith, love, irving, mozgov,
]

# Ball-handler per frame label, then its [x_loc, y_loc] pairs as an array.
pos = transform(l, ball)
a_pos = playerDFtoList(pos)

In [61]:
# Spot-check one player frame after transform() has added its columns in place.
steph.head()

Unnamed: 0,team_id,player_id,x_loc,y_loc,radius,distanceToBall,hasBall,distToNearestDef
0,1610612744,201939,22.98616,42.65101,0,6.288762,0,5.652511
1,1610612744,201939,22.91989,42.61257,0,6.205769,0,5.629891
2,1610612744,201939,22.85303,42.57467,0,6.105101,0,5.608006
3,1610612744,201939,22.79063,42.54858,0,5.978324,0,5.571235
4,1610612744,201939,22.72529,42.51892,0,5.839616,0,5.540784


In [8]:
# Find stretches of ball positions that are replayed after an event ends.
# Each entry is [index of the event's last position, index where that same
# position shows up again].
skipRanges = []
# Only the first 18 event ends are examined (the earlier note said 17; the
# slice takes 18) because only the opening minutes of the game are loaded
# and later end indices would fall outside ballA.
for eventEndIndex in endsOfEvents[:18]:
    lastLocation = ballA[eventEndIndex]
    # Scan every later ball position. The previous upper bound,
    # len(ballA[eventEndIndex+1:]), is the *length* of the remaining tail,
    # not its end index, so the search window shrank as eventEndIndex grew
    # and later positions were never checked.
    for i in range(eventEndIndex + 1, len(ballA)):
        if (ballA[i] == lastLocation).all():
            skipRanges.append([eventEndIndex, i])
            break
# Formatted so the output is the same whether print is a statement or a function.
print('Ranges to skip: %s' % (skipRanges,))

Ranges to skip: [[149, 299], [922, 1360], [1360, 1510], [1885, 2410], [2410, 2560], [2885, 3360], [3360, 3510], [3881, 4031]]


In [9]:
def distance(ball):
    """Add each row's distance to the two baskets to ``ball``, in place.

    Two columns are added, both computed from ``x_loc`` / ``y_loc``:

    * ``cavsHoop``     -- distance to the basket at (5.32, 24.8)
    * ``warriorsHoop`` -- distance to the basket at (88.55, 24.8)

    Nothing is returned.
    """
    basket1 = [5.32, 24.8]
    basket2 = [88.55, 24.8]

    ball['cavsHoop'] = np.sqrt((basket1[0] - ball['x_loc'])**2 + (basket1[1] - ball['y_loc'])**2)
    ball['warriorsHoop'] = np.sqrt((basket2[0] - ball['x_loc'])**2 + (basket2[1] - ball['y_loc'])**2)
    
# Adds the cavsHoop / warriorsHoop columns to the ball frame in place.
distance(ball)

In [12]:
# Attach the ball's hoop distances to the ball-handler table, aligned on row label.
# Hoop columns left behind by an earlier run of this cell are dropped first,
# so running the cell again does not pile up duplicate columns.
hoop_columns = ["cavsHoop", "warriorsHoop"]
pos = pd.concat(
    [pos.drop(hoop_columns, axis=1, errors="ignore"), ball[hoop_columns]],
    axis=1,
)

In [13]:
# player_id -> lower-case player name for the ten tracked players.
dic = {
    201939: "stephen curry",
    202691: "klay thompson",
    101106: "andrew bogut",
    203110: "draymond green",
    203084: "harrison barnes",
    2544: "lebron james",
    2747: "j.r. smith",
    201567: "kevin love",
    202681: "kyrie irving",
    202389: "timofey mozgov",
}

# team id -> team nickname.
# NOTE(review): these keys are strings, while team_id is compared against
# ints elsewhere in the notebook -- confirm before using this for lookups.
team = {
    "1610612744": "Warriors",
    "1610612739": "Cavaliers",
}

In [14]:
def shot_dist(dist):
    """Bucket a distance into one of four range labels.

    Returns "less than 8", "8-16", "16-24" or "24+"; each upper bound is
    exclusive, so 8 falls in "8-16" and 24 falls in "24+".
    """
    buckets = ((8, "less than 8"), (16, "8-16"), (24, "16-24"))
    for upper_bound, label in buckets:
        if dist < upper_bound:
            return label
    return "24+"
    
def shot_decide(dist):
    """Pick a shot-type label for a distance.

    * exactly 0 -> the integer 0
    * 8 or more -> "jump"
    * otherwise -> "layup", except that one draw in five
      (``random.randint(0, 4) == 0``) gives "else" instead

    The result depends on the state of the ``random`` module for
    distances below 8.
    """
    if dist == 0:
        return 0
    if not dist < 8:
        return "jump"
    return "else" if random.randint(0, 4) == 0 else "layup"
    
def addParameters(data):
    """Return a copy of ``data`` extended with the shot features.

    Added columns:

    * ``distanceToBasket`` -- ``cavsHoop`` on rows of team 1610612739,
      ``warriorsHoop`` on rows of team 1610612744, and 0 on every other
      label of the module-level ``index``.
    * ``player`` -- the name looked up in ``dic`` (0 where player_id is 0).
    * ``0``, ``"else"``, ``"jump"``, ``"layup"`` -- indicator columns for
      the outcome of ``shot_decide`` on ``distanceToBasket``.

    The input frame is left unmodified. Because ``shot_decide`` draws
    random numbers, repeated calls can give different indicator columns.

    Raises
    ------
    KeyError
        If a non-zero ``player_id`` is missing from ``dic``.
    """
    data = data.copy()

    cavs_rows = data["cavsHoop"][data["team_id"] == 1610612739]
    warriors_rows = data["warriorsHoop"][data["team_id"] == 1610612744]
    data["distanceToBasket"] = pd.concat([cavs_rows, warriors_rows]).reindex(index, fill_value=0)

    shot_decide_c = pd.get_dummies(data["distanceToBasket"].apply(shot_decide))
    # get_dummies only creates columns for outcomes that actually occurred.
    # Pin all four so later column selections by name cannot fail when,
    # for instance, the random "else" outcome never came up.
    shot_decide_c = shot_decide_c.reindex(columns=[0, "else", "jump", "layup"], fill_value=0)

    data["player"] = data["player_id"].apply(lambda x: 0 if x == 0 else dic[x])

    return pd.concat([data, shot_decide_c], axis=1)
    

In [15]:
# Feature table: addParameters() works on a copy, so pos itself is unchanged.
final = addParameters(pos)

In [19]:
# Preview the first 29 rows of the feature table.
final.head(29)

Unnamed: 0,player_id,team_id,x_loc,y_loc,cavsHoop,warriorsHoop,distanceToBasket,player,0,else,jump,layup
0,202681,1610612739,27.87412,45.4898,31.118304,63.353148,31.118304,kyrie irving,0,0,1,0
1,202681,1610612739,27.79347,45.43106,30.948569,63.424316,30.948569,kyrie irving,0,0,1,0
2,202681,1610612739,27.71793,45.36438,30.765752,63.520513,30.765752,kyrie irving,0,0,1,0
3,202681,1610612739,27.61261,45.33913,30.57115,63.641289,30.57115,kyrie irving,0,0,1,0
4,202681,1610612739,27.51496,45.30448,30.36609,63.786162,30.36609,kyrie irving,0,0,1,0
5,202681,1610612739,27.42254,45.26554,30.151955,63.954666,30.151955,kyrie irving,0,0,1,0
6,202681,1610612739,27.33418,45.22266,30.145333,63.95541,30.145333,kyrie irving,0,0,1,0
7,202681,1610612739,27.25215,45.17668,30.125077,63.957434,30.125077,kyrie irving,0,0,1,0
8,202681,1610612739,27.13954,45.15537,30.090595,63.960475,30.090595,kyrie irving,0,0,1,0
9,202681,1610612739,27.03458,45.13076,30.041336,63.964256,30.041336,kyrie irving,0,0,1,0


In [22]:
# NOTE(review): scratch probe of a single model. Reading top to bottom,
# model_dic is not assigned until the cells below, so this cell cannot
# succeed on a fresh Restart & Run All. The recorded error comes from
# model_dic still holding its placeholder 0 values when this was executed.
model_dic[203110].predict_proba([82, 0, 1 ,0])

AttributeError: 'int' object has no attribute 'predict_proba'

In [24]:
# player_id -> shot model. Every entry starts as the placeholder 0 and is
# meant to be replaced with a real model afterwards.
# The placeholders are written inline rather than through throwaway
# variables. Names such as bogut, green, barnes, smith, love and irving are
# already bound to those players' tracking DataFrames, and rebinding them
# to 0 breaks any later re-run of the cells that use the frames.
model_dic = {
    201939: 0,  # stephen curry
    202691: 0,  # klay thompson
    101106: 0,  # andrew bogut
    203110: 0,  # draymond green
    203084: 0,  # harrison barnes
    2544: 0,    # lebron james
    2747: 0,    # j.r. smith
    201567: 0,  # kevin love
    202681: 0,  # kyrie irving
    202389: 0,  # timofey mozgov
}

In [25]:
# Replace each placeholder in model_dic with a predictor for that player,
# printing the name as progress.
# NOTE(review): "2014" is passed straight to shotpredictor.predictor --
# presumably the season; confirm against that module.
for player_id, player_name in dic.items():
    print(player_name)
    model_dic[player_id] = shotpredictor.predictor(player_name, "2014")

lebron james
andrew bogut
stephen curry
timofey mozgov
draymond green
kyrie irving
j.r. smith
harrison barnes
klay thompson
kevin love


In [26]:
# Score every row with the model of the player who has the ball.
feature_columns = ['distanceToBasket', 'else', 'jump', 'layup']
pred = final[feature_columns]

pct = []
for row_label, features in pred.iterrows():
    shooter_id = final["player_id"][row_label]
    if shooter_id == 0:
        # player_id 0 marks a row with no ball-handler: nothing to predict.
        pct.append(0)
        continue
    # NOTE(review): the features go in as a flat list and [0][1] is read
    # from the result -- presumably the probability of the positive class
    # for a single sample; confirm against shotpredictor.
    shot_probabilities = model_dic[shooter_id].predict_proba(features.tolist())
    pct.append(shot_probabilities[0][1])

final["pct"] = pct



In [39]:
# Everything the export needs, keyed by name; every value is a plain list.
ex = {
    "Ball": ballA.tolist(),

    "Curry": stephA.tolist(),
    "Thompson": klayA.tolist(),
    "Bogut": bogutA.tolist(),
    "Green": greenA.tolist(),
    "Barnes": barnesA.tolist(),

    "James": lebronA.tolist(),
    "Smith": smithA.tolist(),
    "Love": loveA.tolist(),
    "Irving": irvingA.tolist(),
    "Mozgov": mozgovA.tolist(),

    "radius": radii,
    "pos": a_pos.tolist(),
    "pct": final["pct"].tolist(),
}

# Cut the skip ranges out of every list. Working from the last range back
# to the first keeps the indices of the earlier ranges valid while deleting.
for series in ex.values():
    for skip_start, skip_end in reversed(skipRanges):
        del series[skip_start:skip_end]

In [40]:
# Serialise the export dictionary as JSON into the website's public folder.
dump_path = "./website/public/big_dump.json"
with open(dump_path, "w") as outfile:
    json.dump(ex, outfile)