In [1]:
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from matplotlib import  offsetbox as osb
from matplotlib.cbook import get_sample_data
from matplotlib._png import read_png
from io import BytesIO, StringIO
from PIL import Image

import scipy
import numpy as np
import seaborn as sns
import pandas as pd

import plot_court
import lgcp_func as lgcp
import nmf_func as nmf
import sklearn.model_selection as sklearnMS


import requests

import sys
import subprocess
import os
import simplejson as json
from pprint import pprint

from cache_func import cached



In [34]:
def train_test_split_player_shotInfo(randSeed = 546682, num_players = 300, shots_path = None):
    """Load the shot log and split each top player's shots into train/test sets.

    Parameters
    ----------
    randSeed : int
        Forwarded as ``random_state`` to ``train_test_split`` so the split is
        reproducible.
    num_players : int
        Number of players to keep, ranked by how many rows they have in the
        shot log (default 300).
    shots_path : str or None
        CSV file to read. ``None`` falls back to
        ``<parent of cwd>/data/shots/shots.csv``.

    Returns
    -------
    train_players_df : dict
        Player name -> DataFrame holding 80% of that player's rows.
    test_players_df : dict
        Player name -> DataFrame holding the remaining 20%.
    top_players_nameList : list
        Player names ordered by descending row count.
    playersID : dict
        Player name -> first ``PLAYER_ID`` found for that name.

    Notes
    -----
    Players are grouped by ``PLAYER_NAME``; rows of distinct players sharing a
    name would be merged, and only the first ID is recorded.
    """
    if shots_path is None:
        # Build the default location with os.path.join instead of a
        # hard-coded '/' separator.
        parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
        shots_path = os.path.join(parent_dir, 'data', 'shots', 'shots.csv')
    shots_dat = pd.read_csv(shots_path)

    # Column headers listed for the 2015 shot log (kept for reference):
    #    headers_2015 = ['GRID_TYPE', 'GAME_ID', 'GAME_EVENT_ID',
    #                       'PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_NAME',
    #                       'PERIOD', 'MINUTES_REMAINING', 'SECONDS_REMAINING',
    #                       'EVENT_TYPE', 'ACTION_TYPE', 'SHOT_TYPE',
    #                       'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA',
    #                       'SHOT_ZONE_RANGE', 'SHOT_DISTANCE',
    #                       'LOC_X', 'LOC_Y',
    #                       'SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG',
    #                       'GAME_DATE', 'HTM', 'VTM']       # HTM = Home team, VTM = Visiting team

    # Keep only the columns used downstream.
    df = pd.DataFrame(shots_dat,
                      columns = ['PLAYER_ID', 'PLAYER_NAME',
                                 'TEAM_ID', 'TEAM_NAME', 'ACTION_TYPE',
                                 'SHOT_DISTANCE', 'SHOT_TYPE',
                                 'LOC_X', 'LOC_Y',
                                 'SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG'])

    # value_counts() sorts by descending count, so head() keeps the players
    # with the most rows.
    top_players_shotNum = df.PLAYER_NAME.value_counts().head(num_players)
    top_players_nameList = top_players_shotNum.index.tolist()

    train_players_df = {}
    test_players_df = {}
    playersID = {}
    # Iterate over the ranked list itself (names are already unique) so the
    # dictionaries are filled in a deterministic order.
    for player in top_players_nameList:
        player_df = df[df.PLAYER_NAME == player]
        train_players_df[player], test_players_df[player] = \
                sklearnMS.train_test_split(player_df, test_size=0.2, random_state=randSeed)
        playersID[player] = player_df.PLAYER_ID.unique()[0]

    return train_players_df, test_players_df, top_players_nameList, playersID

#######################

def gen_players_shotHist(players_df, top_players_nameList, flag='SHOT_ATTEMPTED_FLAG'):
    """Bin each player's shots on a fixed 25 x 18 court grid.

    Parameters
    ----------
    players_df : dict
        Player name -> DataFrame with ``LOC_X``, ``LOC_Y`` and ``flag`` columns.
    top_players_nameList : iterable of str
        Players to process; every name must be a key of ``players_df``.
    flag : str
        Column whose values are summed inside each grid cell.

    Returns
    -------
    player_shotHist : dict
        Player name -> flattened (length 25*18) array of per-cell sums.
    binDat : tuple
        ``(bins, binRange, xedges, yedges, binnumber)``. ``binnumber`` is the
        one returned for the LAST player processed.

    Raises
    ------
    ValueError
        If ``top_players_nameList`` is empty (there would be no bin edges to
        return).
    """
    # Imported explicitly: a bare `import scipy` does not load the `stats`
    # submodule on older SciPy releases.
    from scipy import stats

    bins, binRange = ([25,18], [[-250,250], [-47.5,312.5]])

    player_shotHist = {}
    xedges = yedges = binnumber = None
    for player in top_players_nameList:
        shots = players_df[player]
        hist2d, xedges, yedges, binnumber = stats.binned_statistic_2d(shots.LOC_X, shots.LOC_Y,
                                                                      shots[flag],
                                                                      statistic='sum',
                                                                      bins=bins,
                                                                      range=binRange)
        player_shotHist[player] = hist2d.flatten()

    if xedges is None:
        # Previously this fell through to the return and failed with an
        # UnboundLocalError.
        raise ValueError('top_players_nameList is empty: no histogram to compute')

    return player_shotHist, (bins, binRange, xedges, yedges, binnumber)

In [3]:
# =============================================================================
# FUNCTIONS: Plot histograms
# =============================================================================    
        
    
def acquire_playerPic(PlayerID, timeout=10):
    """Download a player's 230x185 headshot and return it as a PIL image.

    Parameters
    ----------
    PlayerID : int or str
        Identifier inserted into the image URL.
    timeout : float
        Seconds to wait for the server before giving up (default 10).

    Returns
    -------
    PIL.Image.Image
        The decoded PNG.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (e.g. unknown player).
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    url = "http://stats.nba.com/media/players/230x185/"+str(PlayerID)+".png"
    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of letting PIL choke on an
    # error page.
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))
    return img
        


def plot_shotHist(player_shotHist, player, binDat, fileName, 
                  title='', norm_Opt='linear', plot_size=(5,5)):
    """Draw one player's normalised shot histogram over the court and save it.

    Parameters
    ----------
    player_shotHist : array-like
        Flattened histogram; it is reshaped to ``bins`` taken from ``binDat``.
    player : str
        Player name, used in the figure title.
    binDat : tuple
        ``(bins, binRange, xedges, yedges, binnumber)``; only ``bins``,
        ``xedges`` and ``yedges`` are used here.
    fileName : str
        Path the figure is written to (saved at 700 dpi).
    title : str
        Text appended after the player name in the title.
    norm_Opt : str
        ``'log'`` colours with ``LogNorm(vmin=1e-4, vmax=1e-1)``; any other
        value uses a linear scale with zero-valued cells masked out.
    plot_size : tuple
        Figure size passed to ``plt.figure``.
    """
    bins, _, xedges, yedges, _ = binDat

    counts = np.array(player_shotHist, dtype='float')
    total = np.sum(counts)
    # Normalise to fractions; an all-zero histogram stays all-zero instead of
    # turning into NaNs through a division by zero.
    fractions = counts/total if total != 0 else np.zeros_like(counts)
    shotHist_v = np.reshape(fractions, bins)
    ##########
    # imshow's extent is (left, right, bottom, top); the y edges are given
    # max-first here.
    extent = np.min(xedges), np.max(xedges), np.max(yedges), np.min(yedges)
    
    fig = plt.figure(figsize=plot_size)
    ax = plt.axes()
    
    plot_court.draw_court(outer_lines=True, lw=1.5)
    
    cmap = plt.cm.magma_r
    if norm_Opt == 'log':
        plt.imshow(shotHist_v.T, cmap=cmap, 
                   norm=colors.LogNorm(vmin=1e-4, vmax=1e-1),
                   alpha=.85, extent=extent)
    else:
        plt.imshow(np.ma.masked_where(shotHist_v.T == 0., shotHist_v.T), 
                   cmap=cmap, alpha=.85, extent=extent)
    
    ax.set_xlim([-300,300])
    ax.set_ylim([-100,500])
    # A boolean is required: the string 'off' is truthy and would switch the
    # grid ON in current Matplotlib.
    ax.grid(False)
    ax.axis('off')
    ax.set_title('%s: %s'%(player, title), fontsize=15)
    fig.tight_layout()
    fig.savefig(fileName, dpi=700)


In [4]:
def gen_shotHist_plots(player, phi=30, seed=546682, players_shotHist=None, bin_data=None):
    """Save the LGCP-smoothed and the raw shot-histogram plots for one player.

    Parameters
    ----------
    player : str
        Player name; used in the input and output file names.
    phi, seed : int
        Select the directory
        ``SHOT_ATTEMPTED_FLAG/shotHist_LGCP_phi<phi>_seed<seed>/`` from which
        ``norm_lambda_<player>.txt`` is loaded.
    players_shotHist : dict or None
        Player name -> raw flattened histogram. ``None`` falls back to the
        notebook global ``players_shotHist_train``.
    bin_data : tuple or None
        Bin description forwarded to ``plot_shotHist``. ``None`` falls back to
        the notebook global ``binDat``.

    Writes ``lgcp_shotHist_<player>.png`` and ``raw_shotHist_<player>.png``
    to the current directory, both with the log colour scale.
    """
    if bin_data is None:
        bin_data = binDat

    dirName = 'SHOT_ATTEMPTED_FLAG/shotHist_LGCP_phi%d_seed%d/'%(phi,seed)
    fileName = 'norm_lambda_%s.txt'%player

    outfileName = 'lgcp_shotHist_%s.png'%player
    plot_shotHist(np.loadtxt(os.path.join(dirName, fileName)), player, bin_data,
                  outfileName, title='LGCP', norm_Opt='log')

    if players_shotHist is None:
        # NOTE(review): `players_shotHist_train` is not defined in the cells
        # visible here (they define `players_shotAttm_train`); pass
        # `players_shotHist` explicitly or confirm the global exists.
        players_shotHist = players_shotHist_train

    outfileName = 'raw_shotHist_%s.png'%player
    plot_shotHist(players_shotHist[player], player, bin_data,
                  outfileName, title='raw', norm_Opt='log')

In [35]:
# Fixed seed so the per-player train/test split is reproducible.
randSeed = 546682

(train_players_df,
 test_players_df,
 top_players_nameList,
 playersID) = train_test_split_player_shotInfo(randSeed=randSeed)

# Lookup tables in both directions:
#   playersID   -- key: name, value: ID
#   playersName -- key: ID,   value: name
playersName = {player_id: name for name, player_id in playersID.items()}

# Histogram the shot ATTEMPTS of every player, for both splits.
flag_name = 'SHOT_ATTEMPTED_FLAG'

players_shotAttm_train, binDat = gen_players_shotHist(
    train_players_df, top_players_nameList, flag=flag_name)
players_shotAttm_test, binDat = gen_players_shotHist(
    test_players_df, top_players_nameList, flag=flag_name)

# Unpack the bin description returned by the last call.
(bins, binRange, xedges, yedges, binnumber) = binDat

In [36]:
# Histogram the MADE shots of every player, for both splits.
flag_name = 'SHOT_MADE_FLAG'

players_shotMade_train, binDat = gen_players_shotHist(train_players_df, 
                                                      top_players_nameList,
                                                      flag=flag_name)
# Fix: the held-out histogram must come from the TEST split. It was
# previously built from train_players_df, which made it a copy of the
# training histogram.
players_shotMade_test, binDat = gen_players_shotHist(test_players_df, 
                                                     top_players_nameList,
                                                     flag=flag_name)
bins, binRange, xedges, yedges, binnumber = binDat

In [39]:
# Per-player training totals: [shots attempted, shots made, made / attempted].
[
    [attempted, made, made / attempted]
    for attempted, made in (
        (players_shotAttm_train[name].sum(), players_shotMade_train[name].sum())
        for name in top_players_nameList
    )
]

[[1291.0, 567.0, 0.43919442292796285],
 [1259.0, 643.0, 0.5107227958697379],
 [1173.0, 495.0, 0.42199488491048592],
 [1153.0, 473.0, 0.41023417172593235],
 [1150.0, 524.0, 0.45565217391304347],
 [1143.0, 503.0, 0.44006999125109364],
 [1130.0, 594.0, 0.52566371681415924],
 [1104.0, 507.0, 0.45923913043478259],
 [1105.0, 482.0, 0.43619909502262444],
 [1103.0, 555.0, 0.50317316409791479],
 [1096.0, 489.0, 0.44616788321167883],
 [1074.0, 464.0, 0.43202979515828677],
 [1062.0, 479.0, 0.45103578154425611],
 [1061.0, 452.0, 0.42601319509896324],
 [1045.0, 458.0, 0.43827751196172249],
 [1034.0, 470.0, 0.45454545454545453],
 [989.0, 417.0, 0.42163801820020225],
 [960.0, 410.0, 0.42708333333333331],
 [950.0, 410.0, 0.43157894736842106],
 [942.0, 433.0, 0.45966029723991508],
 [924.0, 479.0, 0.51839826839826841],
 [921.0, 501.0, 0.5439739413680782],
 [912.0, 403.0, 0.44188596491228072],
 [906.0, 446.0, 0.49227373068432673],
 [888.0, 322.0, 0.36261261261261263],
 [886.0, 410.0, 0.46275395033860045]

In [32]:
# Inspect the made shots in one player's training split.
player = 'James Harden'
train_df = train_players_df[player]
train_df[train_df['SHOT_MADE_FLAG'] == 1]

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,ACTION_TYPE,SHOT_DISTANCE,SHOT_TYPE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG
52471,201935,James Harden,1610612745,Houston Rockets,Jump Shot,23,3PT Field Goal,159,178,1,1
51098,201935,James Harden,1610612745,Houston Rockets,Jump Shot,25,3PT Field Goal,-153,200,1,1
52383,201935,James Harden,1610612745,Houston Rockets,Layup Shot,1,2PT Field Goal,9,8,1,1
51221,201935,James Harden,1610612745,Houston Rockets,Pullup Jump shot,25,3PT Field Goal,-1,257,1,1
51159,201935,James Harden,1610612745,Houston Rockets,Driving Layup Shot,3,2PT Field Goal,14,31,1,1
51225,201935,James Harden,1610612745,Houston Rockets,Driving Layup Shot,2,2PT Field Goal,-22,0,1,1
52021,201935,James Harden,1610612745,Houston Rockets,Jump Shot,26,3PT Field Goal,82,247,1,1
51986,201935,James Harden,1610612745,Houston Rockets,Jump Shot,25,3PT Field Goal,205,159,1,1
51785,201935,James Harden,1610612745,Houston Rockets,Driving Layup Shot,1,2PT Field Goal,-19,-1,1,1
52435,201935,James Harden,1610612745,Houston Rockets,Cutting Finger Roll Layup Shot,1,2PT Field Goal,-11,2,1,1
