In [50]:
from nba_help import tracking, pbp, plotting, get_basic_statistics, constants
import numpy as np
import json
import math
import os
import time
import requests
import pandas as pd
from tqdm.auto import tqdm

import matplotlib.pyplot as plt

In [51]:
# Directory of raw tracking files. It is passed to tracking.get_tracking_data(),
# and get_all_games() treats each file name (up to the first '.') as a game id.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
TRACKING_DIRECTORY = "/home/avyayv/data/nba/rawtrackingfiles/data/csv/"
# Directory of play-by-play files; get_times_from_pbp() reads "<game id>.json"
# from this same location.
PBP_DIRECTORY = "/home/avyayv/data/nba/playbyplay/"

In [52]:
def get_times_from_pbp(gid):
    """Load one game's play-by-play file and extract shots and bad-pass turnovers.

    Reads ``<PBP_DIRECTORY>/<gid>.json`` and uses the rows stored under
    ``['resultSets'][0]['rowSet']``. Columns are addressed by position:
    column 2 is the event type (1 = made shot, 2 = missed shot, 5 = turnover),
    column 4 the period, column 6 the clock value handed to
    ``pbp.get_seconds``, and columns 7 / 9 the two description strings, which
    this function treats as the home and away descriptions respectively.

    Parameters
    ----------
    gid : str
        Game id; also the stem of the JSON file name.

    Returns
    -------
    tuple of three pandas.DataFrame
        * made shots: columns ``seconds``, ``assisted``, ``home_scorer``,
          ``away_scorer``
        * missed shots: columns ``seconds``, ``home_scorer``, ``away_scorer``
        * turnovers: columns ``seconds``, ``home_turnover``,
          ``away_turnover``; only rows where a 'Bad Pass' was found are kept.

        The scorer / turnover columns hold the first (made shots, turnovers)
        or second (missed shots) space-separated token of the description;
        the side that did not act is left missing (or ``"N/A"`` for turnovers).
    """
    pbp_path = os.path.join(PBP_DIRECTORY, gid + ".json")
    # Context manager so the file handle is released even if parsing fails.
    with open(pbp_path) as pbp_file:
        events = pd.DataFrame(json.load(pbp_file)['resultSets'][0]['rowSet'])

    # Snapshot the (period, clock) pairs before adding a column to the frame.
    period_clock_pairs = list(zip(events[4], events[6]))
    for i, (prd, sec) in enumerate(period_clock_pairs):
        events.at[i, 'seconds'] = pbp.get_seconds(int(prd), sec)

    made_shots = events.loc[events[2] == 1].copy().reset_index()
    missed_shots = events.loc[events[2] == 2].copy().reset_index()
    turnovers = events.loc[events[2] == 5].copy().reset_index()

    # na=False: a missing description must count as "no assist". Without it
    # str.contains can yield NaN, and NaN becomes True when cast to bool.
    made_shots['assisted'] = np.array(
        made_shots[7].str.contains('AST', na=False)
        | made_shots[9].str.contains('AST', na=False),
        dtype=bool,
    )

    # Create the result columns up front so they always exist (even when one
    # side never scores) and can be selected by name below.
    for shots in (made_shots, missed_shots):
        shots['home_scorer'] = None
        shots['away_scorer'] = None
    turnovers['home_turnover'] = None
    turnovers['away_turnover'] = None

    for i, row in made_shots.iterrows():
        if row[7] is not None:
            made_shots.at[i, 'home_scorer'] = row[7].split(" ")[0]
        elif row[9] is not None:
            made_shots.at[i, 'away_scorer'] = row[9].split(" ")[0]

    # NOTE(review): token [1] assumes the description starts with a one-word
    # prefix before the player's name -- confirm against the feed.
    for i, row in missed_shots.iterrows():
        if row[7] is not None:
            missed_shots.at[i, 'home_scorer'] = row[7].split(" ")[1]
        elif row[9] is not None:
            missed_shots.at[i, 'away_scorer'] = row[9].split(" ")[1]

    # Check each description on its own: an away bad pass must not be skipped
    # just because the home description is non-empty.
    for i, row in turnovers.iterrows():
        home_desc, away_desc = row[7], row[9]
        if home_desc is not None and 'Bad Pass' in home_desc:
            turnovers.at[i, 'home_turnover'] = home_desc.split(" ")[0]
            turnovers.at[i, 'away_turnover'] = "N/A"
        elif away_desc is not None and 'Bad Pass' in away_desc:
            turnovers.at[i, 'away_turnover'] = away_desc.split(" ")[0]
            turnovers.at[i, 'home_turnover'] = "N/A"

    # Select by name rather than by trailing position, so the result does not
    # depend on the order in which columns happened to be created.
    return (
        made_shots[['seconds', 'assisted', 'home_scorer', 'away_scorer']],
        missed_shots[['seconds', 'home_scorer', 'away_scorer']],
        turnovers[['seconds', 'home_turnover', 'away_turnover']].dropna(),
    )

In [53]:
def find_player_with_ball(time_df):
    """Find the player nearest the ball and the nearest player of the other team.

    ``time_df`` holds the rows of one tracking frame: the ball is the row with
    ``player_id == -1`` and every other row is a player. Players are split
    into two groups by ``team_id``, using the first player row's team as the
    reference group.

    Returns a pair of rows (pandas Series): first the player closest to the
    ball overall, then the closest player from the other group.
    """
    ball_rows = time_df.loc[time_df['player_id'] == -1]
    player_rows = time_df.loc[time_df['player_id'] != -1]

    ball_x = ball_rows['x_loc'].iloc[0]
    ball_y = ball_rows['y_loc'].iloc[0]

    def closest_to_ball(squad):
        # Euclidean distance of every squad member to the ball, then the
        # first member achieving the minimum.
        gaps = np.array(np.sqrt((squad['y_loc'] - ball_y) ** 2 + (squad['x_loc'] - ball_x) ** 2))
        position = np.where(gaps == np.amin(gaps))[0][0]
        return gaps[position], squad.iloc[position]

    reference_squad = player_rows.loc[player_rows['team_id'] == player_rows['team_id'].iloc[0]].copy()
    other_squad = player_rows.loc[player_rows['team_id'] != player_rows['team_id'].iloc[0]].copy()

    reference_gap, reference_player = closest_to_ball(reference_squad)
    other_gap, other_player = closest_to_ball(other_squad)

    if reference_gap < other_gap:
        return reference_player, other_player
    return other_player, reference_player

In [54]:
def sg_filter(x, m, k=0):
    """Return Savitzky-Golay filter coefficients for one window of samples.

    x = Vector of sample times
    m = Order of the smoothing polynomial
    k = Which derivative

    The sample times are centred on the middle element of ``x`` and a
    polynomial design matrix with columns ``a**0 .. a**m`` is built from
    them. Row ``k`` of its pseudo-inverse is returned: one weight per sample,
    to be dotted with the window's values. The weights are not scaled by
    ``k!``; the caller applies that factor.
    """
    x = np.asarray(x, dtype=float)
    mid = len(x) // 2
    a = x - x[mid]
    # Design matrix A[i, p] = a[i] ** p, built directly instead of wrapping
    # lazy map objects in an array and unpacking them again.
    A = np.vander(a, m + 1, increasing=True)
    Ai = np.linalg.pinv(A)
    return Ai[k]

def smooth(x, y, size=5, order=2, deriv=0):
    """Apply a sliding Savitzky-Golay filter to ``y`` sampled at times ``x``.

    For every index that has ``size`` samples on both sides, the filter
    weights from ``sg_filter`` (polynomial order ``order``, derivative
    ``deriv``) are computed for that window and dotted with the matching
    slice of ``y``. The first and last ``size`` entries of the result stay 0.
    For ``deriv`` greater than 1 the result is multiplied by ``deriv!``.

    Returns a numpy array with the same length as ``x``.
    """
    half_width = size
    sample_count = len(x)
    smoothed = np.zeros(sample_count)

    for centre in range(half_width, sample_count - half_width):
        window = slice(centre - half_width, centre + half_width + 1)
        weights = sg_filter(x[window], order, deriv)
        smoothed[centre] = np.dot(weights, y[window])

    if deriv > 1:
        smoothed *= math.factorial(deriv)

    return smoothed

def correct_shots(ball_height, game_clock_time):
    """Estimate the release time of a shot from the ball's height trace.

    Both inputs are equal-length, index-aligned arrays (the caller passes the
    ball rows' ``radius`` and ``total_time`` columns). The height and its
    second derivative are smoothed with ``smooth`` (half-window 10, cubic
    fit). The release is taken as the sample with the smallest smoothed
    acceleration among the up to 25 samples just before the height peak.

    Returns the element of ``game_clock_time`` at that sample.

    Raises ValueError when the peak is the very first sample, because the
    search window before it is then empty.
    """
    size = 10
    order = 3

    position_smoothed = smooth(game_clock_time, ball_height, size, order, deriv=0)
    acceleration_smoothed = smooth(game_clock_time, ball_height, size, order, deriv=2)
    max_ind = np.argmax(position_smoothed)

    window_start = max(0, max_ind - 25)
    shot_window = acceleration_smoothed[window_start:max_ind]
    # argmin is relative to the window's first element, so shift it back by
    # the window start. Subtracting it from max_ind would mirror the index
    # and point at a sample other than the minimum.
    shot_ind = window_start + np.argmin(shot_window)

    return game_clock_time[shot_ind]

In [55]:
def _most_frequent_id(ids):
    """Return the id occurring most often in ``ids`` (smallest id wins a tie)."""
    # Cast explicitly: np.bincount refuses float input, and ids read from a
    # row of an all-numeric frame come back as floats.
    return np.argmax(np.bincount(np.asarray(ids, dtype=np.int64)))


def find_sequences(full_game, start, end, max_len=15):
    """Identify the shooter, the last ball handler found by the scan, and their nearest opponents.

    Parameters
    ----------
    full_game : pandas.DataFrame
        Tracking rows with at least ``total_time``, ``player_id``, ``x_loc``,
        ``y_loc``, ``radius`` and ``team_id`` columns; the ball is the row
        with ``player_id == -1``.
    start, end : number
        Only rows with ``end < total_time < start`` (strict) are considered.
    max_len : int
        Despite the name this is a minimum: only runs of consecutive frames
        with the same ball handler that are longer than ``max_len`` frames
        are kept.

    Returns
    -------
    dict
        ``shooter_*``, ``shooter_defend_*``, ``assist_*`` and ``defend_*``
        entries, each with ``_id``, ``_x`` and ``_y`` suffixes.

    Raises
    ------
    IndexError
        When the release time is not among the window's frame times, a frame
        has no ball or player rows, or no run longer than ``max_len`` exists.
    ValueError
        Propagated from ``correct_shots`` / ``find_player_with_ball`` on
        degenerate input.
    """
    lookback_frames = 15  # frames before the release used to vote on the shooter
    scan_seconds = 5      # only frames with 0 < total_time - shot_time < 5 are scanned

    # Explicit strict comparisons: recent pandas rejects a boolean `inclusive`
    # in Series.between with ValueError, which the caller would swallow and
    # silently drop every shot.
    total_time = full_game['total_time']
    this_window_of_game = full_game.loc[(total_time > end) & (total_time < start)].copy().reset_index()
    ball_in_window = this_window_of_game.loc[this_window_of_game['player_id'] == -1]
    shot_time = correct_shots(np.array(ball_in_window['radius']), np.array(ball_in_window['total_time']))

    times = this_window_of_game['total_time'].unique()
    shot_time_index = np.where(times == shot_time)[0][0]

    def frame_at(moment):
        return this_window_of_game.loc[this_window_of_game['total_time'] == moment]

    # Clamp at 0 so a release near the start of the window does not wrap
    # around to frames at the end; fall back to the release frame itself when
    # nothing precedes it.
    frame_indices = list(range(max(0, shot_time_index - lookback_frames), shot_time_index))
    if not frame_indices:
        frame_indices = [shot_time_index]

    all_shooter_ids = []
    all_defense_ids = []
    locs = []
    defense_locs = []
    for i in frame_indices:
        # Look at frame i itself, so the vote below covers distinct frames
        # rather than the release frame repeated.
        team1, team2 = find_player_with_ball(frame_at(times[i]))
        all_shooter_ids.append(team1['player_id'])
        all_defense_ids.append(team2['player_id'])
        locs.append((team1['x_loc'], team1['y_loc']))
        defense_locs.append((team2['x_loc'], team2['y_loc']))

    shooter_id = _most_frequent_id(all_shooter_ids)
    defense_id = _most_frequent_id(all_defense_ids)

    # Position at the first look-back frame in which each voted player appears.
    shooter_x, shooter_y = locs[np.where(np.asarray(all_shooter_ids) == shooter_id)[0][0]]
    defense_shooter_x, defense_shooter_y = defense_locs[np.where(np.asarray(all_defense_ids) == defense_id)[0][0]]

    # Walk the scanned frames, recording who is nearest the ball until the
    # shooter becomes the nearest player.
    ids = []
    for time_a in times:
        if 0 < (time_a - shot_time) < scan_seconds:
            team1, team2 = find_player_with_ball(frame_at(time_a))
            if team1['player_id'] == shooter_id:
                break
            ids.append(np.array([team1['player_id'],
                                 team1['x_loc'],
                                 team1['y_loc'],
                                 team2['player_id'],
                                 team2['x_loc'],
                                 team2['y_loc']]))

    # Group consecutive frames that share the same ball handler.
    seqs = []
    for ind_id in ids:
        if seqs and seqs[-1][0][0] == ind_id[0]:
            seqs[-1].append(ind_id)
        else:
            seqs.append([ind_id])

    cleaned_seqs = [seq for seq in seqs if len(seq) > max_len]

    # Last frame of the last sufficiently long run; IndexError here means no
    # such run exists and is handled by the caller.
    last_touch = cleaned_seqs[-1][-1]

    return {
        'shooter_defend_id': defense_id,
        'shooter_defend_x': defense_shooter_x,
        'shooter_defend_y': defense_shooter_y,
        'shooter_id': shooter_id,
        'shooter_x': shooter_x,
        'shooter_y': shooter_y,
        'assist_id': last_touch[0],
        'assist_x': last_touch[1],
        'assist_y': last_touch[2],
        'defend_id': last_touch[3],
        'defend_x': last_touch[4],
        'defend_y': last_touch[5],
    }

In [56]:
def combine_track_pbp(shot, track_df, assist):
    """Match one play-by-play shot against the tracking data.

    ``find_sequences`` is run on the tracking rows within 5 seconds either
    side of ``shot['seconds']``. The detected shooter's name is looked up
    with ``get_basic_statistics.get_name_from_id`` and its second
    space-separated token is compared with ``shot['home_scorer']`` and
    ``shot['away_scorer']``.

    Returns four ``[id, x, y]`` lists -- shooter, assist, defender and the
    shooter's defender. On a name mismatch, or when an IndexError or
    ValueError is raised along the way, all four are ``[0, 0, 0]``.
    The ``assist`` argument is accepted but not used.
    """
    unmatched = ([0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0])
    try:
        found = find_sequences(track_df, shot['seconds'] + 5, shot['seconds'] - 5)
        surname = get_basic_statistics.get_name_from_id(found['shooter_id']).split(" ")[1]

        if surname != shot['home_scorer'] and surname != shot['away_scorer']:
            return unmatched

        shooter = [found['shooter_id'], found['shooter_x'], found['shooter_y']]
        passer = [found['assist_id'], found['assist_x'], found['assist_y']]
        defender = [found['defend_id'], found['defend_x'], found['defend_y']]
        shooter_defender = [found['shooter_defend_id'],
                            found['shooter_defend_x'],
                            found['shooter_defend_y']]
        return (shooter, passer, defender, shooter_defender)
    except (IndexError, ValueError):
        return unmatched
           

In [57]:
def _collect_shot_coords(shots, full_game, assist):
    """Run combine_track_pbp on every row of ``shots`` and keep the matched ones.

    Returns four parallel lists of ``[id, x, y]`` entries: shooters, assist
    players, defenders and shooters' defenders. A shot is dropped when its
    shooter id comes back as 0, the "no match" marker.
    """
    shooters, passers, defenders, shooter_defenders = [], [], [], []
    for _, shot in shots.iterrows():
        shoot, passer, defend, defend_shoot = combine_track_pbp(shot, full_game, assist=assist)
        if shoot[0] != 0:
            shooters.append(shoot)
            passers.append(passer)
            defenders.append(defend)
            shooter_defenders.append(defend_shoot)
    return shooters, passers, defenders, shooter_defenders


def get_coords_for_game(gid, padding=8):
    """Collect player coordinates for every matched made and missed shot of one game.

    Loads the play-by-play shots with ``get_times_from_pbp`` and the tracking
    data with ``tracking.get_tracking_data``, then matches each shot against
    the tracking frames.

    Returns a tuple of eight lists. The first four (shooters, assist players,
    defenders, shooters' defenders) cover made shots and the last four cover
    missed shots in the same order. Every entry is an ``[id, x, y]`` list.
    The ``padding`` argument is accepted but not used.
    """
    made, missed, _turnover = get_times_from_pbp(gid)

    full_game = tracking.get_tracking_data(gid, tracking_directory=TRACKING_DIRECTORY)

    on_made = _collect_shot_coords(made, full_game, assist=True)
    on_missed = _collect_shot_coords(missed, full_game, assist=False)

    return on_made + on_missed

In [58]:
def get_all_games():
    """Process every game in TRACKING_DIRECTORY and write the results to CSV.

    Each file name in TRACKING_DIRECTORY (up to the first '.') is used as a
    game id for ``get_coords_for_game``. Results are accumulated across games
    and all eight CSV files under ``nonaggregated/`` are rewritten after every
    game, so the files always hold what has been processed so far.

    Returns None.
    """
    # Output files, in the same order as the tuple from get_coords_for_game.
    output_paths = (
        "nonaggregated/scorers_on_assist_new_algo.csv",
        "nonaggregated/assistor_on_assist_new_algo.csv",
        "nonaggregated/defender_on_assist_new_algo.csv",
        "nonaggregated/shoot_defender_on_assist_new_algo.csv",
        "nonaggregated/scorers_on_miss_new_algo.csv",
        "nonaggregated/assistor_on_miss_new_algo.csv",
        "nonaggregated/defender_on_miss_new_algo.csv",
        "nonaggregated/shoot_defender_on_miss_new_algo.csv",
    )
    accumulated = [[] for _ in output_paths]

    # Create the output directory up front, so a missing directory does not
    # surface only after the first game has been processed.
    os.makedirs("nonaggregated", exist_ok=True)

    # Sorted for a reproducible order; os.listdir returns entries in
    # arbitrary order.
    for filename in tqdm(sorted(os.listdir(TRACKING_DIRECTORY))):
        gameid = filename.split('.')[0]

        per_game = get_coords_for_game(gameid)
        for bucket, rows in zip(accumulated, per_game):
            bucket.extend(rows)

        # Rewritten after each game, as a checkpoint of the run so far.
        for path, bucket in zip(output_paths, accumulated):
            np.savetxt(path, bucket, delimiter=",", fmt="%s")



In [59]:
# Run the pipeline over every file in TRACKING_DIRECTORY; the CSVs under
# nonaggregated/ are rewritten after each game.
get_all_games()

HBox(children=(IntProgress(value=0, max=632), HTML(value='')))