In [1]:
import numpy as np
import pandas as pd
from google.colab import drive
from ast import literal_eval
import ast
import math
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import os
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# convert to float from string
def parse_and_convert(locations_str):
    try:
        locations_list = ast.literal_eval(locations_str)
        if isinstance(locations_list, list):
            return [[float(val) for val in sublist] for sublist in locations_list]
        else:
            return None
    except (ValueError, SyntaxError):
        return None

# Function

In [3]:
def shot_check(ball_x, ball_y, r):
  condition_x = (3.25 < ball_x < 7.25) or (86.75 < ball_x < 90.75)
  condition_y = 23 < ball_y < 27
  condition_r = 9 < r < 11

  return condition_x and condition_y and condition_r

def get_poss(locations, shot_clock):
  poss = []

  for j in range(1, 11):
    square_sum = np.sum((np.array(locations[0][2:4]) - np.array(locations[j][2:4]))**2)

    if square_sum < poss_r ** 2:
      poss.append([locations[j][0], locations[j][1]])

  if len(poss) == 1:
    return poss, shot_clock
  elif len(poss) > 1:
    return -2, shot_clock
  else:
    return -1, shot_clock

wingspan = 6.976
wingspan_weight = 0.5
poss_r = wingspan * wingspan_weight
c1 = 2.5
c2 = 10.5

def td_to_event(path):
  json = pd.read_json(path)

  events = json["events"]
  game_id = json["gameid"][0]
  gamedate = json["gamedate"][0]
  visit_abb = events[0]["visitor"]["abbreviation"]
  home_abb = events[0]["home"]["abbreviation"]
  moments = []

  for dic in events:
    for moment in dic["moments"]:
      moments.append(moment)

  moments = pd.DataFrame(moments)
  moments = moments.drop_duplicates(subset = [1])

  moments = moments.reset_index()

  moments.columns = ["index", "quarter", "absolute_time", "quarter_time", "shot_clock", "unknown", "locations"]
  moments["game_clock"] = (moments.quarter - 1) * 720 + (720 - moments.quarter_time)
  moments.drop(["index", "unknown"], axis = 1, inplace = True)

  a = moments

  a = a[a['locations'].apply(len) == 11].reset_index().iloc[:, 1:]

  a['velocity_x'] = 0.0 # feet / 0.04(s)
  a['velocity_y'] = 0.0 # feet / 0.04(s)
  a['acc'] = 0.0
  a['acc_x'] = 0.0
  a['acc_y'] = 0.0
  a['ball_speed'] = 0.0

  # calculate velocity
  for i in range(1, a.shape[0]): 
      a.at[i, 'velocity_x'] = (a['locations'][i][0][2] - a['locations'][i-1][0][2]) / (0.04)
      a.at[i, 'velocity_y'] = (a['locations'][i][0][3] - a['locations'][i-1][0][3]) / (0.04)
      a.at[i, 'ball_speed'] = math.sqrt(a.at[i, 'velocity_x']**2 + a.at[i, 'velocity_y']**2)

  a['velocity_x'][0] = 0.0
  a['velocity_y'][0] = 0.0
  a['ball_speed'][0] = 0.0

  # if "ball_speed" > 100, do not consider possessions
  a = a[a["ball_speed"] < 100].reset_index().iloc[:, 1:]

  a["smoothed_speed"] = 0
  a["s_velocity_x"] = 0
  a["s_velocity_y"] = 0

  window_size = 5

  a["s_velocity_x"] = a["velocity_x"].rolling(window = window_size).mean()
  a["s_velocity_y"] = a["velocity_y"].rolling(window = window_size).mean()

  for i in range(2, a.shape[0]):
    a.at[i, 'smoothed_speed'] = math.sqrt(a.at[i, 's_velocity_x']**2 + a.at[i, 's_velocity_y']**2)
    a.at[i, 'acc_x'] = (a['s_velocity_x'][i] - a['s_velocity_x'][i-1])
    a.at[i, 'acc_y'] = (a['s_velocity_y'][i] - a['s_velocity_y'][i-1])
    a.at[i, 'acc'] = math.sqrt(a.at[i, 'acc_x']**2 + a.at[i, 'acc_y']**2)

  a["possession"] = -1

  wingspan = 6.976
  wingspan_weight = 0.5
  poss_r = wingspan * wingspan_weight
  c1 = 2.5
  c2 = 10.5

  prev_poss = None
  prev_shotclock = None

  for i in range(1, a.shape[0]):
    ball_x = a['locations'][i][0][2]
    ball_y = a['locations'][i][0][3]
    r = a["locations"][i][0][4]

    if (a["shot_clock"][i] is None) or (a["shot_clock"][i] > 23):
      prev_poss = None
      prev_shotclock = None

    if shot_check(ball_x, ball_y, r):
      a.at[i, "possession"] = 0

    elif (a["acc"][i] > c1) and (r < c2):
      poss, shotclock = get_poss(a["locations"][i], a["shot_clock"][i])

      if (poss in [-1, -2]):
        a.at[i, "possession"] = poss
      elif (a["quarter_time"][i] < 24):
        a.at[i, "possession"] = str(poss)
      elif (prev_poss is None) or ((prev_poss[0][0] == poss[0][0]) and (shotclock < prev_shotclock)) or ((prev_poss[0][0] != poss[0][0]) and (shotclock > prev_shotclock)):
        a.at[i, "possession"] = str(poss)
        prev_poss = poss
        prev_shotclock = shotclock

  for i in range(1, a.shape[0]-1):
    if a["possession"][i] == 0:
      if a["shot_clock"][i] is None:
        pass
      elif a["shot_clock"][i] > 23:
        a["possession"][i] = -1
      elif (a["shot_clock"][i + 1] <= a["shot_clock"][i]) or (np.isnan(a["shot_clock"][i + 1]) and (a["quarter_time"][i + 1] > 24)):
        a["possession"][i + 1] = 0

  a['info']= [[] for _ in range(len(a))]
  a_info = pd.DataFrame()

  for j in range(1, max(a["quarter"] + 1)):
    b = a[a['quarter'] == j]

    for i in range(b.index[0], b.index[-1] + 1):
        if b['possession'][i] == -1 or b['possession'][i] == -2:
            j = 1
            while i - j >= b.index[0]:
                if b['possession'][i - j] != -1 and b['possession'][i - j] != -2:
                    b['info'][i].append(b['possession'][i - j])
                    break
                j += 1
            j = 1
            while i + j < b.index[-1] + 1:
                if b['possession'][i + j] != -1 and b['possession'][i + j] != -2:
                    b['info'][i].append(b['possession'][i + j])
                    break
                j += 1
        else:
            pass

    a_info = pd.concat([a_info, b], axis=0)

  for i in range(1, a_info.shape[0]):
      info_i = a_info["info"][i]

      if len(info_i) == 2:
          if info_i[1] == 0:
              a_info.at[i, "possession"] = 0
          elif info_i[0] == info_i[1]:
              a_info.at[i, "possession"] = info_i[0]

  #event detection
  col_names = ["type", "quarter", "quarter_time", "shot_clock", "start_ball", "end_ball", "start_locations", "end_locations", "first_player", "second_player"]
  event_df = pd.DataFrame(columns = col_names)

  for j in range(1, max(a_info["quarter"] + 1)):

    b = a_info[a_info['quarter'] == j]
    i = b.index[0]
    x = None

    while i < b.index[-1] + 1:

      if b['possession'][i] == -1 or b['possession'][i] == -2:
          event = pd.DataFrame(columns = col_names)

          j = 1
          while i - j >= b.index[0]:
              if b['possession'][i - j] != -1 and b['possession'][i - j] != -2:
                  x = b.iloc[i - j - b.index[0], :]
                  break
              j += 1

          j = 1
          while i + j < b.index[-1] + 1:
              if b['possession'][i + j] != -1 and b['possession'][i + j] != -2:
                  y = b.iloc[i + j - b.index[0], :]
                  break
              j += 1

          if (x is None) or (x["possession"] == 0):
              i = i + j
              continue

          if x["possession"][0][0] == y["possession"][0][0]:
            tp = "pass"
            event.at[0, "type"] = tp
          elif x["possession"][0][0] != y["possession"][0][0]:
            tp = "steal"
            event.at[0, "type"] = tp

          event.at[0, "quarter"] = x["quarter"]
          event.at[0, "quarter_time"] = x["quarter_time"]
          event.at[0, "shot_clock"] = x["shot_clock"]
          event.at[0, "start_ball"] = x["locations"][0][2:4]
          event.at[0, "end_ball"] = y["locations"][0][2:4]
          event.at[0, 'start_locations'] = x["locations"]
          event.at[0, 'end_locations'] = y["locations"]
          event.at[0, "first_player"] = x["possession"]
          event.at[0, "second_player"] = y["possession"]

          event_df = pd.concat([event_df, event], axis = 0)
          i = i + j

      elif b['possession'][i] == 0:
          event = pd.DataFrame(columns = col_names)

          j = 1
          while i - j >= b.index[0]:
              if b['possession'][i - j] != -1 and b['possession'][i - j] != -2 and b['possession'][i - j] != 0:
                  x = b.iloc[i - j - b.index[0], :]
                  break
              j += 1

          j = 1
          while i + j < b.index[-1] + 1:
              if b['possession'][i + j] != -1 and b['possession'][i + j] != -2 and b['possession'][i + j] != 0:
                  y = b.iloc[i + j - b.index[0], :]
                  break
              j += 1

          if (x is None):
              i = i + j
              continue

          event.at[0, "type"] = "shot"
          event.at[0, "quarter"] = x["quarter"]
          event.at[0, "quarter_time"] = x["quarter_time"]
          event.at[0, "shot_clock"] = x["shot_clock"]
          event.at[0, "start_ball"] = x["locations"][0][2:4]
          event.at[0, 'start_locations'] = x["locations"]
          event.at[0, "first_player"] = x["possession"]

          event_df = pd.concat([event_df, event], axis = 0)
          i = i + j

      else:
          i += 1

  event_df["game_id"] = game_id
  event_df["gamedate"] = gamedate
  event_df["visitor"] = visit_abb
  event_df["home"] = home_abb

  return event_df

# CLE, GSW Data collect

In [None]:
os.chdir('/content/drive/MyDrive/CLE_json')

file_list = os.listdir()

CLE_DF = pd.DataFrame()

for path in file_list:
  event_df = td_to_event(path)
  CLE_DF = pd.concat([CLE_DF, event_df], axis = 0)
  print(path)

In [6]:
len(CLE_DF["game_id"].unique())

38

In [24]:
CLE_DF_pass_shot = CLE_DF[CLE_DF["type"].isin(["pass", "shot"])]

In [25]:
# data type transform
CLE_DF_pass_shot['first_player'] = CLE_DF_pass_shot['first_player'].apply(parse_and_convert)
CLE_DF_pass_shot['second_player'] = CLE_DF_pass_shot['second_player'].apply(parse_and_convert)

# data type transform
CLE_DF_pass_shot['start_ball'] = CLE_DF_pass_shot['start_ball'].apply(lambda x: [x[0], -x[1]])
def transform_end_ball(x):
    if x["type"] == "pass":
        return [x["end_ball"][0], -x["end_ball"][1]]
    else:
        return x["end_ball"]
CLE_DF_pass_shot['end_ball'] = CLE_DF_pass_shot.apply(transform_end_ball, axis=1)

def modify(sublist):
    return [[x[0], x[1],x[2],-x[3],x[4]] for x in sublist]

CLE_DF_pass_shot["start_locations"]=CLE_DF_pass_shot["start_locations"].apply(modify)
def modify_end_locations(row):
    if row['type'] == 'pass':
        return modify(row['end_locations'])
    else:
        return row['end_locations']
CLE_DF_pass_shot['end_locations'] = CLE_DF_pass_shot.apply(modify_end_locations, axis=1)

CLE_DF_pass_shot

Unnamed: 0,type,quarter,quarter_time,shot_clock,start_ball,end_ball,start_locations,end_locations,first_player,second_player,game_id,gamedate,visitor,home
0,pass,1,714.73,20.63,"[54.48924, -23.829]","[60.37034, -15.05877]","[[-1, -1, 54.48924, -23.829, 6.63428], [161061...","[[-1, -1, 60.37034, -15.05877, 5.95398], [1610...","[[1610612741.0, 202710.0]]","[[1610612741.0, 202703.0]]",21500002,2015-10-27,CLE,CHI
0,pass,1,709.65,15.55,"[62.68729, -17.05803]","[66.47891, -31.00121]","[[-1, -1, 62.68729, -17.05803, 7.89834], [1610...","[[-1, -1, 66.47891, -31.00121, 5.97268], [1610...","[[1610612741.0, 202703.0]]","[[1610612741.0, 2200.0]]",21500002,2015-10-27,CLE,CHI
0,pass,1,707.09,12.99,"[68.83468, -37.28891]","[72.20677, -43.15451]","[[-1, -1, 68.83468, -37.28891, 5.27702], [1610...","[[-1, -1, 72.20677, -43.15451, 5.15137], [1610...","[[1610612741.0, 2200.0]]","[[1610612741.0, 202710.0]]",21500002,2015-10-27,CLE,CHI
0,pass,1,704.13,10.03,"[79.50138, -42.40693]","[70.99923, -34.11109]","[[-1, -1, 79.50138, -42.40693, 3.43591], [1610...","[[-1, -1, 70.99923, -34.11109, 2.47765], [1610...","[[1610612741.0, 202710.0]]","[[1610612741.0, 2200.0]]",21500002,2015-10-27,CLE,CHI
0,pass,1,701.65,7.55,"[76.18626, -23.93565]","[68.07641, -14.23657]","[[-1, -1, 76.18626, -23.93565, 6.72025], [1610...","[[-1, -1, 68.07641, -14.23657, 2.9477], [16106...","[[1610612741.0, 2200.0]]","[[1610612741.0, 203503.0]]",21500002,2015-10-27,CLE,CHI
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,pass,4,25.07,18.22,"[70.06163, -21.1604]","[81.7931, -20.86234]","[[-1, -1, 70.06163, -21.1604, 2.64025], [16106...","[[-1, -1, 81.7931, -20.86234, 10.01355], [1610...","[[1610612751.0, 202711.0]]","[[1610612739.0, 202684.0]]",21500631,2016-01-20,CLE,BKN
0,pass,4,23.87,16.9,"[81.7931, -20.86234]","[76.57978, -45.49478]","[[-1, -1, 81.7931, -20.86234, 10.01355], [1610...","[[-1, -1, 76.57978, -45.49478, 5.7386], [16106...","[[1610612739.0, 202684.0]]","[[1610612751.0, 203499.0]]",21500631,2016-01-20,CLE,BKN
0,shot,4,21.91,14.75,"[78.09727, -42.50817]",,"[[-1, -1, 78.09727, -42.50817, 9.9099], [16106...",,"[[1610612751.0, 203499.0]]",,21500631,2016-01-20,CLE,BKN
0,pass,4,15.86,,"[70.69002, -19.70224]","[62.66886, -15.79281]","[[-1, -1, 70.69002, -19.70224, 5.87762], [1610...","[[-1, -1, 62.66886, -15.79281, 3.79202], [1610...","[[1610612739.0, 202684.0]]","[[1610612739.0, 2210.0]]",21500631,2016-01-20,CLE,BKN


In [40]:
# filter only plays by CLE
CLE_DF_pass_shot = CLE_DF_pass_shot[CLE_DF_pass_shot['first_player'].apply(lambda x: x[0][0] == 1610612739.0)].reset_index()
CLE_DF_pass_shot

Unnamed: 0,index,type,quarter,quarter_time,shot_clock,start_ball,end_ball,start_locations,end_locations,first_player,second_player,game_id,gamedate,visitor,home
0,0,pass,1,694.54,22.65,"[85.58063, -32.20436]","[77.39659, -41.05928]","[[-1, -1, 85.58063, -32.20436, 6.12211], [1610...","[[-1, -1, 77.39659, -41.05928, 3.965], [161061...","[[1610612739.0, 201567.0]]","[[1610612739.0, 2590.0]]",21500002,2015-10-27,CLE,CHI
1,0,pass,1,687.14,15.31,"[34.75367, -31.62725]","[28.21327, -36.59227]","[[-1, -1, 34.75367, -31.62725, 7.02781], [1610...","[[-1, -1, 28.21327, -36.59227, 6.43881], [1610...","[[1610612739.0, 2590.0]]","[[1610612739.0, 2544.0]]",21500002,2015-10-27,CLE,CHI
2,0,pass,1,686.66,14.84,"[27.97766, -36.54447]","[9.19308, -27.9452]","[[-1, -1, 27.97766, -36.54447, 6.20019], [1610...","[[-1, -1, 9.19308, -27.9452, 4.29929], [161061...","[[1610612739.0, 2544.0]]","[[1610612739.0, 201567.0]]",21500002,2015-10-27,CLE,CHI
3,0,shot,1,684.94,13.13,"[6.74066, -26.49651]",,"[[-1, -1, 6.74066, -26.49651, 7.41575], [16106...",,"[[1610612739.0, 201567.0]]",,21500002,2015-10-27,CLE,CHI
4,0,pass,1,671.38,23.69,"[83.75809, -26.39103]","[77.03251, -32.79359]","[[-1, -1, 83.75809, -26.39103, 7.8150200000000...","[[-1, -1, 77.03251, -32.79359, 6.46341], [1610...","[[1610612739.0, 202389.0]]","[[1610612739.0, 2590.0]]",21500002,2015-10-27,CLE,CHI
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14286,0,pass,4,48.3,24.0,"[86.85578, -25.60996]","[80.77568, -21.78331]","[[-1, -1, 86.85578, -25.60996, 3.99966], [1610...","[[-1, -1, 80.77568, -21.78331, 3.99282], [1610...","[[1610612739.0, 202684.0]]","[[1610612739.0, 2590.0]]",21500631,2016-01-20,CLE,BKN
14287,0,shot,4,33.68,9.44,"[30.32186, -32.26132]",,"[[-1, -1, 30.32186, -32.26132, 8.32221], [1610...",,"[[1610612739.0, 2590.0]]",,21500631,2016-01-20,CLE,BKN
14288,0,pass,4,23.87,16.9,"[81.7931, -20.86234]","[76.57978, -45.49478]","[[-1, -1, 81.7931, -20.86234, 10.01355], [1610...","[[-1, -1, 76.57978, -45.49478, 5.7386], [16106...","[[1610612739.0, 202684.0]]","[[1610612751.0, 203499.0]]",21500631,2016-01-20,CLE,BKN
14289,0,pass,4,15.86,,"[70.69002, -19.70224]","[62.66886, -15.79281]","[[-1, -1, 70.69002, -19.70224, 5.87762], [1610...","[[-1, -1, 62.66886, -15.79281, 3.79202], [1610...","[[1610612739.0, 202684.0]]","[[1610612739.0, 2210.0]]",21500631,2016-01-20,CLE,BKN


In [41]:
CLE_DF_pass_shot.to_csv("CLE_pass_shot.csv", index = False)

In [52]:
def mirror_locations(locations):
  modified_locations = []
  for location in locations:
    zero = location[0]
    one = location[1]
    four = location[4]
    x = location[2]
    y = location[3]
    x = 94 - x
    y = -50 - y
    modified_locations.append([zero, one, x, y, four])
  return modified_locations

CLE_DF_pass_shot_half = CLE_DF_pass_shot

for i in range(CLE_DF_pass_shot_half.shape[0]):
  if CLE_DF_pass_shot_half.at[i, "type"] == "pass":
    for j in range(i + 1, CLE_DF_pass_shot_half.shape[0]):
      if CLE_DF_pass_shot_half.at[j, "type"] == "shot":
          next_shot = CLE_DF_pass_shot_half.iloc[j, :]
          break
    if next_shot["start_ball"][0] > 47:
      CLE_DF_pass_shot_half.at[i, "start_ball"] = [94 - CLE_DF_pass_shot_half.at[i, "start_ball"][0], -50 - CLE_DF_pass_shot_half.at[i, "start_ball"][1]]
      CLE_DF_pass_shot_half.at[i, "end_ball"] = [94 - CLE_DF_pass_shot_half.at[i, "end_ball"][0], -50 - CLE_DF_pass_shot_half.at[i, "end_ball"][1]]

      CLE_DF_pass_shot_half.at[i, "start_locations"] = mirror_locations(CLE_DF_pass_shot_half.at[i, "start_locations"])
      CLE_DF_pass_shot_half.at[i, "end_locations"] = mirror_locations(CLE_DF_pass_shot_half.at[i, "end_locations"])

In [53]:
CLE_DF_pass_shot_half.to_csv("CLE_pass_shot_half.csv", index = False)

In [None]:
#GSW
os.chdir('/content/drive/MyDrive/GSW_json')

file_list = os.listdir()

GSW_DF = pd.DataFrame()

for path in file_list:
  event_df = td_to_event(path)
  GSW_DF = pd.concat([GSW_DF, event_df], axis = 0)

In [57]:
len(GSW_DF["game_id"].unique())

41

In [58]:
GSW_DF_pass_shot = GSW_DF[GSW_DF["type"].isin(["pass", "shot"])]

In [59]:
# data type transform
GSW_DF_pass_shot['first_player'] = GSW_DF_pass_shot['first_player'].apply(parse_and_convert)
GSW_DF_pass_shot['second_player'] = GSW_DF_pass_shot['second_player'].apply(parse_and_convert)

# data type transform
GSW_DF_pass_shot['start_ball'] = GSW_DF_pass_shot['start_ball'].apply(lambda x: [x[0], -x[1]])
def transform_end_ball(x):
    if x["type"] == "pass":
        return [x["end_ball"][0], -x["end_ball"][1]]
    else:
        return x["end_ball"]
GSW_DF_pass_shot['end_ball'] = GSW_DF_pass_shot.apply(transform_end_ball, axis=1)

def modify(sublist):
    return [[x[0], x[1],x[2],-x[3],x[4]] for x in sublist]

GSW_DF_pass_shot["start_locations"]=GSW_DF_pass_shot["start_locations"].apply(modify)
def modify_end_locations(row):
    if row['type'] == 'pass':
        return modify(row['end_locations'])
    else:
        return row['end_locations']
GSW_DF_pass_shot['end_locations'] = GSW_DF_pass_shot.apply(modify_end_locations, axis=1)

GSW_DF_pass_shot

Unnamed: 0,type,quarter,quarter_time,shot_clock,start_ball,end_ball,start_locations,end_locations,first_player,second_player,game_id,gamedate,visitor,home
0,pass,1,714.08,18.64,"[60.90924, -17.29644]","[62.91085, -35.71514]","[[-1, -1, 60.90924, -17.29644, 2.4522], [16106...","[[-1, -1, 62.91085, -35.71514, 4.24947], [1610...","[[1610612740.0, 101126.0]]","[[1610612740.0, 201569.0]]",21500003,2015-10-27,NOP,GSW
0,pass,1,711.06,15.91,"[60.55193, -16.11172]","[87.97921, -3.2469]","[[-1, -1, 60.55193, -16.11172, 5.51909], [1610...","[[-1, -1, 87.97921, -3.2469, 3.72891], [161061...","[[1610612740.0, 201569.0]]","[[1610612740.0, 203076.0]]",21500003,2015-10-27,NOP,GSW
0,shot,1,709.94,14.89,"[88.03217, -3.33573]",,"[[-1, -1, 88.03217, -3.33573, 4.28153], [16106...",,"[[1610612740.0, 203076.0]]",,21500003,2015-10-27,NOP,GSW
0,pass,1,706.64,22.75,"[87.91226, -25.22166]","[85.56164, -28.7999]","[[-1, -1, 87.91226, -25.22166, 4.48338], [1610...","[[-1, -1, 85.56164, -28.7999, 5.24622], [16106...","[[1610612744.0, 101106.0]]","[[1610612744.0, 201939.0]]",21500003,2015-10-27,NOP,GSW
0,pass,1,700.24,16.44,"[40.23281, -17.46913]","[19.03083, -34.07791]","[[-1, -1, 40.23281, -17.46913, 4.91573], [1610...","[[-1, -1, 19.03083, -34.07791, 3.61312], [1610...","[[1610612744.0, 201939.0]]","[[1610612744.0, 202691.0]]",21500003,2015-10-27,NOP,GSW
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,shot,4,6.6,,"[77.5045, -24.93963]",,"[[-1, -1, 77.5045, -24.93963, 10.39924], [1610...",,"[[1610612754.0, 1626202.0]]",,21500652,2016-01-22,IND,GSW
0,pass,4,6.6,,"[87.32524, -24.45373]","[75.01623, -24.63284]","[[-1, -1, 87.32524, -24.45373, 3.45074], [1610...","[[-1, -1, 75.01623, -24.63284, 3.48056], [1610...","[[1610612744.0, 201578.0]]","[[1610612754.0, 1626202.0]]",21500652,2016-01-22,IND,GSW
0,shot,4,6.6,,"[76.8611, -24.70698]",,"[[-1, -1, 76.8611, -24.70698, 9.64546], [16106...",,"[[1610612754.0, 1626202.0]]",,21500652,2016-01-22,IND,GSW
0,pass,4,6.6,,"[92.21928, -26.39587]","[77.82897, -37.87026]","[[-1, -1, 92.21928, -26.39587, 4.92419], [1610...","[[-1, -1, 77.82897, -37.87026, 3.24005], [1610...","[[1610612744.0, 201578.0]]","[[1610612744.0, 203546.0]]",21500652,2016-01-22,IND,GSW


In [63]:
# filter only passes by GSW
GSW_DF_pass_shot = GSW_DF_pass_shot[GSW_DF_pass_shot['first_player'].apply(lambda x: x[0][0] == 1610612744.0)].reset_index()
GSW_DF_pass_shot

Unnamed: 0,index,type,quarter,quarter_time,shot_clock,start_ball,end_ball,start_locations,end_locations,first_player,second_player,game_id,gamedate,visitor,home
0,0,pass,1,706.64,22.75,"[87.91226, -25.22166]","[85.56164, -28.7999]","[[-1, -1, 87.91226, -25.22166, 4.48338], [1610...","[[-1, -1, 85.56164, -28.7999, 5.24622], [16106...","[[1610612744.0, 101106.0]]","[[1610612744.0, 201939.0]]",21500003,2015-10-27,NOP,GSW
1,0,pass,1,700.24,16.44,"[40.23281, -17.46913]","[19.03083, -34.07791]","[[-1, -1, 40.23281, -17.46913, 4.91573], [1610...","[[-1, -1, 19.03083, -34.07791, 3.61312], [1610...","[[1610612744.0, 201939.0]]","[[1610612744.0, 202691.0]]",21500003,2015-10-27,NOP,GSW
2,0,pass,1,698.23,14.45,"[17.99225, -36.17537]","[18.83462, -44.53255]","[[-1, -1, 17.99225, -36.17537, 8.56898], [1610...","[[-1, -1, 18.83462, -44.53255, 3.52893], [1610...","[[1610612744.0, 202691.0]]","[[1610612744.0, 203110.0]]",21500003,2015-10-27,NOP,GSW
3,0,shot,1,697.26,13.5,"[19.44348, -44.53242]",,"[[-1, -1, 19.44348, -44.53242, 6.60527], [1610...",,"[[1610612744.0, 203110.0]]",,21500003,2015-10-27,NOP,GSW
4,0,pass,1,678.56,22.14,"[92.94451, -23.12149]","[84.03311, -32.39807]","[[-1, -1, 92.94451, -23.12149, 4.19932], [1610...","[[-1, -1, 84.03311, -32.39807, 1.06005], [1610...","[[1610612744.0, 101106.0]]","[[1610612744.0, 201939.0]]",21500003,2015-10-27,NOP,GSW
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17767,0,pass,4,17.36,,"[32.56265, -7.35882]","[33.54021, -18.19683]","[[-1, -1, 32.56265, -7.35882, 6.86171], [16106...","[[-1, -1, 33.54021, -18.19683, 5.5089], [16106...","[[1610612744.0, 202691.0]]","[[1610612744.0, 201578.0]]",21500652,2016-01-22,IND,GSW
17768,0,shot,4,15.88,,"[30.24998, -22.57638]",,"[[-1, -1, 30.24998, -22.57638, 10.45603], [161...",,"[[1610612744.0, 201578.0]]",,21500652,2016-01-22,IND,GSW
17769,0,pass,4,6.6,,"[87.32524, -24.45373]","[75.01623, -24.63284]","[[-1, -1, 87.32524, -24.45373, 3.45074], [1610...","[[-1, -1, 75.01623, -24.63284, 3.48056], [1610...","[[1610612744.0, 201578.0]]","[[1610612754.0, 1626202.0]]",21500652,2016-01-22,IND,GSW
17770,0,pass,4,6.6,,"[92.21928, -26.39587]","[77.82897, -37.87026]","[[-1, -1, 92.21928, -26.39587, 4.92419], [1610...","[[-1, -1, 77.82897, -37.87026, 3.24005], [1610...","[[1610612744.0, 201578.0]]","[[1610612744.0, 203546.0]]",21500652,2016-01-22,IND,GSW


In [64]:
GSW_DF_pass_shot.to_csv("GSW_pass_shot.csv", index = False)

In [65]:
def mirror_locations(locations):
  modified_locations = []
  for location in locations:
    zero = location[0]
    one = location[1]
    four = location[4]
    x = location[2]
    y = location[3]
    x = 94 - x
    y = -50 - y
    modified_locations.append([zero, one, x, y, four])
  return modified_locations

GSW_DF_pass_shot_half = GSW_DF_pass_shot

for i in range(GSW_DF_pass_shot_half.shape[0]):
  if GSW_DF_pass_shot_half.at[i, "type"] == "pass":
    for j in range(i + 1, GSW_DF_pass_shot_half.shape[0]):
      if GSW_DF_pass_shot_half.at[j, "type"] == "shot":
          next_shot = GSW_DF_pass_shot_half.iloc[j, :]
          break
    if next_shot["start_ball"][0] > 47:
      GSW_DF_pass_shot_half.at[i, "start_ball"] = [94 - GSW_DF_pass_shot_half.at[i, "start_ball"][0], -50 - GSW_DF_pass_shot_half.at[i, "start_ball"][1]]
      GSW_DF_pass_shot_half.at[i, "end_ball"] = [94 - GSW_DF_pass_shot_half.at[i, "end_ball"][0], -50 - GSW_DF_pass_shot_half.at[i, "end_ball"][1]]

      GSW_DF_pass_shot_half.at[i, "start_locations"] = mirror_locations(GSW_DF_pass_shot_half.at[i, "start_locations"])
      GSW_DF_pass_shot_half.at[i, "end_locations"] = mirror_locations(GSW_DF_pass_shot_half.at[i, "end_locations"])

In [67]:
GSW_DF_pass_shot_half.to_csv("GSW_pass_shot_half.csv", index = False)