In [1]:
import pandas as pd
import glob
import json
import numpy as np
import math

def csv_2_pandas(player_id):
    """Load every per-season shot CSV of one player into a single DataFrame.

    Files are looked up with the pattern ``players/<player_id>/*/*.csv``
    relative to the current working directory. Each file's rows get a
    ``SEASON`` column holding the 1-based position of that file in the
    sorted list of matching paths.

    Parameters
    ----------
    player_id : str
        Player identifier used as the directory name under ``players/``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all files (original per-file indexes are kept).

    Raises
    ------
    ValueError
        If no CSV file matches the pattern.
    """
    # glob returns matches in arbitrary (filesystem-dependent) order; sorting
    # makes the SEASON numbering reproducible across machines and runs.
    # NOTE(review): assumes the season directory names sort chronologically
    # (e.g. "2014-15" < "2015-16") -- confirm against the data layout.
    files = sorted(glob.glob("players/" + player_id + "/*/*.csv"))
    if not files:
        raise ValueError(
            "No CSV files found matching players/{}/*/*.csv".format(player_id)
        )

    frames = []
    for season, path in enumerate(files, start=1):
        frame = pd.read_csv(path, delimiter=',')
        frame["SEASON"] = season
        frames.append(frame)

    return pd.concat(frames)

In [2]:
def _lookup_strict(values, mapping, what):
    """Map each element of ``values`` through ``mapping``.

    Raises KeyError naming the offending values when any element has no entry
    in ``mapping`` (instead of silently producing NaN).
    """
    unknown = set(values.unique()) - set(mapping)
    if unknown:
        raise KeyError("unknown {}: {}".format(what, sorted(map(str, unknown))))
    return values.map(mapping)


def transform(data):
    """Turn raw shot rows into the numeric feature table written to data.csv.

    Reads two lookup files from the current working directory:
    ``team_name_dict.csv`` (needs at least ``TEAM_NAME`` and ``ABBR`` columns)
    and ``teams.json`` (a list of objects with ``abbreviation`` and ``teamId``).

    Columns added:
      * ``HME_AWA``        -- 0 when ``HTM`` equals the shooter's team
                              abbreviation, otherwise 1.
      * ``OPP_ID``         -- ``teamId`` of the other team in the game.
      * ``MONTH``          -- characters 4-5 of ``GAME_DATE`` rendered as text.
      * ``TIME_REMAINING`` -- minutes left, counting 12 minutes for every
                              period still to come up to period 4; periods
                              after the 4th contribute only their own clock.
      * ``SHOT_DIFF``      -- 0-3 code derived from ``SHOT_ZONE_BASIC``.
      * ``TIME_PER``       -- ``TIME_REMAINING`` rounded down to an integer.

    All raw columns used to derive them, plus identifying/descriptive
    columns, are dropped.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw shot rows; must contain every column referenced below.

    Returns
    -------
    pandas.DataFrame
        New frame (the input is not modified).

    Raises
    ------
    KeyError
        If a required column is missing, a ``PERIOD`` is below 1, or an
        opponent abbreviation / shot zone has no entry in its lookup table.
    """
    data = data.drop(['GRID_TYPE', 'GAME_EVENT_ID', 'PLAYER_ID', 'PLAYER_NAME',
                      'SHOT_ATTEMPTED_FLAG'], axis=1)

    # Attach the shooter's team abbreviation so it can be compared to HTM/VTM.
    # NOTE(review): a TEAM_NAME missing from the lookup leaves ABBR as NaN, so
    # the row is treated as an away game with HTM as opponent -- confirm the
    # lookup covers every team name present in the data.
    team_lkp = pd.read_csv("team_name_dict.csv")
    data = data.merge(team_lkp, how='left', on='TEAM_NAME')

    is_home = data['HTM'] == data['ABBR']
    data['HME_AWA'] = (~is_home).astype('int64')
    data['OPP'] = np.where(is_home, data['VTM'], data['HTM'])

    with open("teams.json", "r") as fin:
        team_list = json.load(fin)
    team_id_dict = {team['abbreviation']: team['teamId'] for team in team_list}
    data['OPP_ID'] = _lookup_strict(data['OPP'], team_id_dict,
                                    "opponent abbreviation")

    data['MONTH'] = data['GAME_DATE'].astype('str').str[4:6]

    period = data['PERIOD'].astype('int')
    if (period < 1).any():
        raise KeyError("PERIOD values must be >= 1")
    # 12 minutes for each full period still to be played: 1 -> 36, 2 -> 24,
    # 3 -> 12, 4 and every later period -> 0. Computing it arithmetically
    # (rather than from a fixed 1..8 table) also handles period 9 and beyond.
    minutes_after_period = (4 - period).clip(lower=0) * 12
    data['TIME_REMAINING'] = (minutes_after_period + data['MINUTES_REMAINING']
                              + (data['SECONDS_REMAINING'] / 60))

    range_dict = {'Mid-Range': 1, 'Restricted Area': 0, 'Left Corner 3': 2,
                  'In The Paint (Non-RA)': 1, 'Above the Break 3': 2,
                  'Right Corner 3': 2, 'Backcourt': 3}
    data['SHOT_DIFF'] = _lookup_strict(data['SHOT_ZONE_BASIC'], range_dict,
                                       "shot zone")

    data['TIME_PER'] = np.floor(data['TIME_REMAINING']).astype('int64')

    return data.drop(['TEAM_NAME', 'HTM', 'VTM', 'ABBR', 'OPP', 'EVENT_TYPE',
                      'SHOT_ZONE_RANGE', 'SHOT_ZONE_AREA', 'GAME_DATE',
                      'MINUTES_REMAINING', 'SECONDS_REMAINING', 'ACTION_TYPE',
                      'SHOT_TYPE', 'SHOT_ZONE_BASIC'], axis=1)

In [3]:
def create_data(player_id):
    """Build the feature table for one player and save it as CSV.

    Loads the player's raw shot files with ``csv_2_pandas``, runs them through
    ``transform`` and writes the result, without the index column, to
    ``players/<player_id>/data.csv`` (overwriting any existing file).

    Parameters
    ----------
    player_id : str
        Player identifier; also the name of the player's directory.
    """
    features = transform(csv_2_pandas(player_id))
    out_path = "players/" + player_id + "/data.csv"
    with open(out_path, "w") as fout:
        csv_text = features.to_csv(index=False)
        fout.write(csv_text)

In [7]:
# Players whose feature files are (re)generated. Each ID appears once: the
# original cells processed 2544 twice, which only repeated the same work.
PLAYER_IDS = [201939, 2544, 201142, 202695, 2546, 977, 893]

for player_id in PLAYER_IDS:
    # create_data builds paths by string concatenation, so pass the ID as text.
    create_data(str(player_id))