In [31]:
import pandas as pd
import os
import json
from pathlib import Path

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 20)

In [77]:
home_shot_path = os.path.join(os.getcwd(), "data/response_data/game_details/stats_home_shots_0021500367.json")
away_shot_path = os.path.join(os.getcwd(), "data/response_data/game_details/stats_away_shots_0021500367.json")

with open(home_shot_path, 'r') as file:
        home_shot_data = json.load(file)

with open(away_shot_path, 'r') as file:
        away_shot_data = json.load(file)

home_shot_df = pd.DataFrame(home_shot_data['resultSets'][0]['rowSet'], columns=home_shot_data['resultSets'][0]['headers'])
away_shot_df = pd.DataFrame(away_shot_data['resultSets'][0]['rowSet'], columns=away_shot_data['resultSets'][0]['headers'])

home_shot_df['event_id'] = home_shot_df['GAME_EVENT_ID']
away_shot_df['event_id'] = away_shot_df['GAME_EVENT_ID']

shot_df = pd.concat([home_shot_df, away_shot_df], ignore_index=True).sort_values(by=['event_id'])

In [78]:
group_2pts = shot_df[shot_df['SHOT_TYPE'] == '2PT Field Goal'].groupby(['PLAYER_ID', 'TEAM_ID', 'GAME_ID'])
shooting_percentage_2pts = group_2pts[['SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG']].sum().reset_index().rename(columns={'SHOT_ATTEMPTED_FLAG':'shot_attempted_2pt','SHOT_MADE_FLAG' : 'shot_made_2pt'})
group_3pts = shot_df[shot_df['SHOT_TYPE'] == '3PT Field Goal'].groupby(['PLAYER_ID', 'TEAM_ID', 'GAME_ID'])
shooting_percentage_3pts = group_3pts[['SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG']].sum().reset_index().rename(columns={'SHOT_ATTEMPTED_FLAG':'shot_attempted_3pt','SHOT_MADE_FLAG' : 'shot_made_3pt'})


In [79]:
shooting_percentage_df = pd.merge(shooting_percentage_2pts, shooting_percentage_3pts, how='left', on=['PLAYER_ID', 'TEAM_ID', 'GAME_ID']).fillna(0)
shooting_percentage_df['shot_attempted_3pt'] = shooting_percentage_df['shot_attempted_3pt'].astype('Int64')
shooting_percentage_df['shot_made_3pt'] = shooting_percentage_df['shot_made_3pt'].astype('Int64')

In [93]:
shooting_percentage_df[shooting_percentage_df['TEAM_ID'] == 1610612739]

Unnamed: 0,PLAYER_ID,TEAM_ID,GAME_ID,shot_attempted_2pt,shot_made_2pt,shot_attempted_3pt,shot_made_3pt
1,2544,1610612739,21500659,22,11,5,0
2,2590,1610612739,21500659,1,0,0,0
3,2747,1610612739,21500659,10,5,7,2
6,201567,1610612739,21500659,7,5,5,1
8,202389,1610612739,21500659,8,3,0,0
9,202681,1610612739,21500659,14,5,2,0
10,202684,1610612739,21500659,2,0,0,0
11,202697,1610612739,21500659,3,1,3,0
15,203521,1610612739,21500659,3,1,2,1


In [46]:
def get_game_ids(json_dir):
    full_json_dir = os.path.normpath(os.path.join(os.getcwd(), json_dir))
    game_ids = []

    for file_name in os.listdir(full_json_dir):
        file_path = os.path.join(full_json_dir, file_name)
        if os.path.isfile(file_path) and file_name.endswith('.json'):
            game_ids.append(file_name.split('.')[0])
    
    return game_ids

raw_movement_dir = os.path.normpath(os.path.join(os.getcwd(), "data/raw_movement"))
game_ids = get_game_ids(raw_movement_dir)

In [16]:
with open(os.path.join(os.getcwd(), "data/game_ids.txt"), 'w') as file:
    for game_id in game_ids:
        file.write(f"{game_id}\n")

In [19]:
with open(os.path.join(os.getcwd(), "data/game_ids.txt")) as file:
    game_ids = [line.rstrip('\n') for line in file]

In [68]:
def get_shot_data(shot_data_path, game_id):
    home_shot_path = Path.cwd() / shot_data_path / ("stats_home_shots_" + game_id + ".json")
    away_shot_path = Path.cwd() / shot_data_path / ("stats_away_shots_" + game_id + ".json")

    with open(home_shot_path, 'r') as file:
        home_shot_data = json.load(file)

    with open(away_shot_path, 'r') as file:
        away_shot_data = json.load(file)
    
    home_shot_df = pd.DataFrame(home_shot_data['resultSets'][0]['rowSet'], columns=home_shot_data['resultSets'][0]['headers'])
    away_shot_df = pd.DataFrame(away_shot_data['resultSets'][0]['rowSet'], columns=away_shot_data['resultSets'][0]['headers'])

    home_shot_df['event_id'] = home_shot_df['GAME_EVENT_ID']
    away_shot_df['event_id'] = away_shot_df['GAME_EVENT_ID']

    shot_df = pd.concat([home_shot_df, away_shot_df], ignore_index=True).sort_values(by=['event_id'])

    return shot_df

In [96]:
shot_data_path = "data/response_data/game_details"
cle_team_id = 1610612739
fg_pct_games = {}

for game_id in game_ids:
    shot_df = get_shot_data(shot_data_path, game_id)

    group_2pts = shot_df[shot_df['SHOT_TYPE'] == '2PT Field Goal'].groupby(['PLAYER_ID', 'TEAM_ID', 'GAME_ID'])
    shooting_percentage_2pts = group_2pts[['SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG']].sum().reset_index().rename(columns={'SHOT_ATTEMPTED_FLAG':'shot_attempted_2pt','SHOT_MADE_FLAG' : 'shot_made_2pt'})
    group_3pts = shot_df[shot_df['SHOT_TYPE'] == '3PT Field Goal'].groupby(['PLAYER_ID', 'TEAM_ID', 'GAME_ID'])
    shooting_percentage_3pts = group_3pts[['SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG']].sum().reset_index().rename(columns={'SHOT_ATTEMPTED_FLAG':'shot_attempted_3pt','SHOT_MADE_FLAG' : 'shot_made_3pt'})
    
    # get shooting percentage data of one game
    shooting_percentage_df = pd.merge(shooting_percentage_2pts, shooting_percentage_3pts, how='left', on=['PLAYER_ID', 'TEAM_ID', 'GAME_ID']).fillna(0)
    shooting_percentage_df['shot_attempted_3pt'] = shooting_percentage_df['shot_attempted_3pt'].astype('Int64')
    shooting_percentage_df['shot_made_3pt'] = shooting_percentage_df['shot_made_3pt'].astype('Int64')

    # keep only the CLE players
    fg_pct_games[game_id] = shooting_percentage_df[shooting_percentage_df['TEAM_ID'] == cle_team_id]


In [104]:
fg_pct_games_list = list(fg_pct_games.values())
fg_pct_games_df = pd.concat(fg_pct_games_list, axis=0).reset_index(drop=True)

In [105]:
fg_pct_games_df

Unnamed: 0,PLAYER_ID,TEAM_ID,GAME_ID,shot_attempted_2pt,shot_made_2pt,shot_attempted_3pt,shot_made_3pt
0,2210,1610612739,0021500002,3,2,4,2
1,2544,1610612739,0021500002,17,11,5,1
2,2590,1610612739,0021500002,8,4,7,3
3,2747,1610612739,0021500002,8,3,2,0
4,2760,1610612739,0021500002,1,0,0,0
...,...,...,...,...,...,...,...
361,202389,1610612739,0021500659,8,3,0,0
362,202681,1610612739,0021500659,14,5,2,0
363,202684,1610612739,0021500659,2,0,0,0
364,202697,1610612739,0021500659,3,1,3,0
