In [1]:
import pandas as pd
import pybaseball as pyb

$\textbf{Epidemiology: Ball Tracking Data Accumulation}$

- Done for __injured__ pitchers only (non-injured cohort still needs to be selected)
- Data pulled from January 1 of the injury season (so spring training is included when tracked) up to the date of injury
    - A gap may exist between a pitcher's last tracked pitch and the injury date when the IL date doesn't match the last outing exactly

In [2]:
# Read the cohort file and parse injury dates in one pass.
# The dates are expected as YYYY-MM-DD strings; the explicit format makes a
# malformed value raise instead of being guessed at.
cohort = pd.read_csv('cohort_data/d3_final_cohort.csv').assign(
    injury_date=lambda frame: pd.to_datetime(frame['injury_date'], format='%Y-%m-%d')
)

$\textit{Query Ball Tracking Data}$

In [None]:
# Pull pitch-level Statcast data for every cohort row, from January 1 of the
# injury year through the injury date.
#
# Results:
#   ball_tracking_data -- one dataframe per row that returned at least one pitch
#   no_data_players    -- cohort rows that yielded nothing (empty result, unusable
#                         id/date, or a failed query), kept for later reference
ball_tracking_data = []
no_data_players = []

n_players = len(cohort)                                         # loop-invariant; used for progress output

# iterate through each player and injury date
# list() materialises the rows so the sequence can be sliced to resume an interrupted run
for i, row in list(cohort.iterrows()):

    # Row parsing lives inside the try as well: one bad row (missing id, NaT
    # injury date) should be logged and skipped, not abort the whole scrape.
    try:
        # query params
        # int() undoes the float upcast pandas applies when the id column has
        # missing values, and raises on NaN so the row is recorded below
        bam_id = int(row['mlbamid'])
        injury_date = row['injury_date'].strftime('%Y-%m-%d')   # injury date as a string; raises on NaT
        start_date = f"{injury_date[:4]}-01-01"                 # 1/1 of the year of injury

        # pull data
        player_data = pyb.statcast_pitcher(start_date, injury_date, player_id=bam_id)

        # keep non-empty results; remember the rows that came back empty
        if not player_data.empty:
            ball_tracking_data.append(player_data)
        else:
            no_data_players.append(row)

    except Exception as e:
        # deliberate best-effort: report the failure, record the row, move on
        print(f"Error retrieving data for {row['mlbamid']}: {e}")
        no_data_players.append(row)

    # print progress (follows "Gathering...")
    # NOTE: i is the index label, so this count assumes cohort has its default 0..n-1 index
    print(f"Finished {i+1} of {n_players} players.")

Gathering Player Data
Finished 1 of 1380 players.
Gathering Player Data
Finished 2 of 1380 players.
Gathering Player Data
Finished 3 of 1380 players.
Gathering Player Data
Finished 4 of 1380 players.
Gathering Player Data
Finished 5 of 1380 players.
Gathering Player Data
Finished 6 of 1380 players.
Gathering Player Data
Finished 7 of 1380 players.
Gathering Player Data
Finished 8 of 1380 players.
Gathering Player Data
Finished 9 of 1380 players.
Gathering Player Data
Finished 10 of 1380 players.
Gathering Player Data
Finished 11 of 1380 players.
Gathering Player Data
Finished 12 of 1380 players.
Gathering Player Data
Finished 13 of 1380 players.
Gathering Player Data
Finished 14 of 1380 players.
Gathering Player Data
Finished 15 of 1380 players.
Gathering Player Data
Finished 16 of 1380 players.
Gathering Player Data
Finished 17 of 1380 players.
Gathering Player Data
Finished 18 of 1380 players.
Gathering Player Data
Finished 19 of 1380 players.
Gathering Player Data
Finished 20 of 138

In [None]:
# Stack the per-player frames into one table with a fresh 0..n-1 row index
full_ball_tracking = pd.concat(objs=ball_tracking_data, ignore_index=True)

# Write the combined table to disk without the row index
# (the file name carries the date of the last update)
full_ball_tracking.to_csv(path_or_buf='ball_tracking_data/raw_0805.csv', index=False)

$\textit{Player Metadata}$

In [None]:
# TODO: get player height, mass, etc? --> cohort_bios


# TODO: store injury metadata (injury, pitches tracked prior to injury, etc) --> cohort_metadata