In [2]:
import pandas as pd
import itertools

In [5]:
# Event log that is filtered per (game_str, play_id) further down.
big_ie = pd.read_csv('big_ie.csv')

# Table whose distinct (game_str, play_id) pairs define the plays to classify.
ball_acquired_df = pd.read_csv('final_safe_probabilities.csv')

In [6]:
# Values of big_ie['position'] that count as outfielders.
OUTFIELD_POSITIONS = ['left field', 'center field', 'right field']


def classify_cutoff_play(play_events):
    """Identify the cutoff man on one play and label what happened next.

    Parameters
    ----------
    play_events : pd.DataFrame
        The rows of the event log for a single (game_str, play_id). Must
        contain the columns 'timestamp', 'play_type', 'position' and
        'event_code'.

    Returns
    -------
    tuple
        ``(cutoff_man, cutoff_decision)``. Both are ``None`` when no outfielder
        acquires the ball, or when no non-outfielder acquires it afterwards.
        Otherwise ``cutoff_man`` is the 'position' of the first non-outfielder
        to acquire the ball after the first outfielder acquisition, and
        ``cutoff_decision`` is one of 'let', 'cut and hold',
        'cut and throw to catcher' or 'cut and relay to <position>'.
    """
    # sort_values keeps the original row labels, so "everything after row X"
    # must not be expressed as a label slice on the sorted frame. Renumber the
    # rows 0..n-1 in chronological order and slice by position instead.
    # kind='stable' keeps the incoming order for events with equal timestamps.
    events = (
        play_events
        .sort_values('timestamp', kind='stable')
        .reset_index(drop=True)
    )

    # First outfielder to acquire the ball.
    of_acq = events[
        (events['play_type'] == 'ball acquired')
        & events['position'].isin(OUTFIELD_POSITIONS)
    ]
    if of_acq.empty:
        return None, None

    # First non-outfielder to acquire the ball after that: the cutoff man.
    after_of = events.iloc[of_acq.index[0] + 1:]
    cutoff_acq = after_of[
        (after_of['play_type'] == 'ball acquired')
        & ~after_of['position'].isin(OUTFIELD_POSITIONS)
    ]
    if cutoff_acq.empty:
        return None, None

    cutoff_pos = cutoff_acq.index[0]
    cutoff_man = cutoff_acq.iloc[0]['position']

    # A ball that reaches the catcher first was not cut off.
    if cutoff_man == 'catcher':
        return cutoff_man, 'let'

    # The next event_code == 2 row after the cutoff's acquisition is taken as
    # the receiver of the cutoff man's throw; none means he held the ball.
    # NOTE(review): an earlier comment here said event_code == 3, but the code
    # has always tested 2 and reads that row's 'position' as the receiver,
    # which presumably means 2 marks an acquisition - confirm against the
    # event-code dictionary for this dataset.
    after_cutoff = events.iloc[cutoff_pos + 1:]
    next_acq = after_cutoff[after_cutoff['event_code'] == 2]
    if next_acq.empty:
        return cutoff_man, 'cut and hold'

    receiver_pos = next_acq.iloc[0]['position']
    if receiver_pos == 'catcher':
        return cutoff_man, 'cut and throw to catcher'
    return cutoff_man, 'cut and relay to ' + receiver_pos


# One row per distinct play in the probability table.
combos = ball_acquired_df[['game_str', 'play_id']].drop_duplicates()

cutoff_results = []

for game_str, play_id in combos.itertuples(index=False, name=None):
    play_events = big_ie[(big_ie['game_str'] == game_str) & (big_ie['play_id'] == play_id)]
    cutoff_man, cutoff_decision = classify_cutoff_play(play_events)
    cutoff_results.append({
        'game_str': game_str,
        'play_id': play_id,
        'cutoff_man': cutoff_man,
        'cutoff_decision': cutoff_decision,
    })

# Passing columns explicitly keeps the expected schema even if there are no plays.
cutoff_df = pd.DataFrame(
    cutoff_results,
    columns=['game_str', 'play_id', 'cutoff_man', 'cutoff_decision'],
)
cutoff_df

Unnamed: 0,game_str,play_id,cutoff_man,cutoff_decision
0,y1_d001_CGA_QEA,108,second baseman,cut and relay to first baseman
1,y1_d001_CGA_QEA,114,second baseman,cut and hold
2,y1_d001_CGA_QEA,216,shortstop,cut and hold
3,y1_d002_CGA_QEA,92,shortstop,cut and relay to third baseman
4,y1_d003_CGA_QEA,92,second baseman,cut and hold
...,...,...,...,...
797,y2_d091_NYA_YJD,47,third baseman,cut and hold
798,y2_d094_QZE_RZQ,135,shortstop,cut and hold
799,y2_d094_QZE_RZQ,175,third baseman,cut and hold
800,y2_d094_QZE_RZQ,333,shortstop,cut and hold


In [8]:
# Keep only the plays that were given a cutoff decision label.
has_decision = cutoff_df['cutoff_decision'].notna()
cutoff_df_clean = cutoff_df.loc[has_decision]
cutoff_df_clean

Unnamed: 0,game_str,play_id,cutoff_man,cutoff_decision
0,y1_d001_CGA_QEA,108,second baseman,cut and relay to first baseman
1,y1_d001_CGA_QEA,114,second baseman,cut and hold
2,y1_d001_CGA_QEA,216,shortstop,cut and hold
3,y1_d002_CGA_QEA,92,shortstop,cut and relay to third baseman
4,y1_d003_CGA_QEA,92,second baseman,cut and hold
...,...,...,...,...
797,y2_d091_NYA_YJD,47,third baseman,cut and hold
798,y2_d094_QZE_RZQ,135,shortstop,cut and hold
799,y2_d094_QZE_RZQ,175,third baseman,cut and hold
800,y2_d094_QZE_RZQ,333,shortstop,cut and hold


In [13]:
# Number of labelled plays per cutoff decision.
decision_counts = cutoff_df_clean['cutoff_decision'].value_counts()
decision_counts

cutoff_decision
cut and hold                       607
let                                 57
cut and throw to catcher            43
cut and relay to third baseman      31
cut and relay to second baseman     23
cut and relay to shortstop          21
cut and relay to first baseman      18
cut and relay to pitcher             2
Name: count, dtype: int64

In [14]:
# Number of labelled plays per cutoff-man position.
cutoff_man_counts = cutoff_df_clean['cutoff_man'].value_counts()
cutoff_man_counts

cutoff_man
shortstop         320
second baseman    275
third baseman      91
catcher            57
first baseman      48
pitcher            11
Name: count, dtype: int64

In [12]:
# Persist the labelled plays. DataFrame.to_csv returns None when it is given a
# path, so bind obs_cuts to the exported frame rather than to the call's result.
obs_cuts = cutoff_df_clean
obs_cuts.to_csv('cutoff_decisions.csv', index=False)