In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from scipy.ndimage import gaussian_filter
import tqdm
from joblib import Parallel, delayed
from markov_functions import *
from itertools import product
import warnings


In [2]:
# --- Plate-appearance result codes -----------------------------------------
# Result codes treated as hits.
hits = [
    '1B', '2B', '3B',
    'HR', 'IHR',
    'H',
]

# Walk codes and hit-by-pitch codes.
bbs = {'IBB', 'uBB'}
hbps = {'HBP'}

# Result codes that are not at-bats: every walk / hit-by-pitch code plus the
# remaining SH / SF / IH / IR / ID codes.
non_ab_results = bbs | hbps | {'SH', 'SF', 'IH', 'IR', 'ID'}

# Total bases credited per hit code (insertion order kept as originally written).
tb_map = {
    '1B': 1,
    'H': 1,
    '2B': 2,
    '3B': 3,
    'HR': 4,
    'IHR': 4,
}

# --- Pitch-level tokens ----------------------------------------------------
# Pitch result tokens that count as a swing.
_swing_tokens = {
    'SW', 'F', 'FT',
    'FOUL_BUNT', 'TRY_BUNT', 'BUNT',
    'H',
}

# Pitch-code groupings behind the three coarse pitch classes.
_fastball_tokens = {'FF', 'SI', 'FC'}  # four-seam, sinker, cutter
# changeup, forkball, split-finger, knuckleball; 'EP' is presumably eephus -- TODO confirm
_offspeed_tokens = {'CH', 'FO', 'FS', 'KN', 'EP'}
_breaking_tokens = {'CU', 'SL'}  # curveball, slider

# Order matters: the simulation looks class names up by integer index
# (`pitch_types[pitchtype]`).
pitch_types = [
    'fastball',
    'offspeed',
    'breaking',
]

In [3]:
pa_pitches_filename = './data/paired_filtered.csv'  # or provide a full path

if not os.path.exists(pa_pitches_filename):
    # Fall back to the first CSV (alphabetically, so the choice is repeatable)
    # found in the current working directory.
    csv_files = sorted(f for f in os.listdir('.') if f.lower().endswith('.csv'))
    if not csv_files:
        raise FileNotFoundError(f"{pa_pitches_filename!r} not found and no CSV files in the current directory.")
    filename = csv_files[0]
    print(f"No file named {pa_pitches_filename!r} found. Using first CSV in cwd: {filename}")
    # Actually switch to the fallback file; previously the fallback was chosen
    # but the missing path was still the one passed to read_csv.
    pa_pitches_filename = filename

# Load the plate-appearance table and drop the columns this notebook does not
# use. A KeyError is raised if any of them is absent from the file.
pas = pd.read_csv(pa_pitches_filename).drop(
    columns=['pa_seq', 'bases', 'velocities_events', 'pitchCodes_events']
)
print(f"Loaded {len(pas)} rows and {len(pas.columns)} columns from {pa_pitches_filename}")
# pas.head()


Loaded 27600 rows and 12 columns from ./data/paired_filtered.csv


In [4]:

pitchers_list = ['勝騎士', '古林睿煬', '艾璞樂', '肯特', '威能帝']

# Batter-side reference population: every plate appearance in `pas`, split by
# whether `batterHand` differs from `pitcherHand`. (The earlier
# `isin(unique())` filter on batterName selected every row, so it is dropped.)
batter = pas
opposite_batter = batter[batter['batterHand'] != batter['pitcherHand']]
samehand_batter = batter[batter['batterHand'] == batter['pitcherHand']]

for pitcher_n in pitchers_list:
    for oppo in [True, False]:

        print(f'Sample {pitcher_n}, opposite-handed {oppo}')
        pitcher_name = pitcher_n
        pitcher = pas[pas['pitcherName'] == pitcher_name]

        opposite = oppo

        # Ball / strike values forwarded to get_pitches_with_counts. Kept under
        # its own name so that `counts` below is only ever the 'B-S' string.
        count_filter = {
            'ball': [0, 1, 2, 3],
            'strike': [0, 1, 2],
        }

        pitcher_events, pitcher_event_list = get_pitches_with_counts(
            pitcher, opposite_hand=opposite, **count_filter
        )

        # `opposite` is always a bool here, so exactly one of the two
        # pre-split batter frames applies.
        # NOTE(review): this extraction depends only on `opposite`, yet it is
        # redone for every pitcher; it could be hoisted out of the pitcher loop
        # if get_pitches_with_counts has no side effects -- confirm first.
        batter_pool = opposite_batter if opposite else samehand_batter
        batter_events, batter_event_list = get_pitches_with_counts(
            batter_pool, opposite_hand=opposite, **count_filter
        )

        # Count label of a fresh plate appearance.
        counts = '0-0'

        # Pitch-history context with nothing known yet (no previous pitches).
        situation_params_init = {
            'pitch_type_last': None,
            'coords_quadrant_last': None,
            'swing_last': None,
            'whiff_last': None,
            'pitch_type_last2': None,
            'coords_quadrant_last2': None,
            'swing_last2': None,
            'whiff_last2': None
        }

        # 0-0 probability maps for this pitcher / handedness pairing.
        # NOTE(review): simulate_pa builds its own local maps and never reads
        # these; the call is kept because it fails before any worker is
        # started if the maps cannot be built, and it leaves the maps in the
        # notebook namespace. Remove it if nothing downstream needs them.
        pitchtype_map, swing_map, whiff_map, inplay_map, soft_map, called_strike_zone = counts_prob(
            counts, pitcher_event_list, batter_event_list, situation_params=situation_params_init
        )

        # Output file for this run; any stale copy is deleted up front.
        store_result = f'{pitcher_name}_oppo_{oppo}_new.csv'
        if os.path.exists(store_result):
            os.remove(store_result)



        def simulate_pa(_):
            """Simulate one plate appearance and return its pitch-by-pitch record.

            Each pitch is drawn from the current pitch-type map and then resolved
            through the swing / whiff / in-play / soft-contact / called-strike
            maps. After every pitch that does not end the plate appearance, the
            maps are rebuilt for the new count and the updated pitch history.

            Parameters
            ----------
            _ : Any
                Ignored. Present so the function can be mapped over task indices.
                NOTE(review): no random seed is set anywhere in this cell, so
                repeated runs are presumably not reproducible.

            Returns
            -------
            dict
                'pitch_coord_sequence'   : list of sampled (x, y) coordinates
                'pitch_types_sequence'   : list of names taken from `pitch_types`
                'pitch_results_sequence' : list of 'WHIFF', 'FOUL', 'SOFT-INPLAY',
                                           'HARD-INPLAY', 'CALLED-STRIKE' or 'BALL'
                'ending_type'            : 'strikeout', 'walk', 'soft-inplay' or
                                           'hard-inplay'
                'good_ending'            : True for strikeout / soft-inplay,
                                           False for walk / hard-inplay
            """
            # Context keys to blank out (set to None), tried in order from the
            # full pitch history down to "previous pitch type only".
            context_relaxations = (
                (),
                ('coords_quadrant_last2',),
                ('coords_quadrant_last2', 'swing_last2', 'whiff_last2'),
                ('pitch_type_last2', 'coords_quadrant_last2', 'swing_last2', 'whiff_last2'),
                ('pitch_type_last2', 'coords_quadrant_last2', 'swing_last2', 'whiff_last2',
                 'coords_quadrant_last'),
                ('pitch_type_last2', 'coords_quadrant_last2', 'swing_last2', 'whiff_last2',
                 'coords_quadrant_last', 'swing_last', 'whiff_last'),
            )

            def maps_for_count(count_label, context):
                """Build the six probability maps, relaxing `context` until it works.

                A relaxation is rejected when counts_prob emits a RuntimeWarning
                (promoted to an exception only inside the `with` block below).
                """
                for dropped_keys in context_relaxations:
                    with warnings.catch_warnings():
                        warnings.simplefilter("error", RuntimeWarning)
                        try:
                            relaxed = context.copy()
                            relaxed.update(dict.fromkeys(dropped_keys))
                            return counts_prob(
                                count_label, pitcher_event_list, batter_event_list,
                                situation_params=relaxed
                            )
                        except RuntimeWarning:
                            continue  # try the next, less specific context
                # Last resort: the empty context. This runs outside the
                # warnings-as-errors block on purpose; previously a
                # RuntimeWarning here escaped as an exception and aborted the
                # whole run instead of falling back.
                return counts_prob(
                    count_label, pitcher_event_list, batter_event_list,
                    situation_params=situation_params_init
                )

            # Per-plate-appearance state.
            n_pitch = 0
            strike = 0
            ball = 0
            pa_end = False
            good_ending = False
            ending_type = None
            situation_params = situation_params_init.copy()

            pitch_coord_sequence = []
            pitch_types_sequence = []
            pitch_results_sequence = []

            # Initial probability maps for the 0-0 count.
            pitchtype_map, swing_map, whiff_map, inplay_map, soft_map, called_strike_zone = counts_prob(
                '0-0', pitcher_event_list, batter_event_list, situation_params=situation_params
            )

            while not pa_end:
                # sample_pitch yields (grid indices, sampled coordinates); the
                # pitch-type index appears in both tuples and the second one is
                # the value used from here on.
                (x_idx, y_idx, _grid_pitchtype), (x_sampled, y_sampled, pitchtype) = sample_pitch(pitchtype_map)
                pitch_coord_sequence.append((x_sampled, y_sampled))
                pitch_types_sequence.append(pitch_types[pitchtype])
                n_pitch += 1

                if prob_determine(swing_map, x_idx, y_idx, pitchtype):  # swing
                    situation_params = write_situation(
                        situation_params=situation_params,
                        pitchtype=pitch_types[pitchtype],
                        x=x_sampled,
                        y=y_sampled,
                        swing=True,
                        whiff=prob_determine(whiff_map, x_idx, y_idx, pitchtype),
                    )
                    if situation_params['whiff_last']:  # whiff
                        pitch_results_sequence.append('WHIFF')
                        if strike < 2:
                            strike += 1
                        else:
                            pa_end, good_ending, ending_type = True, True, 'strikeout'
                    elif prob_determine(inplay_map, x_idx, y_idx, pitchtype):  # contact, in play
                        pa_end = True
                        if prob_determine(soft_map, x_idx, y_idx, pitchtype):  # soft contact
                            good_ending, ending_type = True, 'soft-inplay'
                            pitch_results_sequence.append('SOFT-INPLAY')
                        else:  # hard contact
                            good_ending, ending_type = False, 'hard-inplay'
                            pitch_results_sequence.append('HARD-INPLAY')
                    else:  # contact, foul: adds a strike only below two strikes
                        pitch_results_sequence.append('FOUL')
                        if strike < 2:
                            strike += 1
                else:  # no swing
                    situation_params = write_situation(
                        situation_params=situation_params,
                        pitchtype=pitch_types[pitchtype],
                        x=x_sampled,
                        y=y_sampled,
                        swing=False,
                        whiff=False,
                    )
                    if prob_determine(called_strike_zone, x_idx, y_idx, pitchtype):  # called strike
                        pitch_results_sequence.append('CALLED-STRIKE')
                        if strike < 2:
                            strike += 1
                        else:
                            pa_end, good_ending, ending_type = True, True, 'strikeout'
                    else:  # ball
                        pitch_results_sequence.append('BALL')
                        if ball < 3:
                            ball += 1
                        else:
                            pa_end, good_ending, ending_type = True, False, 'walk'

                if not pa_end:
                    # Rebuild the maps for the new count and pitch history.
                    counts = f'{ball}-{strike}'
                    (pitchtype_map, swing_map, whiff_map,
                     inplay_map, soft_map, called_strike_zone) = maps_for_count(counts, situation_params)

            return {
                'pitch_coord_sequence': pitch_coord_sequence,
                'pitch_types_sequence': pitch_types_sequence,
                'pitch_results_sequence': pitch_results_sequence,
                'ending_type': ending_type,
                'good_ending': good_ending
            }


        # Number of plate appearances to simulate for each pitcher / handedness
        # pairing, and the (fixed) number of joblib workers.
        # NOTE(review): the worker count is a hard-coded 6, not derived from
        # the machine's CPU count -- adjust for the host if needed.
        n_PA = 1000
        num_processes = 6

        if __name__ == '__main__':
            # The progress bar wraps the task generator, so it advances as
            # tasks are handed to joblib rather than as they finish.
            pa_jobs = (
                delayed(simulate_pa)(pa_index)
                for pa_index in tqdm.tqdm(range(n_PA), total=n_PA)
            )
            results = Parallel(n_jobs=num_processes)(pa_jobs)

            # One row per simulated plate appearance, written to this run's CSV.
            result_df = pd.DataFrame(results)
            result_df.to_csv(store_result, index=False)


Sample 勝騎士, opposite-handed True


100%|██████████| 1000/1000 [07:42<00:00,  2.16it/s]


Sample 勝騎士, opposite-handed False


100%|██████████| 1000/1000 [06:46<00:00,  2.46it/s]


Sample 古林睿煬, opposite-handed True


100%|██████████| 1000/1000 [07:49<00:00,  2.13it/s]


Sample 古林睿煬, opposite-handed False


100%|██████████| 1000/1000 [06:46<00:00,  2.46it/s]


Sample 艾璞樂, opposite-handed True


100%|██████████| 1000/1000 [07:34<00:00,  2.20it/s]


Sample 艾璞樂, opposite-handed False


100%|██████████| 1000/1000 [06:47<00:00,  2.45it/s]


Sample 肯特, opposite-handed True


100%|██████████| 1000/1000 [08:04<00:00,  2.06it/s]


Sample 肯特, opposite-handed False


100%|██████████| 1000/1000 [06:53<00:00,  2.42it/s]


Sample 威能帝, opposite-handed True


100%|██████████| 1000/1000 [07:35<00:00,  2.20it/s]


Sample 威能帝, opposite-handed False


100%|██████████| 1000/1000 [06:42<00:00,  2.48it/s]
