In [1]:
import pandas as pd
import numpy as np
%run 'Stat Calculator.py'

In [None]:
#Load the sample data from a CSV in the working directory.
#Later cells read its 'P', 'C' and 'events' columns
#(presumably pitcher, catcher and plate-appearance outcome -- confirm against the file).
MLBsample = pd.read_csv('Sample Data.csv')

In [None]:
#Define Helper Functions To Get Differences Between
#When The Catcher Is Catching vs When He's Not
def wOBAdiffDef(samp,catcher):
    '''Input:
        samp- a dataframe with a 'C' column and an 'events' column
        catcher- the value in 'C' that identifies the catcher of interest

    Output:
        wOBA_calculator applied to the events where 'C' is NOT the catcher,
        minus wOBA_calculator applied to the events where 'C' IS the catcher.

    (not catching - catching)'''
    events_without = samp.loc[samp['C'] != catcher, 'events']
    events_with = samp.loc[samp['C'] == catcher, 'events']
    woba_without = wOBA_calculator(events_without)
    woba_with = wOBA_calculator(events_with)
    return woba_without - woba_with
    
def OPSdiffDef(samp,catcher):
    '''Input:
        samp- a dataframe with a 'C' column and an 'events' column
        catcher- the value in 'C' that identifies the catcher of interest

    Output:
        OPS_calculator applied to the events where 'C' is NOT the catcher,
        minus OPS_calculator applied to the events where 'C' IS the catcher.

    (not catching - catching)'''
    events_without = samp.loc[samp['C'] != catcher, 'events']
    events_with = samp.loc[samp['C'] == catcher, 'events']
    ops_without = OPS_calculator(events_without)
    ops_with = OPS_calculator(events_with)
    return ops_without - ops_with

def significant(val,lst):
    '''Input:
        val- the observed value to test
        lst- a list of values to compare it against

    Output:
        Boolean: True if val is strictly below the 2.5th percentile or strictly
        above the 97.5th percentile of lst (i.e. outside the central 95%),
        False otherwise'''
    # Fetch both cut-offs in a single percentile call.
    lower_cut, upper_cut = np.percentile(lst, [2.5, 97.5])
    too_low = val < lower_cut
    return too_low or (val > upper_cut)

In [None]:
#Define Functions to Apply Across The Total Data

def byPitcher(df, n_sims=1000, min_pa=20, random_state=None):
    '''Permutation test of each catcher's effect on wOBA and OPS.

    Applied to a DataFrame with a 'C' (catcher) column and an 'events' column
    (the notebook calls it once per pitcher through groupby('P').apply).
    The catcher labels are shuffled n_sims times; for every shuffle the
    (not catching - catching) difference is recorded for each catcher, and the
    observed difference is then compared against those simulated differences.

    Input:
        df- a dataframe containing at least the columns 'C' and 'events'
        n_sims- number of shuffles to run (default 1000)
        min_pa- minimum number of rows a catcher needs in df to be analysed.
            Inclusive (20 means "20+"), the same rule filterPitcher uses.
        random_state- optional integer seed so the shuffles are reproducible;
            None (default) draws from numpy's global random state

    Output:
        A DataFrame with one row per qualifying catcher and the columns
        'Plate Appearances', 'wOBA with Catcher', 'wOBA without Catcher',
        'wOBA Difference', 'OPS with Catcher', 'OPS without Catcher',
        'OPS Difference', 'wOBA Significant', 'OPS Significant'.
        It has no rows when no catcher reaches min_pa.'''
    df = df[['C','events']] #Shorten data to keep only relevant points
    vals = df['C'].value_counts()
    # value_counts() drops missing catchers, so skip them here too;
    # otherwise vals[NaN] raises a KeyError.
    val_catchers = [c for c in df['C'].dropna().unique() if vals[c] >= min_pa]

    # One generator shared by every shuffle: re-seeding inside the loop would
    # produce the same permutation n_sims times.
    rng = None if random_state is None else np.random.RandomState(random_state)

    wOBAsimulations = {catcher: [] for catcher in val_catchers}
    OPSsimulations = {catcher: [] for catcher in val_catchers}
    dff = df.copy()
    for _ in range(n_sims):
        # Shuffle the catcher labels; to_numpy() discards the shuffled index so
        # the assignment is positional rather than re-aligned to the old order.
        dff['C'] = df['C'].sample(frac = 1, random_state = rng).to_numpy()
        for catcher in val_catchers:
            wOBAsimulations[catcher].append(wOBAdiffDef(dff,catcher))
            OPSsimulations[catcher].append(OPSdiffDef(dff,catcher))
    #print('Running simulation...') #Uncomment this print to see one message per finished call (i.e. per pitcher)

    column_names = ['Plate Appearances',
                    'wOBA with Catcher', 'wOBA without Catcher', 'wOBA Difference',
                    'OPS with Catcher', 'OPS without Catcher', 'OPS Difference',
                    'wOBA Significant', 'OPS Significant']
    summary = {name: {} for name in column_names}
    for catcher in val_catchers:
        with_catcher = df[df['C'] == catcher]['events']
        without_catcher = df[df['C'] != catcher]['events']
        # Observed differences are computed once and reused for the
        # significance test below.
        woba_diff = wOBAdiffDef(df,catcher)
        ops_diff = OPSdiffDef(df,catcher)

        summary['Plate Appearances'][catcher] = vals[catcher]
        summary['wOBA with Catcher'][catcher] = wOBA_calculator(with_catcher)
        summary['wOBA without Catcher'][catcher] = wOBA_calculator(without_catcher)
        summary['wOBA Difference'][catcher] = woba_diff
        summary['OPS with Catcher'][catcher] = OPS_calculator(with_catcher)
        summary['OPS without Catcher'][catcher] = OPS_calculator(without_catcher)
        summary['OPS Difference'][catcher] = ops_diff
        summary['wOBA Significant'][catcher] = significant(woba_diff,wOBAsimulations[catcher])
        summary['OPS Significant'][catcher] = significant(ops_diff,OPSsimulations[catcher])

    return pd.DataFrame({name: pd.Series(values) for name, values in summary.items()})

def filterPitcher(df):
    '''Returns a Boolean. True if more than one catcher in df['C'] has 20+ rows
    (PAs) while leaving at least 20 rows in df for everyone else.'''
    pa_per_catcher = df['C'].value_counts()
    most_allowed = len(df) - 20
    in_range = (pa_per_catcher >= 20) & (pa_per_catcher <= most_allowed)
    qualifying_catchers = pa_per_catcher[in_range]
    return len(qualifying_catchers) > 1

In [None]:
#Keep only the Pitchers who've thrown 20+ PA's to more than one catcher
#(rows without a recorded event are dropped before counting)
filteredSample = (
    MLBsample
    .dropna(subset = ['events'])
    .groupby('P')
    .filter(filterPitcher)
)

#Run Simulations for Every Pitcher in the Data (Took me around Seven Minutes to finish running this)
#There are 11 pitchers in the sample. If you uncomment the print statement inside byPitcher
#(in the cell above), you can keep track of how many pitchers have finished running.
sampleSummary = (
    filteredSample
    .groupby('P')
    .apply(byPitcher)
)

In [None]:
#Show Results
#(the table byPitcher built for each pitcher, combined by groupby('P').apply --
# presumably indexed by pitcher and then catcher; confirm on the rendered output)
sampleSummary