#### Calculating the Expected Value

In [54]:
import pandas as pd
import os 
import numpy as np
from scipy.interpolate import interp1d

# Relative paths used later in this file resolve against the data directory.
# Set the WOBA_DATA_DIR environment variable to run on another machine;
# when it is unset the original hard-coded location is used.
os.chdir(os.environ.get(
    'WOBA_DATA_DIR',
    'C:/Users/dalto/OneDrive/Pictures/Documents/Projects/Coding Projects/woba modeling/data/',
))

#### Load Data

In [55]:
# Read both prediction tables and discard the 'Unnamed: 0' column that each
# CSV carries.
bat_tracking = pd.read_csv('quantile_predections/bat_tracking_pitch.csv').drop(columns=['Unnamed: 0'])
ev_direction = pd.read_csv('quantile_predections/ev_direction_pitch.csv').drop(columns=['Unnamed: 0'])
# Keep rows with year >= 2023. The explicit copy makes the filtered frame
# independent of the one it was sliced from, so adding columns to it later
# does not raise pandas' SettingWithCopyWarning.
ev_direction = ev_direction[ev_direction['year'] >= 2023].copy()

#### Sims

In [56]:
def expected_value(row):
    """Return the mean of the distribution described by a row's quantiles.

    Every entry of ``row`` whose label starts with ``q_`` is read as one point
    of a quantile function: the text between the first and second underscore
    is parsed as the probability level and the cell holds the value at that
    level. The curve is extended to probability 0 and 1 by repeating the
    first and last value spacing, and the CDF is treated as piecewise linear
    between consecutive points.

    Parameters
    ----------
    row : pandas.Series
        Must contain at least two ``q_<level>`` entries (the tail
        extrapolation indexes the first two and last two values). The entries
        are used in the order they appear in ``row.index``.

    Returns
    -------
    float
        Sum over segments of (probability mass) * (segment midpoint).
    """
    quantile_cols = [col for col in row.index if col.startswith('q_')]
    levels = [float(col.split('_')[1]) for col in quantile_cols]
    # A row that also holds non-numeric fields has object dtype; force floats.
    values = row[quantile_cols].to_numpy(dtype=float)

    cdf_quantiles = np.concatenate(([0.0], levels, [1.0]))
    min_val = values[0] - (values[1] - values[0])
    max_val = values[-1] + (values[-1] - values[-2])
    cdf_values = np.concatenate(([min_val], values, [max_val]))

    # For a uniform density on [v0, v1] carrying mass dq, the contribution to
    # the mean is dq * (v0 + v1) / 2. This equals
    # (dq / (v1 - v0)) * (v1**2 - v0**2) / 2 whenever v1 != v0, but it stays
    # correct when v1 == v0 (a point mass), where the density form would need
    # a division by zero and loses the segment's mass if patched with epsilon.
    segment_mass = np.diff(cdf_quantiles)
    segment_mid = (cdf_values[:-1] + cdf_values[1:]) / 2

    return float(np.sum(segment_mass * segment_mid))


In [57]:
def monte_carlo(row, sims=100000, *, kind='cubic', rng=None):
    """Estimate a row's expected value by sampling its quantile function.

    Every entry of ``row`` whose label starts with ``q_`` is read as one point
    of a quantile function (probability level parsed from the label, value
    taken from the cell). The curve is extended to probability 0 and 1 by
    repeating the first and last value spacing, interpolated with
    ``scipy.interpolate.interp1d``, evaluated at ``sims`` uniform random
    probabilities, and averaged.

    Parameters
    ----------
    row : pandas.Series
        Must contain at least two ``q_<level>`` entries; levels must be
        distinct and lie strictly between 0 and 1 for the interpolation grid
        to be valid.
    sims : int, default 100000
        Number of random draws.
    kind : str, default 'cubic'
        Interpolation kind forwarded to ``interp1d``. ``'linear'`` yields a
        piecewise-linear quantile function; the default cubic spline is not
        guaranteed to be monotone between the supplied points.
    rng : None, int, or numpy.random.Generator, default None
        ``None`` draws from NumPy's global random state via
        ``np.random.rand`` (so ``np.random.seed`` still applies). Anything
        else is passed to ``np.random.default_rng`` for a reproducible,
        call-local stream.

    Returns
    -------
    float
        Mean of the sampled values.
    """
    quantile_cols = [col for col in row.index if col.startswith('q_')]
    levels = [float(col.split('_')[1]) for col in quantile_cols]
    # A row that also holds non-numeric fields has object dtype; force floats.
    values = row[quantile_cols].to_numpy(dtype=float)

    cdf_quantiles = np.concatenate(([0.0], levels, [1.0]))
    min_val = values[0] - (values[1] - values[0])
    max_val = values[-1] + (values[-1] - values[-2])
    cdf_values = np.concatenate(([min_val], values, [max_val]))

    ppf_func = interp1d(
        cdf_quantiles,
        cdf_values,
        kind=kind,
        bounds_error=False,
        fill_value=(min_val, max_val),
    )

    if rng is None:
        random_quantiles = np.random.rand(sims)
    else:
        random_quantiles = np.random.default_rng(rng).random(sims)

    return np.mean(ppf_func(random_quantiles))


#### Running Sims

In [58]:
# Call monte_carlo once per row of ev_direction (default number of draws) and
# store the per-row estimate in a new 'monte' column.
ev_direction['monte'] = ev_direction.apply(monte_carlo, axis=1)

In [59]:
# Call monte_carlo once per row of bat_tracking (default number of draws) and
# store the per-row estimate in a new 'monte' column.
bat_tracking['monte'] = bat_tracking.apply(monte_carlo, axis=1)

In [None]:
# Summarise each table per (name, year): the mean of the simulated 'monte'
# values and the number of rows that went into it.
ev_groups = ev_direction.groupby(['name', 'year'])
bat_groups = bat_tracking.groupby(['name', 'year'])

player_mean_ev = ev_groups['monte'].mean().reset_index(name='monte')
player_count_ev = ev_groups.size().reset_index(name='count')
player_mean_bat = bat_groups['monte'].mean().reset_index(name='monte')
player_count_bat = bat_groups.size().reset_index(name='count')

In [61]:
# Join the ev_direction mean and row count with the bat_tracking mean on
# (name, year). Both mean columns are called 'monte', so the second join
# suffixes them _x / _y; the rename gives them descriptive names.
player = (
    player_mean_ev
    .merge(player_count_ev, on=['name', 'year'])
    .merge(player_mean_bat, on=['name', 'year'])
    .rename(columns={'monte_x':'ev_dir','monte_y':'bat_tracking'})
)

In [None]:
# Write the merged per-player table to disk; the path is relative to the
# current working directory. `index` is left at its default, so the
# DataFrame index is written as the first, unnamed CSV column.
player.to_csv('sim_results/wobacon_23_25.csv')