#### Calculating the Expected Value

In [1]:
import pandas as pd
import os 
import numpy as np
from scipy.interpolate import interp1d

# Make the project's data folder the working directory; the relative
# read_csv / to_csv paths used later in this file resolve against it.
# NOTE(review): machine-specific absolute path — this will raise on any
# other machine unless the same folder layout exists.
os.chdir('C:/Users/dalto/OneDrive/Pictures/Documents/Projects/Coding Projects/woba modeling/data/')

#### Load Data

In [2]:
# Load the three quantile-prediction tables. The 'Unnamed: 0' column is
# dropped from each (presumably the row index written by an earlier
# DataFrame.to_csv call — confirm against the producing notebook).
bat_tracking = pd.read_csv('quantile_predections/bat_tracking_pitch_m2.csv').drop(columns=['Unnamed: 0'])
ev_direction = pd.read_csv('quantile_predections/ev_dir_pitch.csv').drop(columns=['Unnamed: 0'])
full_predection = pd.read_csv('quantile_predections/full_preds.csv').drop(columns=['Unnamed: 0'])

# Keep rows with year >= 2023 only. The explicit .copy() makes each filtered
# frame independent of the frame it was sliced from, so the column
# assignments performed on them later do not trigger pandas'
# SettingWithCopyWarning.
# NOTE(review): bat_tracking is not filtered by year — confirm intentional.
ev_direction = ev_direction[ev_direction['year'] >= 2023].copy()
full_predection = full_predection[full_predection['year'] >= 2023].copy()

#### Sims

In [13]:
def expected_value(row):
    """Return the mean of the distribution described by a row's quantile columns.

    Every entry of ``row`` whose label starts with ``'q_'`` is read as a
    quantile prediction; the text between the first and second underscore is
    parsed as its probability level (``'q_0.25'`` -> 0.25). Columns are used
    in the order they appear in ``row``.

    The quantile function is treated as piecewise linear between consecutive
    levels (i.e. the density is uniform on each segment). It is extended to
    levels 0 and 1 by repeating the first and last inter-quantile step.

    Parameters
    ----------
    row : pandas.Series
        One record holding at least two ``q_<level>`` entries; other entries
        are ignored.

    Returns
    -------
    float
        Expected value of the piecewise-uniform distribution.
    """
    quantile_cols = [col for col in row.index if col.startswith('q_')]
    levels = [float(col.split('_')[1]) for col in quantile_cols]
    # Force a float array: a row taken from a mixed-type frame is object dtype.
    values = row[quantile_cols].to_numpy(dtype=float)

    cdf_levels = np.concatenate(([0.0], levels, [1.0]))
    lower_tail = values[0] - (values[1] - values[0])
    upper_tail = values[-1] + (values[-1] - values[-2])
    knots = np.concatenate(([lower_tail], values, [upper_tail]))

    # With a uniform density on each segment, the segment contributes
    # (probability mass) * (segment midpoint). This equals
    # density * (b**2 - a**2) / 2 with density = mass / (b - a), but needs no
    # division, so a zero-width segment (two equal adjacent quantiles) keeps
    # its probability mass instead of contributing 0.
    segment_mass = np.diff(cdf_levels)
    segment_mid = (knots[:-1] + knots[1:]) / 2

    return float(np.sum(segment_mass * segment_mid))


In [3]:
def monte_carlo(row, sims=150000, rng=None):
    """Estimate the mean of a row's predicted distribution by sampling.

    Every entry of ``row`` whose label starts with ``'q_'`` is read as a
    quantile prediction; the text between the first and second underscore is
    parsed as its probability level (``'q_0.25'`` -> 0.25). Columns are used
    in the order they appear in ``row``. The quantile function is built by
    cubic interpolation through those points, extended to levels 0 and 1 by
    half of the first and last inter-quantile step. Uniform draws are pushed
    through it (inverse-transform sampling) and averaged.

    Parameters
    ----------
    row : pandas.Series
        One record holding at least two ``q_<level>`` entries; other entries
        are ignored.
    sims : int, default 150000
        Number of uniform draws.
    rng : None, int, or numpy.random.Generator, optional
        ``None`` (default) draws from NumPy's global random state, exactly as
        before. Anything else is passed to ``numpy.random.default_rng`` so a
        call can be made reproducible. When used with ``DataFrame.apply``, an
        integer seed gives every row the same draws, while a shared
        ``Generator`` instance advances from row to row.

    Returns
    -------
    float
        Sample mean of the simulated values.
    """
    quantile_cols = [col for col in row.index if col.startswith('q_')]
    quantiles = [float(col.split('_')[1]) for col in quantile_cols]
    # Force a float array: a row taken from a mixed-type frame is object dtype.
    values = row[quantile_cols].to_numpy(dtype=float)

    cdf_quantiles = np.concatenate(([0.0], quantiles, [1.0]))
    min_val = values[0] - (values[1] - values[0]) / 2
    max_val = values[-1] + (values[-1] - values[-2]) / 2
    cdf_values = np.concatenate(([min_val], values, [max_val]))

    # NOTE(review): a cubic interpolant is not guaranteed to be monotone, so
    # the fitted quantile function may overshoot between knots — confirm this
    # is acceptable for the predicted quantiles in use.
    ppf_func = interp1d(
        cdf_quantiles,
        cdf_values,
        kind='cubic',
        bounds_error=False,
        fill_value=(min_val, max_val),
    )

    if rng is None:
        random_quantiles = np.random.rand(sims)
    else:
        random_quantiles = np.random.default_rng(rng).random(sims)

    return float(np.mean(ppf_func(random_quantiles)))


#### Running Sims

In [7]:
# Apply monte_carlo to every row (axis=1) of ev_direction and store the
# per-row estimate in a new 'monte' column.
ev_direction['monte'] = ev_direction.apply(monte_carlo, axis=1)

In [None]:
# Apply monte_carlo to every row (axis=1) of bat_tracking and store the
# per-row estimate in a new 'monte' column.
bat_tracking['monte'] = bat_tracking.apply(monte_carlo, axis=1)

In [18]:
# Apply expected_value to every row (axis=1) of full_predection and store
# the per-row result in a new 'full' column.
full_predection['full'] = full_predection.apply(expected_value, axis=1)

In [4]:
# Apply monte_carlo to every row (axis=1) of full_predection and store the
# per-row estimate in a new 'monte' column.
full_predection['monte'] = full_predection.apply(monte_carlo, axis=1)

In [5]:
# Summarise full_predection per (name, year): the mean of the 'monte' column
# and the number of rows in each group.
_by_player_year = full_predection.groupby(['name', 'year'])
player_mean_bat = _by_player_year['monte'].mean().reset_index(name='monte')
player_count_bat = _by_player_year.size().reset_index(name='count')

In [6]:
# One row per (name, year): the row count joined with the mean 'monte' value.
player = pd.merge(
    player_count_bat,
    player_mean_bat,
    how='left',
    on=['name', 'year'],
)

In [7]:
# Write the per-player summary to disk, relative to the working directory.
# No index argument is passed, so to_csv also writes the row index as a
# leading column with an empty header.
player.to_csv('sim_results/full_wobacon_23_25.csv')