# M01. Park and Weather Factors

### Imports

In [15]:
if "running_pipeline" not in globals():
    print("Running imports...")
    %run "C:\Users\james\Documents\MLB\Code\U1. Imports.ipynb"
    %run "C:\Users\james\Documents\MLB\Code\U2. Utilities.ipynb"
    %run "C:\Users\james\Documents\MLB\Code\U3. Classes.ipynb"
    %run "C:\Users\james\Documents\MLB\Code\U4. Datasets.ipynb"
    print("Imports in.")
else:
    print("Imports already in.")

Running imports...
Imports in.


### Complete Dataset

In [18]:
# complete_dataset = create_pa_inputs(None, start_year=2013, end_year=2024, short=50, long=300, adjust=False)

### Base Rates

Calculate average stats in a given base year

In [19]:
def base_rates(df, base_year=2014):
    """Return the mean of every event column over one base season.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``game_date`` column that ``pd.to_datetime`` can parse and
        one column per entry of the global ``events_list``.
    base_year : int, default 2014
        Calendar year whose rows are averaged.

    Returns
    -------
    pandas.DataFrame
        A single row (index 0) with one column per event.
    """
    # Parse dates into a separate Series so the caller's frame is not modified
    game_dates = pd.to_datetime(df['game_date'])

    # Select period of interest
    in_base_year = game_dates.dt.year == base_year

    # Calculate averages over period of interest
    base_rate_df = df.loc[in_base_year, events_list].mean().to_frame().T

    return base_rate_df

In [20]:
# base_rate_df = base_rates(complete_dataset, 2014)
# base_rate_df.to_csv(os.path.join(baseball_path, "Base Rates.csv"), index=False)

### Game Averages

Average rates within the game

In [21]:
def game_averages(df):
    """Collapse PA-level rows to one row per game.

    Returns the mean of every ``events_list`` column per game plus a ``pas``
    column holding the number of rows in that game, sorted by ``game_date``.
    """
    # Columns that identify a game and its conditions
    game_keys = ['gamePk', 'game_date', 'venue_id', 'away_name', 'home_name', 'x_vect', 'y_vect', 'temperature']
    by_game = df.groupby(game_keys)

    # Event rates within each game
    game_avgs = by_game[events_list].mean().reset_index()

    # Number of observations per game (same group order as the means above)
    game_avgs['pas'] = by_game.size().values

    # Oldest games first
    return game_avgs.sort_values(['game_date'], ascending=True)

In [22]:
# game_average_df = game_averages(complete_dataset)

### Player Averages

Average stats of all the players in the game, coming into the game

In [23]:
def player_averages(df):
    """Average the stats batters and pitchers carried into each game.

    For every PA the ``*_b_long`` / ``*_p_long`` values are replaced by the
    player's first value in that game, then averaged per ``gamePk``.
    Repeating the first value (instead of dropping duplicates) keeps the
    average weighted by PA. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        ``gamePk`` followed by the batter and pitcher average columns.
    """
    # Note: these are already shifted if using create_pa_inputs from A02. We want the first PA for players in each game.
    batter_inputs = [f"{event}_b_long" for event in events_list]
    pitcher_inputs = [f"{event}_p_long" for event in events_list]

    # First PA of the game carries stats through the end of the previous game,
    # so nothing generated in-game leaks in. Kept in separate frames rather
    # than written back onto df.
    batter_entering = df.groupby(['gamePk', 'batter'])[batter_inputs].transform('first')
    pitcher_entering = df.groupby(['gamePk', 'pitcher'])[pitcher_inputs].transform('first')

    # Calculate player averages by game (both results are ordered by gamePk)
    batter_avgs = batter_entering.groupby(df['gamePk']).mean().reset_index()
    pitcher_avgs = pitcher_entering.groupby(df['gamePk']).mean().reset_index(drop=True)

    # Concatenate together
    player_avgs = pd.concat([batter_avgs, pitcher_avgs], axis=1)

    return player_avgs

In [24]:
# player_average_df = player_averages(complete_dataset)

### League Averages

In [25]:
def league_average(complete_dataset, days=30):
    """Trailing league-wide event rates coming into each game date.

    Parameters
    ----------
    complete_dataset : pandas.DataFrame
        PA-level rows with ``game_date`` and one column per ``events_list`` entry.
    days : int, default 30
        Number of preceding *rows of distinct game dates* to pool. These are
        dates with games, not calendar days. The current date is excluded.

    Returns
    -------
    pandas.DataFrame
        ``game_date`` plus one float ``{event}_lg`` column per event. The
        first ``days`` dates are NaN because the window is not yet full.
    """
    # Daily sum of events, one row per game_date in ascending order
    daily = complete_dataset.groupby('game_date')[events_list].sum()
    # Total events per day
    daily['pas'] = daily[events_list].sum(axis=1)

    # Sum of the previous `days` rows; shift(1) excludes the current date
    trailing = daily.rolling(window=days, min_periods=days).sum().shift(1)

    # Rate = trailing event count / trailing total events
    league_avg = trailing[events_list].div(trailing['pas'], axis=0).add_suffix('_lg')

    return league_avg.reset_index()

In [26]:
# league_average_df = league_average(complete_dataset, 30)
# league_average_df.tail()

### Park Factors

Helper function used to calculate weighted rolling averages, used to average game stats by PAs

In [27]:
def weighted_rolling_avg(values, weights, window, min_periods):
    """Lagged rolling average of ``values`` weighted by ``weights``.

    Each output row is sum(value * weight) / sum(weight) over the ``window``
    rows that precede it (the current row is excluded by a one-row shift).
    Weights are matched to values by position, so every window uses the
    weights of its own rows.

    Parameters
    ----------
    values : array-like or pandas.Series
        Per-row rates. The result keeps this Series' index.
    weights : array-like or pandas.Series
        Per-row weights (e.g. PAs), same length as ``values``.
    window : int
        Number of rows in the rolling window.
    min_periods : int
        Minimum number of valid rows needed to produce a value.

    Returns
    -------
    pandas.Series
        Weighted averages; NaN where fewer than ``min_periods`` valid rows
        precede the current one. Rows where either the value or the weight
        is missing are left out of the window totals.
    """
    values = pd.Series(values)
    weights = pd.Series(np.asarray(weights, dtype=float), index=values.index)

    # Only rows with both a value and a weight contribute to either total
    usable = values.notna() & weights.notna()
    weights = weights.where(usable)
    weighted_values = values * weights

    # Rolling totals of the numerator and the denominator over the same rows
    numerator = weighted_values.rolling(window=window, min_periods=min_periods).sum()
    denominator = weights.rolling(window=window, min_periods=min_periods).sum()

    # Shift by 1 to exclude the current row
    return (numerator / denominator).shift(1)


##### Park Averages

Average of stats over last park_window games - excluding game of interest

In [28]:
def park_averages(game_avgs, park_window, park_window_min):
    """Lagged, PA-weighted rolling event rates per venue.

    Parameters
    ----------
    game_avgs : pandas.DataFrame
        One row per game with ``venue_id``, ``game_date``, ``pas`` and the
        ``events_list`` columns. Its index must be unique because the
        rolled values are written back by index label.
    park_window : int
        Number of previous games at the venue to average over.
    park_window_min : int
        Minimum number of previous games required to produce a value.

    Returns
    -------
    pandas.DataFrame
        A date-sorted copy of ``game_avgs`` whose event columns hold the
        rolling rates (NaN until ``park_window_min`` games are available).
    """
    # Sort by date so each venue's games are rolled in chronological order
    park_avgs = game_avgs.sort_values(['game_date'], ascending=[True])

    # One frame of rolling rates per venue. Each frame keeps its rows'
    # original index labels, so no group-key level has to be stripped.
    rolled_by_venue = [
        pd.DataFrame({
            event: weighted_rolling_avg(group[event], group['pas'], park_window, park_window_min)
            for event in events_list
        })
        for _, group in park_avgs.groupby('venue_id')
    ]

    # Write back by index label; rows without a venue end up NaN
    if rolled_by_venue:
        park_avgs[events_list] = pd.concat(rolled_by_venue)

    return park_avgs

In [29]:
# park_average_df = park_averages(game_average_df, 243, 81)

##### Team Averages

In [30]:
def team_averages(game_avgs, park_window, park_window_min):
    """Lagged, PA-weighted rolling event rates per away team.

    Games are grouped by ``away_name``, so each row's rates describe the
    preceding games in which that row's away team was the visitor.

    Parameters
    ----------
    game_avgs : pandas.DataFrame
        One row per game with ``away_name``, ``game_date``, ``pas`` and the
        ``events_list`` columns. Its index must be unique because the
        rolled values are written back by index label.
    park_window : int
        Number of previous away games to average over.
    park_window_min : int
        Minimum number of previous away games required to produce a value.

    Returns
    -------
    pandas.DataFrame
        A date-sorted copy of ``game_avgs`` whose event columns hold the
        rolling rates (NaN until ``park_window_min`` games are available).
    """
    # Sort by date so each team's away games are rolled in chronological order
    team_avgs = game_avgs.sort_values(['game_date'], ascending=[True])

    # One frame of rolling rates per away team. Each frame keeps its rows'
    # original index labels, so no group-key level has to be stripped.
    rolled_by_team = [
        pd.DataFrame({
            event: weighted_rolling_avg(group[event], group['pas'], park_window, park_window_min)
            for event in events_list
        })
        for _, group in team_avgs.groupby('away_name')
    ]

    # Write back by index label; rows without an away team end up NaN
    if rolled_by_team:
        team_avgs[events_list] = pd.concat(rolled_by_team)

    return team_avgs

In [31]:
# team_average_df = team_averages(game_average_df, 243, 81)

##### Park Factors

In [32]:
def create_park_factors(park_avgs, team_avgs):
    """Park factor per game: home-park rate divided by the home team's away rate.

    Each park row is matched, via ``merge_asof``, with the most recent
    ``team_avgs`` row (on or before the game date) in which the park's home
    team was the away team.

    Parameters
    ----------
    park_avgs, team_avgs : pandas.DataFrame
        Outputs of ``park_averages`` and ``team_averages``. Neither is modified.

    Returns
    -------
    pandas.DataFrame
        ``gamePk`` plus one ``{event}_pfx`` column per event. A zero team
        rate yields NaN rather than +/-inf, so downstream ``dropna`` removes
        the row.
    """
    # Convert dates and add a uniform team_name key (assign returns new frames)
    park_avgs = park_avgs.assign(
        game_date=pd.to_datetime(park_avgs['game_date']),
        team_name=park_avgs['home_name'],
    )
    team_avgs = team_avgs.assign(
        game_date=pd.to_datetime(team_avgs['game_date']),
        team_name=team_avgs['away_name'],
    )

    # merge_asof needs chronological order, so sort AFTER converting to datetime
    park_avgs = park_avgs.sort_values('game_date', kind='stable')
    team_avgs = team_avgs.sort_values('game_date', kind='stable')

    # Perform merge_asof
    park_factor_df = pd.merge_asof(park_avgs, team_avgs, left_on='game_date', right_on='game_date', by='team_name', direction='backward', suffixes=('_park', '_team'))

    # Calculate park factors
    for stat in events_list:
        ratio = park_factor_df[f'{stat}_park'] / park_factor_df[f'{stat}_team']
        # Division by a zero team rate gives inf, which dropna would not catch
        park_factor_df[f'{stat}_pfx'] = ratio.replace([np.inf, -np.inf], np.nan)

    park_factor_df.rename(columns={'gamePk_park': 'gamePk'}, inplace=True)
    keep_columns = ['gamePk'] + [col for col in park_factor_df.columns if col.endswith('pfx')]

    return park_factor_df[keep_columns]

In [33]:
# park_factor_df = create_park_factors(park_average_df, team_average_df)

### Analysis 

Merge together game averages, player averages, and park factors

In [34]:
def create_analysis_df(complete_dataset, league_average_df, park_factor_df):
    """Assemble the PA-level modelling frame.

    Joins league averages (on ``game_date``) and park factors (on ``gamePk``)
    onto the PA rows, adds venue dummies, keeps the identifier, feature and
    ``*_lg`` columns, drops incomplete rows and removes "Cut" events.

    Returns
    -------
    tuple
        ``(analysis_df, venue_dummies)`` where ``venue_dummies`` is the list
        of dummy column names that were added.
    """
    # League averages first, then park factors (both inner joins)
    merged = complete_dataset.merge(league_average_df, on=['game_date'], how='inner')
    merged = merged.merge(park_factor_df, on='gamePk', how='inner')

    # One dummy column per venue
    venue_dummy_df = pd.get_dummies(merged['venue_id'], prefix='venue')
    venue_dummies = venue_dummy_df.columns.tolist()
    merged = pd.concat([merged, venue_dummy_df], axis=1)

    # Model inputs: weather, venue dummies, then three inputs per event
    event_inputs = [
        f'{event}{suffix}'
        for event in events_list
        for suffix in ('_b_long', '_p_long', '_pfx')
    ]
    variables = ['x_vect', 'y_vect', 'temperature'] + venue_dummies + event_inputs

    # Identifier columns, inputs and league averages; rows with missings dropped
    id_columns = ["eventsModel", 'gamePk', 'game_date', 'venue_id', 'away_name', 'home_name']
    league_columns = [col for col in merged if col.endswith("_lg")]
    analysis_df = merged[id_columns + variables + league_columns].dropna()

    # Remove cut
    not_cut = analysis_df['eventsModel'] != "Cut"

    return analysis_df[not_cut], venue_dummies

In [35]:
# analysis_df, venue_dummies = create_analysis_df(complete_dataset, league_average_df, park_factor_df)

### Train Models

$\hat{event}$ = event_b_long + event_p_long + event_pfx + x_vect + y_vect + temperature + venue_dummies

In [36]:
def train_models(analysis_df, venue_dummies, batSide, layers):
    """Fit a soft-voting ensemble of three MLP classifiers and pickle it.

    The three networks differ only in ``random_state`` (1, 2, 3). The fitted
    ensemble is written to
    ``model_path / "M01. Park and Weather Factors" / todaysdate / predict_wfx_{side}.pkl``.

    Parameters
    ----------
    analysis_df : pandas.DataFrame
        Training rows holding the feature columns and ``eventsModel``.
    venue_dummies : list of str
        Venue dummy column names to include as features.
    batSide : str
        Batter side label; lower-cased for the file name.
    layers : tuple of int
        ``hidden_layer_sizes`` passed to every MLP.
    """
    # Inputs: weather, venue dummies, then three inputs per event
    feature_columns = ['x_vect', 'y_vect', 'temperature'] + venue_dummies + [
        f'{event}{suffix}'
        for event in events_list
        for suffix in ('_b_long', '_p_long', '_pfx')
    ]

    X = analysis_df[feature_columns].values  # Independent variables
    y = analysis_df['eventsModel'].values  # Dependent variable

    # Three identically configured networks, one per seed
    estimators = [
        (f'nn{seed}', MLPClassifier(hidden_layer_sizes=layers, activation='relu', solver='adam', max_iter=10, random_state=seed))
        for seed in (1, 2, 3)
    ]

    # Soft voting averages the predicted class probabilities
    voting_model = VotingClassifier(estimators, voting='soft')

    # Train the Voting Classifier
    voting_model.fit(X, y)

    # Create directory for saving the model
    output_dir = os.path.join(model_path, "M01. Park and Weather Factors", todaysdate)
    os.makedirs(output_dir, exist_ok=True)

    # Save the Voting Classifier
    with open(os.path.join(output_dir, f"predict_wfx_{batSide.lower()}.pkl"), 'wb') as f:
        pickle.dump(voting_model, f)

    print(f"Voting model for {batSide}HB saved successfully.")

In [37]:
# train_models(analysis_df, venue_dummies, "L", (2,))

### Run Predictions

In [48]:
def run_predictions(df, base_rate_df, model_date, batSide):
    """Predict event probabilities for league-average players and derive wfx.

    Batter and pitcher inputs are replaced by the league averages
    (``{event}_lg``), so predictions vary only with park, weather and venue.
    Each ``{event}_wfx`` is the predicted probability divided by the
    base-year rate in the first row of ``base_rate_df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Analysis rows containing the feature and ``*_lg`` columns. Not modified.
    base_rate_df : pandas.DataFrame
        Base rates; only the first row is used.
    model_date : str
        Folder name under the model directory that holds the pickled model.
    batSide : str
        Batter side label; lower-cased to pick the model file.

    Returns
    -------
    pandas.DataFrame
        ``df`` with a fresh 0..n-1 index plus ``{class}_pred`` and
        ``{event}_wfx`` columns.

    Notes
    -----
    Reads the global ``venue_dummies``; it must list the same dummy columns
    the model was trained with.
    """
    # Identify inputs: weather, venue dummies, then three inputs per event
    variables = ['x_vect', 'y_vect', 'temperature'] + venue_dummies + [
        f'{event}{suffix}'
        for event in events_list
        for suffix in ('_b_long', '_p_long', '_pfx')
    ]

    # Path to the saved model for this batter side
    saved_model_path = os.path.join(model_path, "M01. Park and Weather Factors", model_date, f"predict_wfx_{batSide.lower()}.pkl")

    # Load the model
    # NOTE: unpickling executes arbitrary code; only load model files you created
    with open(saved_model_path, 'rb') as f:
        model = pickle.load(f)

    # Create input dataframe
    X = df.copy()

    for event in events_list:
        # Use league averages to predict (NOT BASE RATES)
        league_rate = X[f'{event}_lg'].astype(float)
        X[f'{event}_b_long'] = league_rate
        X[f'{event}_p_long'] = league_rate

    # Extract the feature data
    X = X[variables]

    # Predict using the loaded model
    class_list = list(model.classes_)
    prediction_columns = [f"{event}_pred" for event in class_list]
    prediction_df = pd.DataFrame(model.predict_proba(X), columns=prediction_columns)

    # Append: realign on a fresh index without touching the caller's frame
    df = pd.concat([df.reset_index(drop=True), prediction_df], axis=1)

    # Calculate wfx
    for event in events_list:
        # Compare to base year (NOT LEAGUE AVERAGE); positional first row
        df[f'{event}_wfx'] = df[f'{event}_pred'] / base_rate_df[event].iloc[0]

    return df

### Multiplier Dataset

In [46]:
# Set to True to refit and save the models in the Train cell before predicting
train = False
# Name of the dated folder (under the model directory) that run_predictions loads models from
model_date = "20241204"

##### Prepare

Note: You only have to prepare once even if you retrain the models

Read in complete dataset

In [40]:
%%time
# Build the PA-level dataset; the positional 2013, 2024 presumably map to
# start_year / end_year as in the commented-out call near the top - TODO confirm
complete_dataset = create_pa_inputs(None, 2013, 2024, short=50, long=300, adjust=False)

CPU times: total: 4min 18s
Wall time: 4min 25s


In [41]:
# Treat games whose weather text mentions a roof or dome as 70 degrees.
# Vectorised instead of a row-wise apply; a missing weather string counts as "no roof".
indoor_mask = complete_dataset['weather'].str.contains('Roof|Dome', na=False)
complete_dataset['temperature'] = complete_dataset['temperature'].mask(indoor_mask, 70)

Generate or read base rates

In [42]:
# Base rates (base year = 2014)
# They only need to be generated once; afterwards the cached CSV is read.
# Generate:
# base_rate_df = base_rates(complete_dataset, 2014)
# base_rate_df.to_csv(os.path.join(baseball_path, "Base Rates.csv"), index=False)

# Read:
base_rate_df = pd.read_csv(os.path.join(baseball_path, "Base Rates.csv"))

In [43]:
# List of dataframes (index 0 = LHB, index 1 = RHB)
analysis_df_list = []
# Loop over batter sides
for batSide in ['L', 'R']:
    print(batSide)
    # Subset complete dataset
    complete_dataset_side = complete_dataset[complete_dataset['batSide'] == batSide]
    # Calculate game averages (average rates within a particular games)
    game_average_df = game_averages(complete_dataset_side)
    # # Calculate player averages (average rates of all players coming into the game)
    # player_average_df = player_averages(complete_dataset_side)
    # Calculate league averages (average rates of all PAs over last n days coming into the day)
    league_average_df = league_average(complete_dataset_side, days=30)
    # Average rates at park over last n games (both teams)
    park_average_df = park_averages(game_average_df, 243, 81)
    # Average rates in each team's away games over last n games
    team_average_df = team_averages(game_average_df, 243, 81)
    # Park factors
    park_factor_df = create_park_factors(park_average_df, team_average_df)
    # Create dataframe that can be used to train and analyze data.
    # Built from the side subset so the rows match the side-specific league
    # averages and park factors (the full dataset would mix in the other side's PAs).
    analysis_df, venue_dummies = create_analysis_df(complete_dataset_side, league_average_df, park_factor_df)
    analysis_df_list.append(analysis_df)

L
R


##### Train

Rerun this when you want to retrain models

In [49]:
%%time
wfx_df_list = []
# analysis_df_list is filled in ['L', 'R'] order by the preparation loop
for side_index, batSide in enumerate(['L', 'R']):
    # Work on a copy and drop missings
    analysis_df = analysis_df_list[side_index].copy().dropna()

    # Train models
    if train:
        train_models(analysis_df, venue_dummies, batSide, layers=(38,38,38,38,38))

    # train_models saves under todaysdate, so predict with the model that was
    # just trained; otherwise fall back to the configured model_date
    prediction_model_date = todaysdate if train else model_date

    # Create dataset with wfx
    wfx_df = run_predictions(analysis_df, base_rate_df, prediction_model_date, batSide)
    wfx_df_list.append(wfx_df)

CPU times: total: 4min 6s
Wall time: 35.8 s


Separate dataframes

In [50]:
# wfx_df_list is ordered [LHB, RHB]; copy so later edits leave the list untouched
lhb_df, rhb_df = wfx_df_list[0].copy(), wfx_df_list[1].copy()

Scale predictions:
- Numerator: Predicted rate
- Denominator: Sum of all event predicted rates (should be close to one, but won't be exact)

In [51]:
# Prediction columns, one per modelled event
pred_list = [
    f'{event_name}_pred'
    for event_name in ('b1', 'b2', 'b3', 'hr', 'bb', 'hbp', 'so', 'fo', 'go', 'lo', 'po')
]

for side_df in (lhb_df, rhb_df):
    # Row total of the predicted probabilities
    side_df['pred_sum'] = side_df[pred_list].sum(axis=1)
    # Rescale every prediction so each row sums to one
    side_df[pred_list] = side_df[pred_list].div(side_df['pred_sum'], axis=0)

Columns to keep

In [52]:
# Game-level identifier and condition columns
keep_list = ['gamePk', 'game_date', 'venue_id', 'away_name', 'home_name', 'x_vect', 'y_vect', 'temperature']

# Factor and prediction columns, found by suffix in the LHB output
reference_columns = wfx_df_list[0].columns
pfx_list = [col for col in reference_columns if col.endswith('pfx')]
wfx_list = [col for col in reference_columns if col.endswith('wfx')]
pred_list = [col for col in reference_columns if col.endswith('_pred')]

In [53]:
# Columns averaged up to the game level
game_level_columns = events_list + pfx_list + wfx_list + pred_list

# LHB: one 0/1 column per observed event, then game-level means
event_dummies = pd.get_dummies(lhb_df['eventsModel']).astype(int)
lhb_with_events = pd.concat([lhb_df, event_dummies], axis=1)
lhb_df2 = lhb_with_events.groupby(keep_list)[game_level_columns].mean(numeric_only=True).reset_index()

# RHB: same steps
event_dummies = pd.get_dummies(rhb_df['eventsModel']).astype(int)
rhb_with_events = pd.concat([rhb_df, event_dummies], axis=1)
rhb_df2 = rhb_with_events.groupby(keep_list)[game_level_columns].mean(numeric_only=True).reset_index()

Create dataset

In [98]:
# One row per game with LHB columns suffixed _l and RHB columns suffixed _r
multiplier_df = lhb_df2.merge(rhb_df2, how='inner', on=keep_list, suffixes=('_l', '_r'))

Read in game_df

In [68]:
%%time
# Game table from "20220101" through todaysdate; its game_id and date columns are merged onto multiplier_df below
game_df = create_games("20220101", todaysdate, team_map)

CPU times: total: 5.3 s
Wall time: 1min 10s


In [99]:
# Cast through int so date becomes a plain digit string (presumably YYYYMMDD, e.g. "20220407") with no decimal part
game_df['date'] = game_df['date'].astype(int).astype(str)

Add date

Note: game_date currently in multiplier_df will have original date in cases of postponements. date in game_df will have the correct date.

In [100]:
# Left join keeps every game; games missing from game_df get a missing date
multiplier_df = pd.merge(multiplier_df, game_df[['game_id', 'date']], how='left', left_on='gamePk', right_on='game_id')

Write to CSV

In [102]:
# Order by actual game date, then game, before writing
multiplier_output = multiplier_df.sort_values(['date', 'gamePk'], ascending=[True, True])
multiplier_output.to_csv(os.path.join(baseball_path, "Multiplier Dataset.csv"), index=False)

### Evaluations

##### Rates by Quantile

In [None]:
# 3 x 4 grid of panels, one per event
fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(12, 9))

# Flatten the grid so panels can be addressed by a single index
axes = axes.flatten()

for idx, event in enumerate(events_list):
    ax = axes[idx]
    pred_col = f'{event}_pred'

    # Bucket games into 10 quantiles of the predicted rate (adjust q as needed)
    lhb_df2['quantile'] = pd.qcut(lhb_df2[pred_col], q=10, labels=False)

    # Mean prediction and mean actual rate per bucket
    quantile_means = lhb_df2.groupby('quantile').agg({pred_col: 'mean', event: 'mean'}).reset_index()

    # Predictions ('o') against actuals ('x')
    for column, marker in ((pred_col, 'o'), (event, 'x')):
        ax.plot(quantile_means['quantile'], quantile_means[column], label=f'Average {column}', marker=marker)

    # Panel labelling
    ax.set_title(f'{event} Predictions vs Actuals')
    ax.set_xlabel('Quantile')
    ax.set_ylabel('Average Value')
    ax.legend()
    ax.grid()

# Adjust layout to prevent overlap
plt.tight_layout()
plt.show()

In [None]:
# 3 x 4 grid of panels, one per event
fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(12, 9))

# Flatten the grid so panels can be addressed by a single index
axes = axes.flatten()

for idx, event in enumerate(events_list):
    ax = axes[idx]
    pred_col = f'{event}_pred'

    # Bucket games into 10 quantiles of the predicted rate (adjust q as needed)
    rhb_df2['quantile'] = pd.qcut(rhb_df2[pred_col], q=10, labels=False)

    # Mean prediction and mean actual rate per bucket
    quantile_means = rhb_df2.groupby('quantile').agg({pred_col: 'mean', event: 'mean'}).reset_index()

    # Predictions ('o') against actuals ('x')
    for column, marker in ((pred_col, 'o'), (event, 'x')):
        ax.plot(quantile_means['quantile'], quantile_means[column], label=f'Average {column}', marker=marker)

    # Panel labelling
    ax.set_title(f'{event} Predictions vs Actuals')
    ax.set_xlabel('Quantile')
    ax.set_ylabel('Average Value')
    ax.legend()
    ax.grid()

# Adjust layout to prevent overlap
plt.tight_layout()
plt.show()

##### Yearly Trends

In [None]:
# Event to inspect
event = 'hr'
# Year = first four characters of the game_date string
lhb_df2['year'] = lhb_df2['game_date'].str.slice(0, 4)
# Yearly means of the actual rate, prediction, park factor and park x weather factor
yearly_columns = [event, f'{event}_pred', f'{event}_pfx', f'{event}_wfx']
lhb_df2.groupby('year')[yearly_columns].mean()

##### Park Differences

In [None]:
# Events where the batter reaches base vs. events that record an out
safe_events = ['b1', 'b2', 'b3', 'hr', 'bb', 'hbp']
out_events = ['so', 'go', 'lo', 'po', 'fo']
safe_preds = [f'{name}_pred' for name in safe_events]
out_preds = [f'{name}_pred' for name in out_events]

# Same four totals for both batter sides
for side_df in (rhb_df2, lhb_df2):
    side_df['safe'] = side_df[safe_events].sum(axis=1)
    side_df['out'] = side_df[out_events].sum(axis=1)
    side_df['safe_pred'] = side_df[safe_preds].sum(axis=1)
    side_df['out_pred'] = side_df[out_preds].sum(axis=1)


In [None]:
# Keep only venues listed in team_map
known_venue = rhb_df2['venue_id'].astype('int').isin(team_map['VENUE_ID'])
# Mean actual and predicted "safe" rate per venue
park_error = rhb_df2[known_venue].groupby('venue_id')[['safe', 'safe_pred']].mean()
# Positive diff: more batters reached base than predicted
park_error['diff'] = park_error['safe'] - park_error['safe_pred']
park_error.sort_values('diff')

##### Park and Park x Weather Effects

In [None]:
# Last row per venue, showing its park (_pfx) and park x weather (_wfx) factors
pfx_columns = [col for col in rhb_df2.columns if col.endswith("_pfx")]
wfx_columns = [col for col in rhb_df2.columns if col.endswith("_wfx")]
latest_by_venue = rhb_df2.drop_duplicates('venue_id', keep='last')
latest_by_venue[['venue_id'] + pfx_columns + wfx_columns].sort_values('venue_id')

### Generate Park and Weather Factors files

In [85]:
# Read date as text: games without a match in game_df have no date, and with
# those gaps read_csv would infer float (20220407.0), which breaks the string
# comparison and the file names in the export loop
multiplier_df = pd.read_csv(os.path.join(baseball_path, "Multiplier Dataset.csv"), dtype={'date': str})

In [105]:
# Select columns to keep
keep_columns = ['gamePk', 'game_date', 'date', 'venue_id', 'away_name', 'home_name', 'x_vect', 'y_vect', 'temperature'] + [col for col in multiplier_df.columns if "_wfx" in col]

# Normalise date to digit strings such as "20220407". After a CSV round-trip
# the column can be float because some games have no date; missing stays missing.
numeric_dates = pd.to_numeric(multiplier_df['date'], errors='coerce')
multiplier_df['date'] = numeric_dates.astype('Int64').astype(str).where(numeric_dates.notna())

multiplier_df.sort_values('date', inplace=True)

# Dates of games from 2022 onwards; games without a date are skipped
recent_games = multiplier_df[pd.to_datetime(multiplier_df['game_date']).dt.year >= 2022]
for date in recent_games['date'].dropna().unique():
    print(date)
    if date > "20220101":
        # Subset by date
        daily_weather_df = multiplier_df.loc[multiplier_df['date'] == date, keep_columns]

        # Write to CSV
        daily_weather_df.to_csv(os.path.join(baseball_path, "A06. Weather", "4. Park and Weather Factors", f"{date} Park and Weather Factors.csv"), index=False)

20220407
20220408
20220409
20220410
20220411
20220412
20220413
20220414
20220415
20220416
20220417
20220418
20220419
20220420
20220421
20220422
20220423
20220424
20220425
20220426
20220427
20220428
20220429
20220430
20220501
20220502
20220503
20220504
20220505
20220506
20220507
20220508
20220509
20220510
20220511
20220512
20220513
20220514
20220515
20220516
20220517
20220518
20220519
20220520
20220521
20220522
20220523
20220524
20220525
20220526
20220527
20220528
20220529
20220530
20220531
20220601
20220602
20220603
20220604
20220605
20220606
20220607
20220608
20220609
20220610
20220611
20220612
20220613
20220614
20220615
20220616
20220617
20220618
20220619
20220620
20220621
20220622
20220623
20220624
20220625
20220626
20220627
20220628
20220629
20220630
20220701
20220702
20220703
20220704
20220705
20220706
20220707
20220708
20220709
20220710
20220711
20220712
20220713
20220714
20220715
20220716
20220717
20220721
20220722
20220723
20220724
20220725
20220726
20220727
20220728
20220729
2

### Note: Rerun A11. Matchups.ipynb if new historic Park x Weather Effects are generated