## **Before running this script, make sure to run ScrapeParkFactors to get current park factors**

In [None]:
import statsapi
from pandas import DataFrame
from statistics import fmean

import constants
import utils

In [None]:
# Prompt for the period boundaries. The prompts ask for YYYY-mm-dd strings; the
# values are used as entered (no parsing or validation happens in this cell).
# They are later passed to get_player_scheduled_venues and embedded in the
# output filename.
PERIOD_START = input('Enter period start (YYYY-mm-dd): ')
PERIOD_END = input('Enter period end (YYYY-mm-dd): ')

Load input files

In [None]:
# Load the team table through the project helper. Later cells read its
# 'Player', 'Position' and 'Team' columns.
team_df:DataFrame = utils.load_dataframe(constants.TEAM_FILENAME)
# Preview the first two rows as the cell output.
team_df.head(2)

In [None]:
# Load the park factors table through the project helper. Later cells read its
# 'Splits', 'Park Name' and 'Runs' columns.
factors_df = utils.load_dataframe(constants.PARK_FACTORS_FILENAME)
# Preview the first two rows as the cell output.
factors_df.head(2)

### Calculate park factor for each player in given period

In [None]:
def get_relevant_factors(player_name, player_position):
    """Select the park factor rows whose 'Splits' value applies to a player.

    Positions listed in ``constants.PITCHER_POSITIONS`` use the 'All' split.
    Any other position uses the split that ``constants.HITTER_FACTORS_MAPPING``
    assigns to the player's ``bat_side`` as reported by
    ``utils.lookup_player_stats``.

    Args:
        player_name: Name passed to ``utils.lookup_player_stats``.
        player_position: Position compared against the pitcher positions.

    Returns:
        The rows of the module-level ``factors_df`` matching the chosen split.
    """
    if player_position in constants.PITCHER_POSITIONS:
        split_label = 'All'
    else:
        batting_side = utils.lookup_player_stats(player_name)['bat_side']
        split_label = constants.HITTER_FACTORS_MAPPING[batting_side]
    split_mask = factors_df['Splits'] == split_label
    return factors_df.loc[split_mask]

In [None]:
def get_player_scheduled_venues(period_start, period_end, team_name):
    """Return the venue name of each distinct game a team has in a period.

    Args:
        period_start: Start of the period, passed to ``statsapi.schedule`` as
            ``start_date``.
        period_end: End of the period, passed to ``statsapi.schedule`` as
            ``end_date``.
        team_name: Team name, resolved to an id with ``utils.get_team_id``.

    Returns:
        A list of venue names, one entry per distinct game id, in the order
        the schedule returned them. A venue appears once per game played
        there, so the list may contain repeats.
    """
    team_id = utils.get_team_id(team_name)
    game_info = statsapi.schedule(start_date=period_start, end_date=period_end, team=team_id)
    # A set gives constant-time membership checks for the de-duplication below.
    seen_game_ids = set()
    venues = []
    for game in game_info:
        game_id = game['game_id']
        # Don't count the same game twice, e.g., if it was postponed and rescheduled
        if game_id in seen_game_ids:
            continue
        seen_game_ids.add(game_id)
        venues.append(game['venue_name'])
    return venues

In [None]:
def get_average_park_factors(relevant_factors:DataFrame, scheduled_venues:list):
    """Average the 'Runs' park factor over a list of scheduled venues.

    Venue names are matched case-insensitively against the 'Park Name' column.
    When several rows match a venue, the first one is used. Venues with no
    matching row are skipped.

    Args:
        relevant_factors: Park factor rows with 'Park Name' and 'Runs' columns.
        scheduled_venues: Venue names, one entry per scheduled game, so a
            venue listed several times is weighted accordingly.

    Returns:
        The mean 'Runs' factor of the matched venues as a float, or NaN when
        no venue could be matched (including an empty schedule).
    """
    # Upper-case the park names once instead of once per venue.
    park_names = relevant_factors['Park Name'].str.upper()
    all_factors = []
    for venue in scheduled_venues:
        venue_runs = relevant_factors.loc[park_names == venue.upper(), 'Runs'].tolist()
        if not venue_runs:
            # Ignore non-standard stadiums like Mexico City or Seoul
            continue
        all_factors.append(venue_runs[0])
    if not all_factors:
        # fmean raises StatisticsError on empty data; report a missing value
        # so one player without usable games does not abort the whole run.
        return float('nan')
    return fmean(all_factors)

In [None]:
# For every row, store the park factor rows that apply to that player
# (selected from the row's 'Player' and 'Position') in a temporary
# 'Relevant Factors' column.
team_df['Relevant Factors'] = team_df.apply(lambda row: get_relevant_factors(row['Player'], row['Position']), axis=1)

In [None]:
# For every row, store the venues of the row's 'Team' games within the period
# in a temporary 'Scheduled Games' column.
# NOTE(review): the schedule is looked up once per row, so rows sharing a team
# repeat the same lookup — consider caching the result per team.
team_df['Scheduled Games'] = team_df.apply(lambda row: get_player_scheduled_venues(PERIOD_START, PERIOD_END, row['Team']), axis=1)

In [None]:
# Combine the two temporary columns into each row's average park factor.
team_df['Average Factors'] = team_df.apply(lambda row: get_average_park_factors(row['Relevant Factors'], row['Scheduled Games']), axis=1)

In [None]:
# Remove the intermediate columns so only 'Average Factors' is added to the
# saved table.
team_df.drop(columns=['Relevant Factors', 'Scheduled Games'], inplace=True)
# Save the result under a filename that embeds the period boundaries exactly
# as they were entered.
utils.save_dataframe(team_df, f'team-with-park-factors_{PERIOD_START}-{PERIOD_END}.csv')