# A06. Weather 

Note: All historic Park and Weather Factors files are created by M01. Park and Weather Factors.ipynb whenever new models are trained. A06. Weather produces the daily files only.

### Imports

In [11]:
import sys
if not hasattr(sys.modules['__main__'], '__file__'):
    %run "C:\Users\james\Documents\MLB\Code\U1. Imports.ipynb"
    %run "C:\Users\james\Documents\MLB\Code\U2. Utilities.ipynb"
    %run "C:\Users\james\Documents\MLB\Code\U3. Classes.ipynb"
    %run "C:\Users\james\Documents\MLB\Code\U4. Datasets.ipynb"
    %run "C:\Users\james\Documents\MLB\Code\U5. Models.ipynb"
    print("Imports executed")

Imports executed


### Games

In [12]:
if not hasattr(sys.modules['__main__'], '__file__'):
    # Only runs when the main module has no __file__ attribute
    # (e.g. when this notebook is executed directly rather than from a script).
    # The window is inclusive on both ends and currently covers today only.
    start_date = todaysdate
    end_date = todaysdate
    all_game_df = pd.read_csv(os.path.join(baseball_path, "game_df.csv"))
    # Dates are compared as strings, so both bounds must use the same format as the 'date' column
    game_df = all_game_df[all_game_df['date'].astype(str).between(start_date, end_date)].reset_index(drop=True)

### Venue

Merge in venue-specific data

In [13]:
# Attach venue attributes to every game: coordinates, field dimensions,
# elevation, azimuth angle, roof type and the venue's active flag.
# Left join, so games whose venue_id has no match keep NaN venue fields.
game_df = game_df.merge(
    venue_map_df[[
        'id',
        'location.defaultCoordinates.latitude',
        'location.defaultCoordinates.longitude',
        'fieldInfo.leftLine',
        'fieldInfo.center',
        'fieldInfo.rightLine',
        'fieldInfo.leftCenter',
        'fieldInfo.rightCenter',
        'location.elevation',
        'location.azimuthAngle',
        'fieldInfo.roofType',
        'active',
    ]],
    how='left',
    left_on='venue_id',
    right_on='id',
)

Convert to datetime

In [14]:
# game_df["game_datetime"] = pd.to_datetime(game_df["game_datetime"])

Drop if missing coordinates

In [15]:
# Games without both venue coordinates are removed from game_df in place;
# the weather lookups below read latitude and longitude from every row.
game_df.dropna(
    subset=[
        'location.defaultCoordinates.latitude',
        'location.defaultCoordinates.longitude',
    ],
    inplace=True,
)

### Functions

##### 1. Open Meteo

Historic

In [6]:
# Setup the Open-Meteo API client with cache and retry on error.
# This module-level `openmeteo` client is the one fetch_historical_weather_data uses;
# fetch_weather_data (forecast) builds its own client with a different cache expiry.
# Responses are cached under the name '.cache'. expire_after=-1 is understood to mean
# "never expire" in requests-cache - NOTE(review): confirm against the installed version.
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
# Wrap the cached session so failed requests are retried (retries=5, backoff_factor=0.2)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
# Client that sends every Open-Meteo request through the cached, retrying session
openmeteo = openmeteo_requests.Client(session=retry_session)

def fetch_historical_weather_data(latitude, longitude, game_datetime):
    """Download one day of hourly archive weather for a location.

    The Open-Meteo archive endpoint is queried, through the module-level
    ``openmeteo`` client, for the single calendar date obtained from
    ``game_datetime.strftime("%Y-%m-%d")``.

    Parameters
    ----------
    latitude, longitude
        Coordinates passed straight through to the API request.
    game_datetime
        Any object with a ``strftime`` method (e.g. a pandas Timestamp).

    Returns
    -------
    pandas.DataFrame
        One row per hourly step with a UTC ``datetime`` column followed by
        temperature_2m, relative_humidity_2m, dew_point_2m, weather_code,
        surface_pressure, wind_speed_10m and wind_direction_10m.
        Temperature is requested in Fahrenheit and wind speed in mph.
    """
    # The order here fixes the index used with hourly.Variables(i) below
    hourly_variables = [
        "temperature_2m", "relative_humidity_2m", "dew_point_2m",
        "weather_code", "surface_pressure", "wind_speed_10m", "wind_direction_10m"
    ]

    # The archive is requested for a single day: start and end are the same date
    day = game_datetime.strftime("%Y-%m-%d")
    request = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": day,
        "end_date": day,
        "hourly": hourly_variables,
        "temperature_unit": "fahrenheit",
        "wind_speed_unit": "mph",
        "precipitation_unit": "inch"
    }

    # Only the first response returned by the client is used
    api_responses = openmeteo.weather_api("https://archive-api.open-meteo.com/v1/archive", params=request)
    hourly = api_responses[0].Hourly()

    # Rebuild the hourly timestamps from the reported start, end and step (end excluded)
    columns = {
        "datetime": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        )
    }
    for position, variable in enumerate(hourly_variables):
        columns[variable] = hourly.Variables(position).ValuesAsNumpy()

    return pd.DataFrame(columns)

def create_historic_weather_df(game_df):
    """Attach the archive weather observation closest to first pitch to every game.

    For each row, one day of hourly data is fetched with
    ``fetch_historical_weather_data`` and the hourly record whose timestamp is
    nearest to ``game_datetime`` is kept.

    Parameters
    ----------
    game_df : pandas.DataFrame
        Must contain ``game_datetime``,
        ``location.defaultCoordinates.latitude`` and
        ``location.defaultCoordinates.longitude``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``game_df`` with ``game_datetime`` converted to UTC and the
        columns temperature_2m, relative_humidity_2m, dew_point_2m,
        weather_code, surface_pressure, wind_speed_10m and wind_direction_10m
        added. The input frame itself is left untouched.

    Raises
    ------
    ValueError
        If the weather lookup for a game returns no hourly rows.
    """
    weather_columns = [
        "temperature_2m", "relative_humidity_2m", "dew_point_2m",
        "weather_code", "surface_pressure", "wind_speed_10m", "wind_direction_10m"
    ]

    # Work on a copy: callers pass filtered slices of a larger frame, and writing
    # columns into such a slice triggers SettingWithCopyWarning and mutates caller data.
    result = game_df.copy()

    # Convert game_datetime to UTC so it is comparable with the UTC weather timestamps
    result["game_datetime"] = pd.to_datetime(result["game_datetime"], utc=True)

    matched = {col: [] for col in weather_columns}

    for _, row in result.iterrows():
        game_datetime = row["game_datetime"]

        # Fetch historical weather data for that day
        weather_data = fetch_historical_weather_data(
            row["location.defaultCoordinates.latitude"],
            row["location.defaultCoordinates.longitude"],
            game_datetime,
        )
        if len(weather_data) == 0:
            raise ValueError(f"No historical weather returned for game at {game_datetime}")

        # Pick the closest timestamp by position, so the lookup does not depend
        # on how the weather frame happens to be indexed.
        time_gaps = (weather_data["datetime"] - game_datetime).abs()
        closest_weather_row = weather_data.iloc[int(time_gaps.to_numpy().argmin())]

        for col in weather_columns:
            matched[col].append(closest_weather_row[col])

    # Add the weather data as new columns
    for col in weather_columns:
        result[col] = matched[col]

    return result

Forecast

In [7]:
# Make sure it doesn't have issues with double headers

In [8]:
def fetch_weather_data(latitude, longitude, start, end):
    """Fetch the hourly Open-Meteo forecast for a location and keep one time window.

    A new cached, retrying client is built on every call (cache name '.cache',
    ``expire_after=3600``). The forecast request also asks for ``past_days=2``.
    The returned rows are those whose UTC timestamp lies between ``start`` and
    ``end``, both inclusive; this filtering is done locally on the response.

    Parameters
    ----------
    latitude, longitude
        Coordinates passed straight through to the API request.
    start, end
        Window bounds; anything ``pd.to_datetime(..., utc=True)`` accepts.

    Returns
    -------
    pandas.DataFrame
        Columns: date (UTC), temperature_2m, relative_humidity_2m,
        dew_point_2m, precipitation_probability, surface_pressure,
        wind_speed_10m, wind_direction_10m, weather_code. The index is that of
        the unfiltered hourly frame.
    """
    # The order here fixes the index used with hourly.Variables(i) below
    hourly_variables = [
        "temperature_2m", "relative_humidity_2m", "dew_point_2m",
        "precipitation_probability", "surface_pressure",
        "wind_speed_10m", "wind_direction_10m", "weather_code",
    ]

    # Setup the Open-Meteo API client with cache and retry on error
    session = retry(
        requests_cache.CachedSession('.cache', expire_after=3600),
        retries=5,
        backoff_factor=0.2,
    )
    client = openmeteo_requests.Client(session=session)

    # NOTE(review): confirm the forecast endpoint honours the "start"/"end" keys;
    # the window is enforced locally below either way.
    request = {
        "latitude": latitude,
        "longitude": longitude,
        "hourly": hourly_variables,
        "start": start,  # Start time of forecast
        "end": end,  # End time of forecast
        "wind_speed_unit": "mph",
        "temperature_unit": "fahrenheit",
        "precipitation_unit": "inch",
        "past_days": 2
    }

    # Only the first response returned by the client is used
    hourly = client.weather_api("https://api.open-meteo.com/v1/forecast", params=request)[0].Hourly()
    values = [hourly.Variables(position).ValuesAsNumpy() for position in range(len(hourly_variables))]

    # Rebuild the hourly timestamps from the reported start, end and step (end excluded)
    table = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        )
    }
    table.update(zip(hourly_variables, values))
    forecast = pd.DataFrame(data=table)

    # Keep only the rows inside the requested window (inclusive on both ends)
    forecast["date"] = pd.to_datetime(forecast["date"], utc=True)
    window_start = pd.to_datetime(start, utc=True)
    window_end = pd.to_datetime(end, utc=True)
    in_window = forecast["date"].between(window_start, window_end)

    return forecast[in_window]


# Now iterate over each game in game_df to fetch the weather data
def create_daily_weather_df(game_df):
    """Attach the forecast at first pitch to every game.

    For each row, ``fetch_weather_data`` is asked for the window from
    ``game_datetime`` to one hour later, and the first hourly record in that
    window is kept.

    Parameters
    ----------
    game_df : pandas.DataFrame
        Must contain ``game_datetime``,
        ``location.defaultCoordinates.latitude`` and
        ``location.defaultCoordinates.longitude``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``game_df`` with temperature_2m, relative_humidity_2m,
        dew_point_2m, precipitation_probability, surface_pressure,
        wind_speed_10m, wind_direction_10m and weather_code added. A game whose
        forecast window comes back empty gets NaN in all of these columns
        instead of aborting the whole run. The input frame is left untouched.
    """
    forecast_columns = [
        "temperature_2m", "relative_humidity_2m", "dew_point_2m",
        "precipitation_probability", "surface_pressure",
        "wind_speed_10m", "wind_direction_10m", "weather_code",
    ]

    # Work on a copy: callers pass filtered slices of a larger frame, and writing
    # columns into such a slice triggers SettingWithCopyWarning and mutates caller data.
    result = game_df.copy()
    collected = {col: [] for col in forecast_columns}

    for _, row in result.iterrows():
        game_datetime = pd.to_datetime(row["game_datetime"])

        # One-hour window starting at first pitch, as ISO 8601 strings
        start = game_datetime.isoformat()
        end = (game_datetime + pd.Timedelta(hours=1)).isoformat()

        weather_data = fetch_weather_data(
            row["location.defaultCoordinates.latitude"],
            row["location.defaultCoordinates.longitude"],
            start,
            end,
        )

        # First hourly record inside the window, or nothing if the window is empty
        first_row = weather_data.iloc[0] if len(weather_data) > 0 else None

        for col in forecast_columns:
            collected[col].append(float("nan") if first_row is None else first_row[col])

    # Append the new weather columns
    for col in forecast_columns:
        result[col] = collected[col]

    return result

##### 1. Swish Analytics

Swish Analytics contains weather projections to be used before MLB Stats API updates theirs.

Scrape

In [None]:
# Scrape Swish Analytics for weather data
def swishanalytics(date):
    """Scrape the Swish Analytics MLB weather page for one date.

    Every ``div.weather-card`` that contains a ``table-bordered`` table
    contributes one output row: the table column at position 2 (described in
    the original code as the second time period scraped), transposed, together
    with the card's matchup heading.

    Parameters
    ----------
    date : str
        Date as ``YYYYMMDD``; it is reformatted to ``YYYY-MM-DD`` for the URL.

    Returns
    -------
    pandas.DataFrame
        One row per weather card with columns ``Time`` (the header of the
        selected table column), ``Weather``, ``Temperature``, ``Feels Like``,
        ``Humidity``, ``Speed`` (float, " mph" removed), ``Direction``,
        ``Matchup`` and ``FANGRAPHSTEAM`` (the text after "@" in the matchup).

    Raises
    ------
    RuntimeError
        If the page does not answer with HTTP 200.
    ValueError
        If no weather card with a table is found on the page.
    """
    # Reformat date to fit URL
    date_dash = f"{date[:4]}-{date[4:6]}-{date[6:8]}"
    url = "https://swishanalytics.com/mlb/weather?date=" + date_dash

    # A timeout keeps the daily run from hanging forever on an unresponsive server
    response = requests.get(url, timeout=30)

    # Fail here with a clear message; continuing would only hit an unbound variable later
    if response.status_code != 200:
        print("Failed to retrieve the page. Status code:", response.status_code)
        raise RuntimeError(f"Swish Analytics request failed with status code {response.status_code}")

    soup = BeautifulSoup(response.text, 'html.parser')

    dfs = []
    for weather_card in soup.find_all('div', class_='weather-card'):
        # Matchup heading of the card, e.g. "<away> @ <home>"
        location_info = weather_card.find('h4', class_='lato inline vert-mid bold')
        location = location_info.text.strip() if location_info else None

        table = weather_card.find('table', class_='table-bordered')
        if not table:
            continue

        # Table cells as a list of rows; the first row holds the column headers
        data = [
            [cell.text.strip() for cell in row.find_all(['th', 'td'])]
            for row in table.find_all('tr')
        ]
        table_df = pd.DataFrame(data)
        table_df.columns = table_df.iloc[0]
        table_df = table_df[1:]  # keeps row labels 1..N, which the rename below relies on

        # Create dataframe from the second time period scraped (one row per card)
        daily_weather_df = pd.DataFrame(table_df.iloc[:, 2]).T
        daily_weather_df['Matchup'] = location
        # Extract home team name: the part after "@", without the non-breaking-space padding
        daily_weather_df['FANGRAPHSTEAM'] = daily_weather_df['Matchup'].str.split("@", expand=True).iloc[:, 1]
        daily_weather_df['FANGRAPHSTEAM'] = daily_weather_df['FANGRAPHSTEAM'].str.replace("\xa0\xa0", "", regex=False)

        dfs.append(daily_weather_df)

    if not dfs:
        raise ValueError(f"No weather cards found on Swish Analytics for {date_dash}")

    # Append together dataframes
    df = pd.concat(dfs, axis=0)

    # Rename columns (the integer keys are the table's row labels).
    # NOTE(review): no 'BBREFTEAM' column is created in this function, so that entry has no effect.
    df = df.rename(columns={1:'Weather', 2:'Temperature', 3:'Feels Like', 4:'Humidity', 5:'Speed', 6:'Direction', 'BBREFTEAM': 'home_team'})

    # Clean
    df['Speed'] = df['Speed'].str.replace(" mph", "", regex=False).astype(float)
    df['Temperature'] = df['Temperature'].str.replace('°', '', regex=False)
    df['Feels Like'] = df['Feels Like'].str.replace('°', '', regex=False)
    # The index holds the header of the selected time-period column; expose it as 'Time'
    df = df.reset_index(drop=False, names='Time')

    return df

##### 2. RotoGrinders

RotoGrinders hosts weather warnings used to identify matchups to avoid based on weather risk.

Scrape

In [None]:
def rotogrinders(date, team_map):
    """Scrape the RotoGrinders MLB weather page into one row per matchup.

    The page URL is fixed; ``date`` is not sent to the site and is only
    stamped onto the result.

    Parameters
    ----------
    date
        Value written to the ``date`` column of the result.
    team_map : pandas.DataFrame
        Must contain ``ROTOGRINDERSTEAM`` and ``DKTEAM``; used to translate the
        scraped team names.

    Returns
    -------
    pandas.DataFrame or str
        On HTTP 200: a frame with columns ``Tag``, ``Tag2``, ``Matchup``,
        ``Away``, ``Home``, ``Description`` and ``date``. On any other status
        code the original behaviour is kept and an error message string is
        returned instead.

    Raises
    ------
    ValueError
        If the page contains no ``li.weather-blurb`` entries.
    """
    url = "https://rotogrinders.com/weather/mlb"

    # A timeout keeps the daily run from hanging forever on an unresponsive server
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        # Return an error message if the response is not successful
        return "Failed to retrieve data. Response status code: {}".format(response.status_code)

    soup = BeautifulSoup(response.text, "html.parser")

    data = []
    for li_element in soup.find_all("li", class_="weather-blurb"):
        # Up to two coloured risk tags per blurb
        tag_elements = li_element.find_all("span", class_=["green", "yellow", "orange", "red"])
        tag = tag_elements[0].text.strip() if tag_elements else None
        tag2 = tag_elements[1].text.strip() if len(tag_elements) > 1 else None

        # Matchup text and, when present, the description span that follows it
        matchup_span = li_element.find("span", class_="bold")
        matchup = matchup_span.text.strip() if matchup_span else None
        description = None
        if matchup_span:
            description_span = matchup_span.find_next_sibling("span")
            description = description_span.text.strip() if description_span else None

        data.append({"Tag": tag, "Tag2": tag2, "Matchup": matchup, "Description": description})

    # Without blurbs the frame would have no 'Matchup' column; fail with a clear message
    if not data:
        raise ValueError("No weather blurbs found on the RotoGrinders weather page.")

    df = pd.DataFrame(data)

    df[['away', 'home']] = df['Matchup'].str.split(" @ ", expand=True)

    # Add in DK team abbreviations: the away merge yields 'DKTEAM', the home merge 'DKTEAM_home'
    team_lookup = team_map[['ROTOGRINDERSTEAM', 'DKTEAM']]
    df = df.merge(team_lookup, left_on=['away'], right_on=['ROTOGRINDERSTEAM'], how='left', suffixes=("", "_away"))
    df = df.merge(team_lookup, left_on=['home'], right_on=['ROTOGRINDERSTEAM'], how='left', suffixes=("", "_home"))
    df = df[['Tag', 'Tag2', 'Matchup', 'DKTEAM', 'DKTEAM_home', 'Description']]
    df = df.rename(columns={'DKTEAM':'Away', 'DKTEAM_home': 'Home'})

    # Add the date column to the DataFrame
    df['date'] = date

    return df

##### 3. Park x Weather Factors

Calculate wind x and y vectors

In [None]:
def calculate_vectors(row, azimuth_column, wind_column, speed_column):
    """Split a wind reading into x and y components relative to a reference azimuth.

    The angle used is ``row[wind_column] - row[azimuth_column]`` in degrees.
    Its sine and cosine are rounded to 5 decimals, scaled by
    ``row[speed_column]`` and multiplied by -1.

    Returns a pandas Series indexed by 'x_vect' and 'y_vect'.
    """
    relative_angle = math.radians(row[wind_column] - row[azimuth_column])
    speed = row[speed_column]

    components = {
        'x_vect': round(math.sin(relative_angle), 5) * speed * -1,
        'y_vect': round(math.cos(relative_angle), 5) * speed * -1,
    }

    return pd.Series(components)

### Run

##### 1. Open Meteo

Columns to extract

In [16]:
# Columns written to the Open Meteo CSV files, grouped by where they come from.

# Columns from game_df
game_columns = [
    'game_id', 'game_datetime', 'game_date', 'date', 'year', 'game_type', 'status',
    'away_team', 'home_team', 'doubleheader', 'game_num', 'venue_id', 'venue_name',
]

# Columns from the venue map
venue_columns = [
    'location.defaultCoordinates.latitude',
    'location.defaultCoordinates.longitude',
    'fieldInfo.leftLine',
    'fieldInfo.center',
    'fieldInfo.rightLine',
    'fieldInfo.leftCenter',
    'fieldInfo.rightCenter',
    'location.elevation',
    'location.azimuthAngle',
    'fieldInfo.roofType',
    'active',
]

# Columns from Open-Meteo (present in both historic and forecast files)
weather_columns = [
    'temperature_2m',
    'relative_humidity_2m',
    'dew_point_2m',
    'surface_pressure',
    'wind_speed_10m',
    'wind_direction_10m',
    'weather_code',
]

# Forecast-only columns from Open-Meteo
forecast_only_columns = ['precipitation_probability']

In [21]:
%%time
# Loop over dates
# One CSV per date in game_df: today's games use the forecast, any other date the archive.
open_meteo_dir = os.path.join(baseball_path, "A06. Weather", "1. Open Meteo")
for date in game_df['date'].unique():
    print(date)
    # Hand the builders an independent copy: they add columns to the frame they receive,
    # and doing that on a filtered slice of game_df raises SettingWithCopyWarning.
    games_on_date = game_df[game_df['date'] == date].copy()
    output_path = os.path.join(open_meteo_dir, f"Open Meteo {date}.csv")
    if int(date) == int(todaysdate):
        # Create daily weather dataframe (forecast)
        create_daily_weather_df(games_on_date)[game_columns + venue_columns + weather_columns + forecast_only_columns].to_csv(output_path, index=False)
    else:
        # Create historic weather dataframe
        create_historic_weather_df(games_on_date)[game_columns + venue_columns + weather_columns].to_csv(output_path, index=False)
        # Pause between dates before the next batch of archive requests
        time.sleep(2)

20250412
CPU times: total: 203 ms
Wall time: 233 ms


##### 1. Swish Analytics

In [None]:
# Best-effort scrape: a failure is reported and the notebook carries on.
# `except Exception` (not a bare `except`) lets KeyboardInterrupt/SystemExit through,
# and the cause is printed so a broken scraper is not silently ignored.
try:
    # Scrape Swish Analytics
    swishanalytics_df = swishanalytics(todaysdate)
    # To CSV
    swishanalytics_df.to_csv(os.path.join(baseball_path, "A06. Weather", "1. Swish Analytics", f"Swish Analytics {todaysdate}.csv"), index=False, encoding='iso-8859-1')
except Exception as error:
    print(f"Could not scrape Swish Analytics weather data. ({type(error).__name__}: {error})")

##### 2. RotoGrinders

In [None]:
# Best-effort scrape: a failure is reported and the notebook carries on.
# `except Exception` (not a bare `except`) lets KeyboardInterrupt/SystemExit through,
# and the cause is printed so a broken scraper is not silently ignored.
try:
    # Scrape RotoGrinders
    rotogrinders_df = rotogrinders(todaysdate, team_map)
    # rotogrinders() returns an error message string when the page request fails
    if isinstance(rotogrinders_df, str):
        raise RuntimeError(rotogrinders_df)
    # To CSV
    rotogrinders_df.to_csv(os.path.join(baseball_path, "A06. Weather", "2. RotoGrinders", f"RotoGrinders {todaysdate}.csv"), index=False)
except Exception as error:
    print(f"Could not scrape RotoGrinders weather data. ({type(error).__name__}: {error})")

##### 3. Park x Weather Factors

In [None]:
# Variable groups used to assemble the Park x Weather model inputs.

# Weather fields under their MLB names (drop weather)
mlb_weather_variables = ['x_vect', 'y_vect', 'temperature']

# Open-Meteo fields that correspond to the MLB weather fields above
meteo_duplicates_variables = ['meteo_x_vect', 'meteo_y_vect', 'temperature_2m']

# Open-Meteo fields with no MLB counterpart in the list above
meteo_weather_variables = ['relative_humidity_2m', 'dew_point_2m', 'surface_pressure']

# Park dimensions and elevation (drop roof type)
mlb_park_variables = [
    'fieldInfo.leftLine',
    'fieldInfo.center',
    'fieldInfo.rightLine',
    'fieldInfo.leftCenter',
    'fieldInfo.rightCenter',
    'location.elevation',
]

In [None]:
# Venues whose park-specific estimates are always replaced by the generic model's
generic_venue_list = [2523, 2529]

# Read in Meteo Data
meteo_df = pd.read_csv(os.path.join(baseball_path, "A06. Weather", "1. Open Meteo", f"Open Meteo {todaysdate}.csv"), encoding='iso-8859-1')

# Read in latest park data
l_park_latest_df = pd.read_csv(os.path.join(baseball_path, "Park Latest - LHB.csv"))
r_park_latest_df = pd.read_csv(os.path.join(baseball_path, "Park Latest - RHB.csv"))

# Venue dummies: every "venue_*" column except those ending in "id"
venue_dummies = [col for col in l_park_latest_df.columns if (col.startswith("venue_") and not col.endswith("id"))]

# Read in base rates
base_rate_df = pd.read_csv(os.path.join(baseball_path, "Base Rates.csv"))

# Calculate wind vectors
meteo_df[['meteo_x_vect', 'meteo_y_vect']] = meteo_df.apply(lambda row: calculate_vectors(row, 'location.azimuthAngle', 'wind_direction_10m', 'wind_speed_10m'), axis=1)

# Box-score fields per game. Note that this overwrites the 'date' column read from the CSV.
meteo_df[['weather', 'wind', 'venue', 'date', 'missing_weather']] = meteo_df['game_id'].apply(lambda game_id: pd.Series(create_box(game_id)))

# Domes: games whose weather text mentions a roof or dome get fixed conditions and no wind
mask = meteo_df['weather'].str.contains('Roof|Dome', case=False, na=False)
dome_conditions = {
    'temperature_2m': 70,
    'meteo_x_vect': 0,
    'meteo_y_vect': 0,
    'relative_humidity_2m': 60,
    'dew_point_2m': 57,
}
for dome_column, dome_value in dome_conditions.items():
    meteo_df.loc[mask, dome_column] = dome_value

# League and park-factor inputs per event.
# NOTE(review): f'{event}_lg' appears twice per event. It is kept as is because the
# feature layout has to match what the models were trained on - confirm against training.
lg_pfx_list = []
for event in events_list:
    lg_pfx_list += [f'{event}_lg', f'{event}_lg', f'{event}_pfx']

# Identify inputs
# Consider replacing with MLB weather, if available
specific_model_input_list = meteo_duplicates_variables + meteo_weather_variables + mlb_park_variables + lg_pfx_list + venue_dummies
generic_model_input_list = meteo_duplicates_variables + meteo_weather_variables + mlb_park_variables + lg_pfx_list


def _predict_weather_effects(meteo_df, park_latest_df, side, specific_model, generic_model):
    """Build park x weather factors for one batter side.

    Reads the cell-level names ``specific_model_input_list``,
    ``generic_model_input_list``, ``events_list``, ``base_rate_df`` and
    ``generic_venue_list``.

    Parameters
    ----------
    meteo_df : DataFrame of today's games with weather columns.
    park_latest_df : DataFrame of latest park data for this side (one row per venue is kept).
    side : "l" or "r"; used as the suffix of every output column.
    specific_model, generic_model : fitted classifiers exposing ``predict_proba``
        (``specific_model.classes_`` names the outputs of both).

    Returns
    -------
    (merged_df, specific_columns, generic_columns)
    """
    # Specific, where available
    merged = pd.merge(
        meteo_df,
        park_latest_df.drop_duplicates('venue_id', keep='last').drop(columns=['gamePk', 'game_date']),
        on=['venue_id'], suffixes=("", f"_{side}"), how='left',
    )
    specific_columns = [f"{col}_wfx_{side}" for col in list(specific_model.classes_)]
    generic_columns = [f"{col}_generic_wfx_{side}" for col in list(specific_model.classes_)]

    # Rows with every specific input present (evaluated before infinities are replaced)
    valid_rows = merged[specific_model_input_list].notna().all(axis=1)

    # Convert infinites to 0
    for col in specific_model_input_list:
        merged[col] = merged[col].replace([np.inf, -np.inf], 0)

    # Make sure the output columns exist, then predict only for valid rows.
    # The guard avoids calling predict_proba on an empty array when no row is valid.
    for col in specific_columns:
        if col not in merged.columns:
            merged[col] = np.nan
    if valid_rows.any():
        merged.loc[valid_rows, specific_columns] = specific_model.predict_proba(merged.loc[valid_rows, specific_model_input_list].values)
    merged[specific_columns] = merged[specific_columns].astype(float)

    # Convert to PFX: probability relative to the base rate
    for event in events_list:
        merged[f"{event}_wfx_{side}"] = merged[f"{event}_wfx_{side}"] / base_rate_df[event][0]

    # Generic. Always.
    # Fill missings with column averages. Assign the result back: a chained
    # `df[col].fillna(..., inplace=True)` does not update the frame under Copy-on-Write.
    for col in generic_model_input_list:
        merged[col] = merged[col].fillna(merged[col].mean())

    merged[generic_columns] = generic_model.predict_proba(merged[generic_model_input_list].values)

    # Convert to PFX
    for event in events_list:
        merged[f"{event}_generic_wfx_{side}"] = merged[f"{event}_generic_wfx_{side}"] / base_rate_df[event][0]

    # Replace park-specific wfx estimates with generic ones if in generic_venue_list
    use_generic = merged['venue_id'].isin(generic_venue_list)
    for event in events_list:
        merged[f"{event}_wfx_{side}"] = np.where(use_generic, merged[f"{event}_generic_wfx_{side}"], merged[f"{event}_wfx_{side}"])

    return merged, specific_columns, generic_columns


### LHB
weather_input_l_df, wfx_l_columns, generic_wfx_l_columns = _predict_weather_effects(
    meteo_df, l_park_latest_df, "l", predict_wfx_l, predict_generic_wfx_l)

### RHB
weather_input_r_df, wfx_r_columns, generic_wfx_r_columns = _predict_weather_effects(
    meteo_df, r_park_latest_df, "r", predict_wfx_r, predict_generic_wfx_r)

# Combine LHB and RHB weather effects (both frames come from the same meteo_df rows)
wfx_df = pd.concat([weather_input_l_df, weather_input_r_df[wfx_r_columns]], axis=1)

# Clean date
wfx_df['date'] = wfx_df['game_date'].str.replace("-", "")
# Rename
wfx_df.rename(columns={'game_id': 'gamePk'}, inplace=True)

wfx_df[['gamePk', 'game_date', 'date', 'game_num', 'away_team', 'home_team', 'venue_id', 'meteo_x_vect', 'meteo_y_vect', 'temperature_2m', 'weather', 'wind'] + wfx_l_columns + wfx_r_columns].to_csv(os.path.join(baseball_path, "A06. Weather", "3. Park and Weather Factors", f"Park and Weather Factors {todaysdate}.csv"), index=False, encoding='iso-8859-1')