### Imports

In [1]:
%run "C:\Users\james\Documents\MLB\Code\U1. Imports.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U2. Utilities.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U3. Classes.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U4. Datasets.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U5. Models.ipynb"

### Settings

In [2]:
# Date window to pull games for. Dates are YYYYMMDD strings (e.g. "20250327").
# Default is today only; for a backfill, set start_date to the first date wanted
# and end_date to yesterdaysdate.
start_date = end_date = todaysdate

### Games

Create games

In [3]:
# Build the games table for the configured date window.
# create_games and team_dict are not defined in this notebook — presumably they come
# from the %run notebooks above. Later cells rely on the result having at least the
# 'venue_id', 'game_datetime' and 'date' columns.
game_df = create_games(start_date, end_date, team_dict)

Merge with venue data

In [4]:
# Left-join venue attributes onto every game (venue_id -> id). Games whose venue is
# missing from venue_map_df keep their row and get NaN in the venue columns.
game_df = game_df.merge(
    venue_map_df[[
        'id',
        'location.defaultCoordinates.latitude',
        'location.defaultCoordinates.longitude',
        'fieldInfo.leftLine',
        'fieldInfo.center',
        'fieldInfo.rightLine',
        'fieldInfo.leftCenter',
        'fieldInfo.rightCenter',
        'location.elevation',
        'location.azimuthAngle',
        'fieldInfo.roofType',
        'active',
    ]],
    how='left',
    left_on='venue_id',
    right_on='id',
)

Convert to datetime

In [5]:
# Parse game_datetime into pandas timestamps: the weather helpers below call
# .strftime() / .isoformat() on it and add a pd.Timedelta to it.
game_df["game_datetime"] = pd.to_datetime(game_df["game_datetime"])

Drop if missing coordinates

In [6]:
# A weather lookup needs both coordinates, so games without them are removed here.
game_df = game_df.dropna(
    subset=[
        'location.defaultCoordinates.latitude',
        'location.defaultCoordinates.longitude',
    ]
)

### Historic

Create weather dataframe for historic games

In [None]:
# Shared Open-Meteo client used by fetch_historical_weather_data (read there as the
# module-level name `openmeteo`).
# Responses are cached in the local '.cache' store. expire_after=-1 should mean
# "never expire" in requests_cache, which suits archive data — confirm against the
# installed version.
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
# Wrap the cached session so failed requests are retried (5 retries, 0.2 backoff factor).
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

def fetch_historical_weather_data(latitude, longitude, game_datetime):
    """Download one calendar day of hourly archive weather for a location.

    Parameters
    ----------
    latitude, longitude
        Coordinates passed unchanged to the Open-Meteo archive API.
    game_datetime
        Datetime-like object supporting ``strftime``; only its calendar date
        is used, as both the start and the end of the requested range.

    Returns
    -------
    pandas.DataFrame
        A UTC ``datetime`` column followed by one column per requested hourly
        variable, in request order. Temperatures are requested in Fahrenheit
        and wind speed in mph.

    Notes
    -----
    Uses the module-level ``openmeteo`` client.
    NOTE(review): the date is taken from ``game_datetime`` as given; if the
    API interprets dates in GMT, callers should pass a UTC timestamp — confirm.
    """
    # Single source of truth: position in this list == index into hourly.Variables().
    hourly_variables = [
        "temperature_2m", "relative_humidity_2m", "dew_point_2m",
        "weather_code", "surface_pressure", "wind_speed_10m", "wind_direction_10m",
    ]

    day = game_datetime.strftime("%Y-%m-%d")
    archive_url = "https://archive-api.open-meteo.com/v1/archive"
    request = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": day,
        "end_date": day,
        "hourly": list(hourly_variables),
        "temperature_unit": "fahrenheit",
        "wind_speed_unit": "mph",
        "precipitation_unit": "inch",
    }

    # Only the first response (the single requested location) is used.
    hourly = openmeteo.weather_api(archive_url, params=request)[0].Hourly()

    # Rebuild the hourly time axis from the response's start / end / step (epoch seconds).
    timestamps = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )

    columns = {"datetime": timestamps}
    for position, variable in enumerate(hourly_variables):
        columns[variable] = hourly.Variables(position).ValuesAsNumpy()

    return pd.DataFrame(columns)

def create_historic_weather_df(game_df):
    """Return a copy of ``game_df`` with archive weather attached to each game.

    For every row, one day of hourly weather is fetched with
    ``fetch_historical_weather_data`` and the hourly record closest in time to
    ``game_datetime`` is selected.

    Parameters
    ----------
    game_df : pandas.DataFrame
        Must contain ``location.defaultCoordinates.latitude``,
        ``location.defaultCoordinates.longitude`` and ``game_datetime``.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``game_datetime`` converted to UTC timestamps plus
        the columns ``temperature_2m``, ``relative_humidity_2m``,
        ``dew_point_2m``, ``weather_code``, ``surface_pressure``,
        ``wind_speed_10m`` and ``wind_direction_10m``.
    """
    weather_columns = [
        "temperature_2m", "relative_humidity_2m", "dew_point_2m",
        "weather_code", "surface_pressure", "wind_speed_10m", "wind_direction_10m"
    ]

    # Callers pass a boolean-mask slice of the full games table; writing columns into
    # that slice raises SettingWithCopyWarning and mutates caller state, so copy first.
    game_df = game_df.copy()

    # Convert to UTC so game times can be compared with the UTC hourly timestamps.
    game_df["game_datetime"] = pd.to_datetime(game_df["game_datetime"], utc=True)

    matched_rows = []
    for _, row in game_df.iterrows():
        latitude = row["location.defaultCoordinates.latitude"]
        longitude = row["location.defaultCoordinates.longitude"]
        game_datetime = row["game_datetime"]

        # Hourly weather for the game's calendar day.
        weather_data = fetch_historical_weather_data(latitude, longitude, game_datetime)

        # Label of the hourly record nearest to first pitch (the earliest one on a tie).
        nearest_label = (weather_data["datetime"] - game_datetime).abs().idxmin()
        matched_rows.append(weather_data.loc[nearest_label, weather_columns])

    # Attach one value per game for each weather variable.
    for col in weather_columns:
        game_df[col] = [matched[col] for matched in matched_rows]

    return game_df

### Forecast

Create forecast dataframe for today's games

In [None]:
# TODO: Verify that doubleheaders (two games at the same venue on the same date) are handled correctly.

In [7]:
def _forecast_client():
    """Return the shared Open-Meteo forecast client, building it on first use."""
    client = getattr(_forecast_client, "_client", None)
    if client is None:
        # Forecast responses are cached for one hour and retried on error.
        cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
        retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
        client = openmeteo_requests.Client(session=retry_session)
        _forecast_client._client = client
    return client


def fetch_weather_data(latitude, longitude, start, end):
    """Fetch the hourly forecast for a location, limited to ``[start, end]``.

    Parameters
    ----------
    latitude, longitude
        Coordinates passed unchanged to the Open-Meteo forecast API.
    start, end
        Window bounds in any form ``pd.to_datetime(..., utc=True)`` accepts
        (the caller passes ISO 8601 strings). Both bounds are inclusive.

    Returns
    -------
    pandas.DataFrame
        A UTC ``date`` column plus ``temperature_2m``, ``relative_humidity_2m``,
        ``dew_point_2m``, ``precipitation_probability``, ``surface_pressure``,
        ``wind_speed_10m``, ``wind_direction_10m`` and ``weather_code``,
        containing only rows whose timestamp lies in the window. Empty when
        the returned forecast does not cover the window.
    """
    # Single source of truth: position in this list == index into hourly.Variables().
    hourly_variables = [
        "temperature_2m", "relative_humidity_2m", "dew_point_2m",
        "precipitation_probability", "surface_pressure",
        "wind_speed_10m", "wind_direction_10m", "weather_code",
    ]

    url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "hourly": list(hourly_variables),
        # NOTE(review): the documented Open-Meteo range parameters appear to be
        # start_date/end_date and start_hour/end_hour; "start"/"end" look
        # unrecognised, so the window is enforced by the local filter below —
        # confirm before relying on server-side filtering.
        "start": start,  # Start time of forecast
        "end": end,  # End time of forecast
        "wind_speed_unit": "mph",
        "temperature_unit": "fahrenheit",
        "precipitation_unit": "inch"
    }

    # Reuse one session/client across calls instead of rebuilding it for every game.
    responses = _forecast_client().weather_api(url, params=params)

    # Only the first response (the single requested location) is used.
    hourly = responses[0].Hourly()

    # Rebuild the hourly time axis from the response's start / end / step (epoch seconds).
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        )
    }
    for position, variable in enumerate(hourly_variables):
        hourly_data[variable] = hourly.Variables(position).ValuesAsNumpy()

    hourly_dataframe = pd.DataFrame(data=hourly_data)

    # Keep only the rows inside the requested window (both bounds inclusive).
    window_start = pd.to_datetime(start, utc=True)
    window_end = pd.to_datetime(end, utc=True)
    in_window = (hourly_dataframe["date"] >= window_start) & (hourly_dataframe["date"] <= window_end)

    return hourly_dataframe[in_window]


# Fetch the forecast for each game in game_df and attach it as new columns
def create_daily_weather_df(game_df):
    """Return a copy of ``game_df`` with forecast weather attached to each game.

    For every row, ``fetch_weather_data`` is queried for the window from
    ``game_datetime`` to one hour later, and the first hourly record in that
    window is used.

    Parameters
    ----------
    game_df : pandas.DataFrame
        Must contain ``location.defaultCoordinates.latitude``,
        ``location.defaultCoordinates.longitude`` and ``game_datetime``.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the added columns ``temperature_2m``,
        ``relative_humidity_2m``, ``dew_point_2m``,
        ``precipitation_probability``, ``surface_pressure``,
        ``wind_speed_10m``, ``wind_direction_10m`` and ``weather_code``.

    Raises
    ------
    IndexError
        If no forecast record falls inside a game's one-hour window.
    """
    forecast_columns = [
        "temperature_2m", "relative_humidity_2m", "dew_point_2m",
        "precipitation_probability", "surface_pressure",
        "wind_speed_10m", "wind_direction_10m", "weather_code",
    ]

    # Callers pass a boolean-mask slice of the full games table; writing columns into
    # that slice raises SettingWithCopyWarning and mutates caller state, so copy first.
    game_df = game_df.copy()

    first_rows = []
    for _, row in game_df.iterrows():
        latitude = row["location.defaultCoordinates.latitude"]
        longitude = row["location.defaultCoordinates.longitude"]
        game_datetime = pd.to_datetime(row["game_datetime"])

        # One-hour window starting at first pitch, as ISO 8601 strings.
        start = game_datetime.isoformat()
        end = (game_datetime + pd.Timedelta(hours=1)).isoformat()

        weather_data = fetch_weather_data(latitude, longitude, start, end)

        # Same exception type as indexing an empty frame, but with an actionable message.
        if len(weather_data) == 0:
            raise IndexError(
                f"No hourly forecast between {start} and {end} "
                f"for coordinates ({latitude}, {longitude})"
            )

        # The first record in the window is the first full hour at or after game start.
        first_rows.append(weather_data.iloc[0])

    # Attach one value per game for each forecast variable.
    for col in forecast_columns:
        game_df[col] = [forecast[col] for forecast in first_rows]

    return game_df

### Run

Columns to extract

In [8]:
# Export layout for the weather CSVs, grouped by where each column comes from.

# Game columns already present in game_df
game_columns = [
    'game_id', 'game_datetime', 'game_date', 'date', 'year', 'game_type', 'status',
    'away_team', 'home_team', 'doubleheader', 'game_num', 'venue_id', 'venue_name',
]

# Venue attributes merged in from the venue map
venue_columns = [
    'location.defaultCoordinates.latitude',
    'location.defaultCoordinates.longitude',
    'fieldInfo.leftLine',
    'fieldInfo.center',
    'fieldInfo.rightLine',
    'fieldInfo.leftCenter',
    'fieldInfo.rightCenter',
    'location.elevation',
    'location.azimuthAngle',
    'fieldInfo.roofType',
    'active',
]

# Open-Meteo variables available for both historic and forecast data
weather_columns = [
    'temperature_2m', 'relative_humidity_2m', 'dew_point_2m', 'surface_pressure',
    'wind_speed_10m', 'wind_direction_10m', 'weather_code',
]

# Open-Meteo variables that exist only in the forecast
forecast_only_columns = ['precipitation_probability']

In [9]:
%%time
# Loop over dates
for date in game_df['date'].unique():
    print(date)
    if date == todaysdate:
        # Create daily weather dataframe (forecast)
        create_daily_weather_df(game_df[game_df['date'] == date])[game_columns + venue_columns + weather_columns + forecast_only_columns].to_csv(os.path.join(baseball_path, "A06. Weather", "1. Open Meteo", f"Open Meteo {date}.csv"), index=False)
    else:
        # Create historic weather dataframe
        create_historic_weather_df(game_df[game_df['date'] == date])[game_columns + venue_columns + weather_columns].to_csv(os.path.join(baseball_path, "A06. Weather", "1. Open Meteo", f"Open Meteo {date}.csv"), index=False)
        time.sleep(2)

20250403
CPU times: total: 62.5 ms
Wall time: 2.97 s
