The logic in this notebook is recreated in the weather_api_call.py file

In [298]:
import json
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import tomli

In [299]:
# Input files are resolved relative to the parent of the current working
# directory. Despite the *_DIR suffix, each constant points at a single file.
API_TOML_DIR = Path.cwd().parent / "api_creds.toml"
WEATHER_LOCATION_MAPPING_DIR = (
    Path.cwd().parent / "data_information" / "weather_location_mapping.json"
)
PTID_AREA_MAPPING_DIR = (
    Path.cwd().parent / "data_information" / "PTID_name_mapping.json"
)

In [300]:
# Read the weather API key from the credentials TOML file (tomli requires a
# binary file handle). The key is kept out of the notebook source itself.
with API_TOML_DIR.open("rb") as creds_file:
    key = tomli.load(creds_file)["api_key"]

In [301]:
def get_forecast(
    lat_lon: tuple[float, float],
    forecast_days: int = 3,
    api_key: str = key,
    timeout: float = 10.0,
) -> dict:
    """Gets a forecast for a given lat/lon from the https://www.weatherapi.com/ site.
    Requires an api key to be defined.

    Args:
        lat_lon (tuple[float, float]): lat/lon of the location to be forecasted
        forecast_days (int, optional): number of days to forecast, note free tier weatherapi is restricted to 14 days. Defaults to 3.
        api_key (str, optional): api key for weather api. Defaults to key.
        timeout (float, optional): seconds to wait for the server before giving up. Defaults to 10.0.

    Raises:
        SystemExit: raised for any request failure, i.e. connection problems,
            timeouts, and HTTP error statuses such as those returned for
            incorrect request parameters or an invalid api key.

    Returns:
        dict: json of the returned api call
    """
    BASE_URL = "https://api.weatherapi.com/v1/forecast.json"
    query_params = {
        "q": ",".join(str(coord) for coord in lat_lon),
        "days": forecast_days,
        "key": api_key,
    }
    try:
        # Without a timeout, requests can block forever on a stalled connection.
        response = requests.get(BASE_URL, params=query_params, timeout=timeout)
        # Surface 4xx/5xx responses here rather than returning an error payload
        # that only fails later when the forecast keys are looked up.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        # The api key travels in the query string, so the exception text can
        # contain it; mask it before it ends up in notebook output.
        message = str(e)
        if api_key:
            message = message.replace(api_key, "***")
        # "from None" keeps the unmasked original out of the chained traceback.
        raise SystemExit(message) from None

In [302]:
# Load the weather-location mapping file and pull out its two sections:
# "lat_lon" and "grid_zone".
mapping_text = Path(WEATHER_LOCATION_MAPPING_DIR).read_text(encoding="UTF-8")
mapping_json = json.loads(mapping_text)
area_lat_lon, grid_zone_mapping = mapping_json["lat_lon"], mapping_json["grid_zone"]

In [303]:
# Load the PTID name mapping (used below to map each "Grid Zone" to its PTID).
with open(PTID_AREA_MAPPING_DIR, encoding="UTF-8") as ptid_file:
    ptid_area_mapping = json.load(ptid_file)

In [304]:
# Build the area lookup table: one row per weather area with its rounded
# coordinates, its grid zone, the PTID of that zone, and a (lat, lon) tuple
# ready to hand to get_forecast.
df = (
    pd.DataFrame.from_dict(area_lat_lon)
    .T.rename(columns={0: "Lat", 1: "Lon"})
    .assign(
        Lat=lambda frame: frame["Lat"].astype(float).round(2),
        Lon=lambda frame: frame["Lon"].astype(float).round(2),
        **{"Grid Zone": lambda frame: frame.index.map(grid_zone_mapping)},
    )
    .assign(
        PTID=lambda frame: frame["Grid Zone"].map(ptid_area_mapping),
        Lat_Lon=lambda frame: tuple(zip(frame["Lat"], frame["Lon"])),
    )
    .reset_index(drop=False)
    .rename(columns={"index": "Area"})
)

In [305]:
def parse_forcast_response(forecast_response_json: dict) -> dict[str, float]:
    """From a full api response dict, extract the hourly forecast temperatures,
    keyed by timestamp.

    Each key is "<forecast date> <HH:MM:SS>" (for example "2023-07-23 04:00:00")
    and each value is the "temp_c" reported for that hour. Entries keep the
    order of the response: day by day, hour by hour.

    Every hour present in the response is used, so a day with fewer or more than
    24 hourly entries is handled, and a response with no forecast days yields an
    empty dict.

    Args:
        forecast_response_json (dict): full response from get_forecast function

    Returns:
        dict[str, float]: flat mapping of timestamp string to temperature
    """
    hourly_temps: dict[str, float] = {}
    for forecast_day in forecast_response_json["forecast"]["forecastday"]:
        date = forecast_day["date"]
        for hour in forecast_day["hour"]:
            # The api reports "YYYY-MM-DD HH:MM"; keep only the clock time, which
            # formats as "HH:MM:SS", and pair it with the day's own date.
            clock_time = datetime.strptime(hour["time"], "%Y-%m-%d %H:%M").time()
            hourly_temps[f"{date} {clock_time}"] = hour["temp_c"]
    return hourly_temps

In [306]:
# Preview the first rows of the area lookup table built above.
df.head()

Unnamed: 0,Area,Lat,Lon,Grid Zone,PTID,Lat_Lon
0,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)"
1,ART,43.97,-75.91,MHK VL,61756.0,"(43.97, -75.91)"
2,BGM,42.1,-75.92,CENTRL,61754.0,"(42.1, -75.92)"
3,BUF,42.88,-78.88,WEST,61752.0,"(42.88, -78.88)"
4,ELM,42.08,-76.8,CENTRL,61754.0,"(42.08, -76.8)"


## Testing joins

# Row 0 only

In [307]:
# Exploratory walk-through for the first area only: fetch its forecast, reshape
# it to one row per forecast hour, attach daily min/max temperatures, and join
# the area's attributes onto every hourly row.

# NOTE(review): row_0_response is not used again in this cell; the same forecast
# is requested a second time by the .apply call below.
row_0_response = parse_forcast_response(get_forecast(lat_lon=df.iloc[0]["Lat_Lon"]))
# One-row frame holding the first area's attributes.
df_row_0 = pd.DataFrame(df.iloc[0]).T
df_row_0["forecast_response"] = df_row_0["Lat_Lon"].apply(
    lambda x: parse_forcast_response(get_forecast(x))
)

# Flatten the parsed forecast dict, then transpose so each key becomes a row:
# the key goes to "timestamp" and its value to "temp".
df_0_forecast = pd.json_normalize(df_row_0["forecast_response"]).T.reset_index()
df_0_forecast = df_0_forecast.rename(columns={0: "temp", "index": "timestamp"})
df_0_forecast["timestamp"] = pd.to_datetime(df_0_forecast["timestamp"])
# Split the timestamp into its calendar/clock components.
df_0_forecast["year"] = df_0_forecast["timestamp"].dt.year
df_0_forecast["month"] = df_0_forecast["timestamp"].dt.month
df_0_forecast["day"] = df_0_forecast["timestamp"].dt.day
df_0_forecast["minute"] = df_0_forecast["timestamp"].dt.minute
df_0_forecast["hour"] = df_0_forecast["timestamp"].dt.hour

# Daily extremes, grouped on the day-of-month column.
daily_min = (
    df_0_forecast.groupby("day", as_index=False)
    .min()[["day", "temp"]]
    .rename(columns={"temp": "min_temp"})
)
daily_max = (
    df_0_forecast.groupby("day", as_index=False)
    .max()[["day", "temp"]]
    .rename(columns={"temp": "max_temp"})
)

# Attach max_temp and then min_temp to every hourly row of the matching day.
df_0_max_merge = df_0_forecast.merge(daily_max, on="day")
df_0_min_merge = df_0_max_merge.merge(daily_min, on="day")

# Drop the raw forecast dict and repeat the single area row once per hourly row
# so the two frames can be concatenated side by side.
df_row_0 = df_row_0.drop("forecast_response", axis=1, errors="ignore")
df_row_0 = df_row_0.loc[np.repeat(df_row_0.index, df_0_min_merge.shape[0])].reset_index(
    drop=True
)

# Column-wise join on the shared 0..N-1 index; rows containing any NaN are
# discarded.
df_row_0_final = pd.concat([df_row_0, df_0_min_merge], axis=1)
df_row_0_final = df_row_0_final.dropna()
df_row_0_final.head()

Unnamed: 0,Area,Lat,Lon,Grid Zone,PTID,Lat_Lon,timestamp,temp,year,month,day,minute,hour,max_temp,min_temp
0,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)",2023-07-23 00:00:00,20.1,2023,7,23,0,0,30.2,16.0
1,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)",2023-07-23 01:00:00,19.2,2023,7,23,0,1,30.2,16.0
2,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)",2023-07-23 02:00:00,18.3,2023,7,23,0,2,30.2,16.0
3,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)",2023-07-23 03:00:00,17.6,2023,7,23,0,3,30.2,16.0
4,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)",2023-07-23 04:00:00,17.0,2023,7,23,0,4,30.2,16.0


In [309]:
def prepare_prediction_df(area_df: pd.DataFrame, df_row_no: int) -> pd.DataFrame:
    """Build the hourly forecast frame for one row of the area lookup table.

    Fetches the forecast for the row's "Lat_Lon" exactly once, reshapes it to one
    row per forecast hour, adds calendar/clock columns plus the daily max and min
    temperature, and repeats the area's own columns onto every hourly row.

    Args:
        area_df (pd.DataFrame): area lookup table; must contain a "Lat_Lon"
            column holding the (lat, lon) tuple passed to get_forecast.
        df_row_no (int): positional index of the row in area_df to process.

    Returns:
        pd.DataFrame: the columns of area_df followed by timestamp, temp, year,
            month, day, minute, hour, max_temp and min_temp, one row per
            forecast hour. Rows containing any NaN are dropped.
    """
    # One-row frame for the requested area. It is built from the row Series
    # (rather than a slice) so the repeated area columns keep the same dtypes
    # this function has always returned.
    area_row = pd.DataFrame(area_df.iloc[df_row_no]).T

    # A single api call per area: the parsed result maps timestamp string -> temp.
    hourly_temps = parse_forcast_response(
        get_forecast(lat_lon=area_df.iloc[df_row_no]["Lat_Lon"])
    )

    forecast = pd.DataFrame(
        {
            "timestamp": list(hourly_temps.keys()),
            "temp": list(hourly_temps.values()),
        }
    )
    forecast["timestamp"] = pd.to_datetime(forecast["timestamp"])
    forecast["year"] = forecast["timestamp"].dt.year
    forecast["month"] = forecast["timestamp"].dt.month
    forecast["day"] = forecast["timestamp"].dt.day
    forecast["minute"] = forecast["timestamp"].dt.minute
    forecast["hour"] = forecast["timestamp"].dt.hour

    # Daily extremes broadcast back onto each hourly row. Grouping on the full
    # calendar date (not just day-of-month) keeps days from different months
    # apart; for forecasts within one month the result is unchanged.
    temps_by_date = forecast.groupby(forecast["timestamp"].dt.normalize())["temp"]
    daily_max = temps_by_date.transform("max")
    daily_min = temps_by_date.transform("min")
    forecast = forecast.assign(max_temp=daily_max, min_temp=daily_min)

    # Repeat the area row once per forecast hour so both frames share the same
    # 0..N-1 index and can be joined column-wise.
    repeated_area = area_row.loc[
        np.repeat(area_row.index, forecast.shape[0])
    ].reset_index(drop=True)

    return pd.concat([repeated_area, forecast], axis=1).dropna()

In [314]:
# One prepared forecast frame per row of the area lookup table, in row order.
prediction_dfs = [prepare_prediction_df(df, row_num) for row_num in range(len(df))]

In [317]:
# Stack the per-area frames row-wise (pd.concat's default axis). Each frame
# keeps its own 0-based index, so index labels repeat across areas.
merged_prediction_df = pd.concat(prediction_dfs)
merged_prediction_df

Unnamed: 0,Area,Lat,Lon,Grid Zone,PTID,Lat_Lon,timestamp,temp,year,month,day,minute,hour,max_temp,min_temp
0,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)",2023-07-23 00:00:00,20.1,2023,7,23,0,0,30.2,16.0
1,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)",2023-07-23 01:00:00,19.2,2023,7,23,0,1,30.2,16.0
2,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)",2023-07-23 02:00:00,18.3,2023,7,23,0,2,30.2,16.0
3,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)",2023-07-23 03:00:00,17.6,2023,7,23,0,3,30.2,16.0
4,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)",2023-07-23 04:00:00,17.0,2023,7,23,0,4,30.2,16.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,UCA,43.23,-75.48,MHK VL,61756.0,"(43.23, -75.48)",2023-07-25 19:00:00,24.9,2023,7,25,0,19,27.8,14.5
68,UCA,43.23,-75.48,MHK VL,61756.0,"(43.23, -75.48)",2023-07-25 20:00:00,24.1,2023,7,25,0,20,27.8,14.5
69,UCA,43.23,-75.48,MHK VL,61756.0,"(43.23, -75.48)",2023-07-25 21:00:00,19.3,2023,7,25,0,21,27.8,14.5
70,UCA,43.23,-75.48,MHK VL,61756.0,"(43.23, -75.48)",2023-07-25 22:00:00,18.3,2023,7,25,0,22,27.8,14.5


# Prediction Preprocessing

The model is expecting a dataframe of the following structure:

['Min Temp',
 'Max Temp',
 'Time Stamp',
 'PTID',
 'Year',
 'Month',
 'Day',
 'Minute',
 'Hour',
 'Month_sin',
 'Month_cos',
 'Day_sin',
 'Day_cos',
 'Minute_sin',
 'Minute_cos',
 'Hour_sin',
 'Hour_cos']

Prior to passing the data for predictions, we need to preprocess the data received from the weather API call to get it into the right shape.