In [18]:
import json
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path

import pandas as pd
import requests
import tomli

In [19]:
@dataclass
class PredResult:
    area: str = None
    lat: float = None
    lon: float = None
    lat_lon: tuple[float, float] = None
    grid_zone: str = None
    ptid: int = None
    date: str = None
    max_temp: float = None
    min_temp: float = None
    predicted_load: float = None

In [20]:
API_TOML_DIR = Path(Path.cwd().parent, "api_creds.toml")
WEATHER_LOCATION_MAPPING_DIR = Path(
    Path.cwd().parent, "data_information", "weather_location_mapping.json"
)
PTID_AREA_MAPPING_DIR = Path(
    Path.cwd().parent, "data_information", "PTID_name_mapping.json"
)

In [21]:
with open(API_TOML_DIR, "rb") as f:
    key = tomli.load(f)["api_key"]

In [22]:
def get_forecast(
    lat_lon: tuple[float, float], forecast_days: int = 3, api_key: str = key
) -> dict:
    """Gets a forecase for a given lat lon from the https://www.weatherapi.com/ site.
    Requires an api key to be defined.

    Args:
        lat_lon (tuple[float, float]): lat/lon of the location to be forecasted
        forecast_days (int, optional): number of days to forecast, note free tier weatherapi is restricted to 14 days. Defaults to 3.
        api_key (str, optional): api key for weather api. Defaults to key.

    Raises:
        SystemExit: generic error catch for incorrect request parameters

    Returns:
        dict: json of the returned api call#
    """
    BASE_URL = "https://api.weatherapi.com/v1/forecast.json?"
    str_lat_lon = ",".join([str(x) for x in lat_lon])
    query_params = {"q": str_lat_lon, "days": forecast_days, "key": api_key}
    try:
        response = requests.get(BASE_URL, params=query_params)
    except (
        requests.exceptions.RequestException
    ) as e:  # TODO Generic error catching = bad
        raise SystemExit(e)
    return response.json()

In [23]:
mapping_json = json.loads(
    Path(WEATHER_LOCATION_MAPPING_DIR).read_text(encoding="UTF-8")
)
area_lat_lon = mapping_json["lat_lon"]
grid_zone_mapping = mapping_json["grid_zone"]

In [24]:
ptid_area_mapping = json.loads(Path(PTID_AREA_MAPPING_DIR).read_text(encoding="UTF-8"))

In [25]:
df = pd.DataFrame.from_dict(area_lat_lon).T.rename(columns={0: "Lat", 1: "Lon"})
df['Lat'] = df['Lat'].astype(float).round(2)
df['Lon'] = df['Lon'].astype(float).round(2)
df["Grid Zone"] = df.index.map(grid_zone_mapping)
df["PTID"] = df["Grid Zone"].map(ptid_area_mapping)
df["Lat_Lon"] = tuple(zip(df["Lat"], df["Lon"]))
df = df.reset_index(drop=False).rename(columns={"index": "Area"})

In [26]:
# Converting dataframe to list of dataclasses
area_info = []  # TODO This is a poor name
for row in df.itertuples():
    result_class = PredResult()
    result_class.area = row.Area
    result_class.lat = float(row.Lat)
    result_class.lon = float(row.Lon)
    result_class.lat_lon = tuple([float(row.Lat), float(row.Lon)])
    result_class.grid_zone = (
        row._4
    )  # Not sure why grid_zone col name hasnt flowed through
    result_class.ptid = float(row.PTID)
    area_info.append(result_class)

In [27]:
def write_forecast_results(
    results_list: list[PredResult], **kwargs
) -> list[PredResult]:
    """Gets min/max temperatures for given number of days as specified by
    "forecast_days" kwarg passed to get_forecast function

    Args:
        results_list (list[PredResult]): list of PredResult dataclasses holding area info:
        area name, lat_lon, grid_zone, ptid

    Returns:
        list[PredResult]: same list of PredResults with additional date and min/max
        forecasted temperature data
    """
    for res in results_list:
        forecast = parse_forcast_response(get_forecast(lat_lon=res.lat_lon, **kwargs))
        res.date = forecast["dates"]
        res.max_temp = forecast["max_temps"]
        res.min_temp = forecast["min_temps"]
    return results_list

In [28]:
def parse_forcast_response(forecast_response_json: dict) -> dict[str,dict[str,float]]:
    """From a full api response dict, extract the latitude, longitude, forecast dates, 
    hourley forecast for dates, with corresponding temperatures

    Args:
        forecast_response_json (dict): full response from get_forecast function

    Returns:
        dict: latitude, longitude, dates,hours, minmax temp dictionary
    """
    num_days_forecasted = range(len(forecast_response_json["forecast"]["forecastday"]))
    num_hours = range(0,23)
    forecast_day = forecast_response_json["forecast"]["forecastday"]
    lat = forecast_response_json["location"]["lat"]
    lon = forecast_response_json["location"]["lon"]
    tuple([lat, lon])
    days = []
    time = []
    temps = []
    for day in num_days_forecasted:
        days.append(forecast_day[day]['date'])
        for hour in num_hours:
            time.append(forecast_day[day]['hour'][hour]['time'])
            temps.append(forecast_day[day]['hour'][hour]['temp_c'])
    # Extracting time from datetime
    time = [datetime.strptime(x,'%Y-%m-%d %H:%M') for x in time]
    time = [str(x.time()) for x in time]
    response_dict = {}
    # response_dict['lat_lon'] = lat_lon
    step = len(time)/len(forecast_response_json["forecast"]["forecastday"])
    for i in num_days_forecasted:
        step1 = int(i*step)
        step2 = int((i+1)*step)
        for j in num_hours:
            response_dict[days[i]] = dict(zip(time[step1:step2], 
                                              temps[step1:step2]))
    """
    "Steps" explanation above. 
    the time and temps list contain all the time/temps for every day and hour
    To map the correct chunk of each list to the correct day I need to split
    each list into sizes of len(temps)/num_days -> "step". 
    I then slice each list based on this step and append to the correct day key in the dict. 
    """
 
    return response_dict

In [29]:
df.head()

Unnamed: 0,Area,Lat,Lon,Grid Zone,PTID,Lat_Lon
0,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)"
1,ART,43.97,-75.91,MHK VL,61756.0,"(43.97, -75.91)"
2,BGM,42.1,-75.92,CENTRL,61754.0,"(42.1, -75.92)"
3,BUF,42.88,-78.88,WEST,61752.0,"(42.88, -78.88)"
4,ELM,42.08,-76.8,CENTRL,61754.0,"(42.08, -76.8)"


In [30]:
response_test = get_forecast(lat_lon=df.iloc[0]['Lat_Lon'])
df['forecast_response'] = df['Lat_Lon'].apply(lambda x: parse_forcast_response(get_forecast(x)))

In [31]:
pd.DataFrame.from_dict(df.iloc[0]['forecast_response'])

Unnamed: 0,2023-07-22,2023-07-23,2023-07-24
00:00:00,18.5,19.5,19.6
01:00:00,17.5,18.7,18.6
02:00:00,17.0,17.0,17.8
03:00:00,16.9,13.6,17.0
04:00:00,16.8,13.3,16.3
05:00:00,16.6,13.6,16.2
06:00:00,16.5,12.7,16.1
07:00:00,16.7,14.8,17.6
08:00:00,17.4,16.8,16.3
09:00:00,20.4,20.8,22.8


In [32]:
df.head()

Unnamed: 0,Area,Lat,Lon,Grid Zone,PTID,Lat_Lon,forecast_response
0,ALB,42.65,-73.76,CAPITL,61757.0,"(42.65, -73.76)","{'2023-07-22': {'00:00:00': 18.5, '01:00:00': ..."
1,ART,43.97,-75.91,MHK VL,61756.0,"(43.97, -75.91)","{'2023-07-22': {'00:00:00': 16.7, '01:00:00': ..."
2,BGM,42.1,-75.92,CENTRL,61754.0,"(42.1, -75.92)","{'2023-07-22': {'00:00:00': 18.1, '01:00:00': ..."
3,BUF,42.88,-78.88,WEST,61752.0,"(42.88, -78.88)","{'2023-07-22': {'00:00:00': 18.7, '01:00:00': ..."
4,ELM,42.08,-76.8,CENTRL,61754.0,"(42.08, -76.8)","{'2023-07-22': {'00:00:00': 18.4, '01:00:00': ..."


In [33]:
test_parse = parse_forcast_response(response_test)
test_parse

{'2023-07-22': {'00:00:00': 18.5,
  '01:00:00': 17.5,
  '02:00:00': 17.0,
  '03:00:00': 16.9,
  '04:00:00': 16.8,
  '05:00:00': 16.6,
  '06:00:00': 16.5,
  '07:00:00': 16.7,
  '08:00:00': 17.4,
  '09:00:00': 20.4,
  '10:00:00': 22.2,
  '11:00:00': 25.3,
  '12:00:00': 26.4,
  '13:00:00': 27.3,
  '14:00:00': 27.7,
  '15:00:00': 28.1,
  '16:00:00': 27.7,
  '17:00:00': 26.6,
  '18:00:00': 26.0,
  '19:00:00': 27.1,
  '20:00:00': 22.5,
  '21:00:00': 22.1,
  '22:00:00': 21.2},
 '2023-07-23': {'00:00:00': 19.5,
  '01:00:00': 18.7,
  '02:00:00': 17.0,
  '03:00:00': 13.6,
  '04:00:00': 13.3,
  '05:00:00': 13.6,
  '06:00:00': 12.7,
  '07:00:00': 14.8,
  '08:00:00': 16.8,
  '09:00:00': 20.8,
  '10:00:00': 23.0,
  '11:00:00': 24.8,
  '12:00:00': 29.4,
  '13:00:00': 28.5,
  '14:00:00': 28.1,
  '15:00:00': 29.6,
  '16:00:00': 29.3,
  '17:00:00': 27.2,
  '18:00:00': 26.5,
  '19:00:00': 25.6,
  '20:00:00': 24.1,
  '21:00:00': 23.3,
  '22:00:00': 22.9},
 '2023-07-24': {'00:00:00': 19.6,
  '01:00:00': 18

# Prediction Preprocessing

The model is expecting a dataframe of the following structure:

['Min Temp',
 'Max Temp',
 'Time Stamp',
 'PTID',
 'Year',
 'Month',
 'Day',
 'Minute',
 'Hour',
 'Month_sin',
 'Month_cos',
 'Day_sin',
 'Day_cos',
 'Minute_sin',
 'Minute_cos',
 'Hour_sin',
 'Hour_cos']

 Prior to pasing for predictions we need to preprocess the data recieved from the weather api call to get it into the right shape. 

In [34]:
# forecast_df = pd.json_normalize(asdict(obj) for obj in forecast_results)
# forecast_df = forecast_df.explode(['date', 'max_temp', 'min_temp'])

In [35]:
# forecast_df.head()