In [None]:
ls

In [1]:
from pydantic_ai import Agent
import os
import requests
from datetime import datetime, timedelta
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from requests.models import PreparedRequest
from urllib.request import Request, urlopen
import numpy as np
from dateutil.relativedelta import relativedelta
from typing import Optional

import json
import plotly.express as px
import pandas as pd
from pydantic import Field
from codes import *
import pytz
from enum import Enum

from typing import Optional
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import pytz
from pydantic import BaseModel

from enum import Enum
from typing import Optional, Dict, Any, List, Tuple
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import pytz
import requests
import pandas as pd
import plotly.express as px
from pydantic import BaseModel


In [2]:
# System prompt handed to the agent: it sets the assistant's role for the
# Hawai'i Climate Data Portal, asks it to plan its tool usage step by step, and
# restricts the reply to the final answer only.
prompt_process_query = """You are an AI assistant for the Hawai'i Climate Data Portal. 
You are responsible for answering the user query based on the results you get back from the tools.

When a user asks a query, carefully analyze the query and determine the best way to respond to the following questions:
1. What are the specific information the user is asking for?
2. What tools do you need to use to get the information?
3. What is the best way to format the response?

Plan your response step by step using the available tools and respond with the final answer only.

Output Format:
Only respond with the final answer. Do not lead the user to more conversation.
"""

In [3]:
# Agent that answers user queries: it uses the Groq-hosted
# "deepseek-r1-distill-llama-70b" model, returns plain strings, and is driven by
# the system prompt defined above.
# Temperature is pinned to 0.0 -- presumably to keep answers as repeatable as
# possible; confirm against the model provider's documentation.
prompt_process_agent = Agent(  
    "groq:deepseek-r1-distill-llama-70b",
    result_type=str,    
    system_prompt=prompt_process_query,
    model_settings={'temperature': 0.0}    
)

In [None]:

# prompt_process_agent = Agent(  
#     "google:gemini-1.5-flash",  # Gemini model
#     result_type=str,
#     system_prompt=prompt_process_query,
#     model_settings={'temperature': 0.0}    
# )


In [4]:
# HCDP API access configuration.
#
# You will be given a token to access the HCDP API. The token is a secret, so it
# must not be stored in the notebook: export it as the HCDP_API_TOKEN
# environment variable before starting the kernel.
hcdp_api_token = os.environ.get("HCDP_API_TOKEN", "")
if not hcdp_api_token:
    # Make the misconfiguration visible here instead of at the first API call.
    print("WARNING: HCDP_API_TOKEN is not set; set it before running the cells that call the HCDP API.")
# Please input your email address. This will be used for user logging or distributing data packages
email = "INSERT_EMAIL_ADDRESS_HERE"

api_base_url = "https://api.hcdp.ikewai.org"
# Setup header for API requests
header = {
    "Authorization": f"Bearer {hcdp_api_token}"
}

In [5]:
## Functions from the HCDP documentation

def display_raster(params, title, cmap = plt.cm.viridis.reversed(), nodata_color = "#f0f0f0"):
    """Fetch a raster from the HCDP ``/raster`` endpoint and plot it.

    Args:
        params: Query parameters appended to the raster endpoint URL.
        title: Title drawn above the plot.
        cmap: Matplotlib colormap used for the data values. It is copied
            before use, so the object passed in is never modified.
        nodata_color: Color used for masked (no-data) cells.

    Note:
        The value of the top-left pixel is treated as the no-data value and
        every cell equal to it is masked. Only the first channel of the
        decoded image is plotted.
    """
    # Local import so the function does not depend on a new notebook-level import.
    from copy import copy

    # Construct the full request URL (endpoint plus encoded query parameters).
    url_constructor = PreparedRequest()
    url_constructor.prepare_url(f"{api_base_url}/raster", params)
    full_url = url_constructor.url
    print(f"Constructed API request URL: {full_url}")
    # Request object carrying the auth header, for use with urlopen.
    req = Request(full_url, headers = header)

    # set_bad() mutates the colormap in place. The default cmap is created once
    # at definition time and shared by every call, and a caller-supplied cmap
    # belongs to the caller, so work on a private copy.
    cmap = copy(cmap)
    cmap.set_bad(nodata_color)

    # Set up the plot.
    fig, ax = plt.subplots(figsize=(20, 10), facecolor = "#e0e0e0")
    # Remove axis ticks (they display row/col numbers, which is not helpful).
    ax.axes.get_xaxis().set_ticks([])
    ax.axes.get_yaxis().set_ticks([])
    ax.set_title(title, fontsize = 20)

    # Open the data stream from the API and close it once the image is drawn.
    with urlopen(req) as raster:
        # Read the TIFF image.
        img = mpimg.imread(raster, format = "tiff")
        # Mask no-data values (taken from the top-left pixel).
        masked = np.ma.masked_equal(img, img[0][0])
        # Plot the first channel with the color scheme and add a color bar.
        imgplot = ax.imshow(masked[:, :, 0], cmap = cmap)
        fig.colorbar(imgplot, ax = ax)
        
        
def query_stations(values, name, limit = 10000, offset = 0):
    """Query the HCDP ``/stations`` endpoint and return the matching documents.

    Args:
        values: Mapping of filters; each key ``k`` is sent as ``value.k``.
        name: Value of the ``name`` field of the query document.
        limit: Maximum number of results requested.
        offset: Number of results to skip.

    Returns:
        A list with the ``"value"`` entry of every item in the response's
        ``"result"`` array.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    # "name" goes first, followed by one "value.<key>" entry per filter.
    query_doc = {"name": name, **{f"value.{field}": values[field] for field in values}}
    request_params = dict(q = json.dumps(query_doc), limit = limit, offset = offset)

    print(request_params)

    response = requests.get(f"{api_base_url}/stations", request_params, headers = header)
    response.raise_for_status()
    print(f"Constructed API request URL: {response.url}")
    return [record["value"] for record in response.json()["result"]]

def get_station_metadata():
    """Return every ``hcdp_station_metadata`` record, keyed by its identifier.

    Each record names, in its ``"id_field"`` entry, the field that holds its
    identifier; the value of that field becomes the dictionary key.
    """
    records = query_stations({}, name = "hcdp_station_metadata")
    return {record[record["id_field"]]: record for record in records}

def get_station_data(values, metadata = None, limit = 10000, offset = 0):
    """Fetch ``hcdp_station_value`` records, optionally joined with metadata.

    Args:
        values: Filters forwarded to ``query_stations``.
        metadata: Optional mapping from station id to a metadata dict. When
            given, each record is merged with the metadata of its
            ``"station_id"`` and records without metadata are dropped.
        limit: Maximum number of results requested.
        offset: Number of results to skip.

    Returns:
        The raw records when ``metadata`` is None, otherwise the merged
        records (metadata fields win on key collisions).
    """
    rows = query_stations(values, name = "hcdp_station_value", limit = limit, offset = offset)
    if metadata is None:
        return rows

    merged = []
    for row in rows:
        station_info = metadata.get(row["station_id"])
        if station_info is None:
            # Only keep data that has station metadata.
            continue
        merged.append(row | station_info)
    return merged

In [6]:
## Create classes for parameters to input to API

class DataType(str, Enum):
    """Climate variable to request; the value is sent as the ``datatype`` query parameter."""
    TEMPERATURE = "temperature"
    RAINFALL = "rainfall"


class Aggregation(str, Enum):
    """Aggregation of the variable; the value is sent as the ``aggregation`` query parameter."""
    MIN = "min"
    MAX = "max"
    MEAN = "mean"


class Period(str, Enum):
    """Time step of the series; the value is sent as the ``period`` query parameter."""
    DAY = "day"
    MONTH = "month"
    YEAR = "year"

from enum import Enum

class Extent(str, Enum):
    '''
    Spatial extent codes sent as the ``extent`` query parameter.

    STATEWIDE = all of the Hawaiian islands
    HAWAII = Hawaii county (Big Island, Hawai'i)
    KAUAI = Kauai county (Kauai island, Hawai'i)
    MAUI = Maui county (Maui island)
    HONOLULU = Honolulu county (O'ahu island, Hawai'i)
    '''

    STATEWIDE = "statewide"  # Data for the whole state
    HAWAII = "bi"            # Hawaii county
    KAUAI = "ka"             # Kauai county
    MAUI = "mn"              # Maui county
    HONOLULU = "oa"          # Honolulu county



class ClimateDataParams(BaseModel):
    """Query parameters for a raster timeseries request.

    The model is dumped to a dict and sent as the query string, so the field
    names are the query-parameter names used in the request URL.
    """
    datatype: DataType                # climate variable to fetch
    aggregation: Aggregation          # min / max / mean
    period: Period                    # day / month / year
    start: str                        # start of the range; the tool in this notebook passes "YYYY-MM-DD"
    end: str                          # end of the range; same format as `start`
    extent: Extent = Extent.STATEWIDE # spatial extent code, statewide by default
    lat: Optional[float] = None       # latitude of the point of interest, if any
    lng: Optional[float] = None       # longitude of the point of interest, if any

In [7]:
class ClimateAPI:
    """Thin client for the raster timeseries endpoint of the climate API."""

    def __init__(self, api_base_url: str, header: Dict[str, str]):
        """Store the base URL and the headers sent with every request."""
        self.raster_timeseries_ep = "/raster/timeseries"
        self.header = header
        self.api_base_url = api_base_url

    def get_timeseries_data(self, params: ClimateDataParams) -> pd.DataFrame:
        """
        Request a timeseries and return it as a two-column DataFrame.

        The first column is "Date"; the second is named after the aggregation
        and datatype, with "(°C)" for temperature and "(mm)" otherwise. Rows
        are sorted by "Date". Raises requests.HTTPError on an error status.
        """
        endpoint = f"{self.api_base_url}{self.raster_timeseries_ep}"

        # The dumped pydantic model is used directly as the query string.
        response = requests.get(endpoint, params.model_dump(), headers=self.header)
        response.raise_for_status()
        print(f"Constructed API request URL: {response.url}")

        payload = response.json()

        # Column label: "<Aggregation> <Datatype> (<unit>)".
        unit = "°C" if params.datatype == DataType.TEMPERATURE else "mm"
        value_col = f"{params.aggregation.value.capitalize()} {params.datatype.value.capitalize()} ({unit})"

        # The response maps keys to values; each pair becomes one row.
        frame = pd.DataFrame(list(payload.items()), columns=["Date", value_col])
        return frame.sort_values(by="Date")


@prompt_process_agent.tool_plain
def get_climate_timeseries(
    datatype: DataType,
    aggregation: Aggregation,
    period: Period,
    lat: float,
    lng: float,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None
) -> Dict[str, Any]:
    """
    Return temperature or rainfall timeseries data for one location.

    Always pass start_date and end_date when the question is about a specific
    day or date range; otherwise the last year of data is returned.

    Args:
        datatype: Variable to fetch: temperature or rainfall.
        aggregation: Aggregation of the variable: min, max or mean.
        period: Time step of the series: day, month or year.
        lat: Latitude of the location.
        lng: Longitude of the location.
        start_date: First date of the range, formatted "YYYY-MM-DD". Defaults
            to one year before today (Hawaii time).
        end_date: Last date of the range, formatted "YYYY-MM-DD". Defaults to
            yesterday (Hawaii time).

    Returns:
        A dict with "data" (one record per date) and "summary": the mean, min
        and max over the whole returned range, plus the location and period.
        The statistics are None when no values are available.
    """
    api = ClimateAPI(api_base_url=api_base_url, header=header)

    # Default dates: from 1 year ago to yesterday, in Hawaii local time.
    hawaii_now = datetime.now(pytz.timezone("US/Hawaii"))
    start_str = start_date if start_date else (hawaii_now - relativedelta(years=1)).strftime("%Y-%m-%d")
    end_str = end_date if end_date else (hawaii_now - timedelta(days=1)).strftime("%Y-%m-%d")

    print("Date range →", start_str, "to", end_str)

    # Create parameters object
    params = ClimateDataParams(
        datatype=datatype,
        aggregation=aggregation,
        period=period,
        start=start_str,
        end=end_str,
        extent=Extent.STATEWIDE,
        lat=lat,
        lng=lng
    )

    print("Query parameters:", params)

    df = api.get_timeseries_data(params)

    # Summary statistics as plain Python floats (not numpy scalars); None when
    # the series is empty or a statistic is NaN, so the payload serialises
    # cleanly and the model is not handed a meaningless NaN.
    values = df.iloc[:, 1]
    summary_stats = {"mean": None, "min": None, "max": None}
    if not values.empty:
        for stat_name in summary_stats:
            stat = getattr(values, stat_name)()
            summary_stats[stat_name] = None if pd.isna(stat) else float(stat)

    # Return structured result with data + summary
    return {
        "data": df.to_dict(orient="records"),
        "summary": {
            **summary_stats,
            "location": {"lat": lat, "lng": lng},
            "period": f"{start_str} to {end_str}"
        }
    }


In [8]:
# Example query with an explicit date range and coordinates; the captured log
# below shows the tool call the agent made for it.
r = await prompt_process_agent.run("Give me the mean temperature from August 2020 to August 2022 at the coorinates lat:21.501947,lng:-157.966537?")

API initialized: <__main__.ClimateAPI object at 0x132fb70e0>
Date range → 2020-08-01 to 2022-08-01
Query parameters: datatype=<DataType.TEMPERATURE: 'temperature'> aggregation=<Aggregation.MEAN: 'mean'> period=<Period.MONTH: 'month'> start='2020-08-01' end='2022-08-01' extent=<Extent.STATEWIDE: 'statewide'> lat=21.501947 lng=-157.966537
Constructed API request URL: https://api.hcdp.ikewai.org/raster/timeseries?datatype=temperature&aggregation=mean&period=month&start=2020-08-01&end=2022-08-01&extent=statewide&lat=21.501947&lng=-157.966537


In [9]:
r.data

"The mean temperature at the coordinates lat: 21.501947, lng: -157.966537 from August 2020 to August 2022 was approximately 22.3°C. The temperature ranged from a low of 20.16°C in January 2021 to a high of 24.65°C in September 2020. The data shows a slight dip in temperatures during early 2021, with a gradual increase in mid-2022. Let me know if you'd like more detailed information!"

In [14]:
# NOTE(review): the captured log below shows the tool being called without
# dates (so the default one-year range was used) and for two different
# coordinates; the single-day answer reported for "yesterday" should be verified.
r = await prompt_process_agent.run("What was the max temperature yesterday at random lat/long coordinates on Big Island, Hawai'i?")

API initialized: <__main__.ClimateAPI object at 0x132fb6ae0>
Date range → 2024-04-04 to 2025-04-03
Query parameters: datatype=<DataType.TEMPERATURE: 'temperature'> aggregation=<Aggregation.MAX: 'max'> period=<Period.DAY: 'day'> start='2024-04-04' end='2025-04-03' extent=<Extent.STATEWIDE: 'statewide'> lat=19.5 lng=-155.5
Constructed API request URL: https://api.hcdp.ikewai.org/raster/timeseries?datatype=temperature&aggregation=max&period=day&start=2024-04-04&end=2025-04-03&extent=statewide&lat=19.5&lng=-155.5
API initialized: <__main__.ClimateAPI object at 0x132f590d0>
Date range → 2024-04-04 to 2025-04-03
Query parameters: datatype=<DataType.TEMPERATURE: 'temperature'> aggregation=<Aggregation.MAX: 'max'> period=<Period.DAY: 'day'> start='2024-04-04' end='2025-04-03' extent=<Extent.STATEWIDE: 'statewide'> lat=22.05 lng=-159.8
Constructed API request URL: https://api.hcdp.ikewai.org/raster/timeseries?datatype=temperature&aggregation=max&period=day&start=2024-04-04&end=2025-04-03&extent

In [15]:
r.data

"The maximum temperature yesterday at the coordinates (19.5, -155.5) on the Big Island, Hawai'i, was **9.2°C**."

In [18]:
# NOTE(review): get_climate_timeseries defaults end_date to yesterday, so
# confirm which date the "today" value reported below actually refers to.
r = await prompt_process_agent.run("What was the max temperature today at 19.5, -155.5?")

In [19]:
r.data

'The maximum temperature today at 19.5, -155.5 was 13.4°C.'