In [1]:
import os
from datetime import datetime, timezone
from typing import List, Literal

import instructor
import openai
from loguru import logger
from pydantic import BaseModel, Field

In [2]:
# System prompt template for WeatherBot.
#
# Filled in with str.format(); the placeholders are:
#   {current_datetime} - timestamp string shown to the model as "now"
#   {user_location}    - the user's location
#   {data_store}       - weather data already available to answer from
#   {vars}             - the variables the model may request
#
# Each section heading ends with "\n" so that the heading and the underscore
# rule below it land on separate lines in the rendered prompt.
SYSTEM_PROMPT = ("THE CURRENT DATE AND TIME IS {current_datetime}.\n"
                 "THE USER'S LOCATION IS {user_location}\n"
                 "You are WeatherBot, a chatbot that can answer any user query about the weather.\n"
                 "You will receive a user query and must respond with the outlined JSON structure.\n"
                 "When answering, please adhere to the following rules:\n"
                 "- If the query is not weather related, answer the user query as best as you can and remind them that you are a weather bot. \n"
                 "- If the query is about the weather and the data available to you is sufficient to provide an answer, answer the user query as WeatherBot. Open your query with a succint summary of the weather and give the most important details.\n"
                 "- Only answer weather queries based on the information you receive from a weather API, found in the DATA STORE. \n"
                 "- If the data is insufficient to answer the query or there is no data, you can request data from the API by returning sufficient_data_check as False and requesting additional data using the other fields. \n"
                 "- We need to limit how many API calls we make, so only request data if it's really necessary. Try and work with what is in the data store if possible. \n"
                 "- If the data is insufficient, include a holding message in the response field while we request data from the API. \n"
                 "- If you need to make an API request please ensure that it will provide the right data for another language model to answer the query. For example if the request is for the weather tomorrow, you will need data at hourly intervals to give a complete answer. \n"
                 "- For variables, you can select from the provided list. Please only select the most relevant variables to answer the query. \n"
                 "- If a location is not specified in the query, return location as empty. \n"
                 "- If the request doesn't contain a specific time, make sure that the times field will request enough data from the API for a language model to answer the query with the received data.\n"
                 "- Times can only be values on the hour, e.g. 2022-01-01T00:00:00Z, 2022-01-01T01:00:00Z, 2022-01-01T02:00:00Z, etc.\n"
                 "- If the user request is for the weather now, please request the data at the previous hour mark plus one additional hour of data.\n"
                 "- If you are able to answer the weather query and don't need to request more data, explain what data you used from the data store by specifying the variables you used in the variables field, the location you used in the location field and the times you used in the times field. \n"
                 "- Don't go into too much detail, be succinct. No one likes to hear every single detail about the weather.\n\n"

                 "______________________________\n"
                 "DATA STORE\n"
                 "______________________________\n"
                 "{data_store}\n"
                 "______________________________\n"
                 "VARIABLES\n"
                 "______________________________\n"
                 "{vars}\n"
                 )

In [3]:
# Variable identifiers offered to the model; the list is rendered into the
# {vars} placeholder of the system prompt.
# NOTE(review): presumably these are the variable names accepted by the
# MetService API - confirm against the API documentation.
METSERVICE_VARIABLES = [
    # air.*
    "air.humidity.at-2m",
    "air.pressure.at-sea-level",
    "air.temperature.at-2m",
    "air.visibility",
    # atmosphere.* / cloud.* / precipitation.* / radiation.*
    "atmosphere.convective.potential.energy",
    "cloud.base.height",
    "cloud.cover",
    "precipitation.rate",
    "radiation.flux.downward.longwave",
    "radiation.flux.downward.shortwave",
    # wind.*
    "wind.direction.at-10m",
    "wind.direction.at-100m",
    "wind.speed.at-10m",
    "wind.speed.at-100m",
    "wind.speed.eastward.at-100m",
    "wind.speed.eastward.at-10m",
    "wind.speed.gust.at-10m",
    "wind.speed.northward.at-100m",
    "wind.speed.northward.at-10m",
    # wave.*
    "wave.height",
    "wave.height.max",
    "wave.direction.peak",
    "wave.period.peak",
    "wave.height.above-8s",
    "wave.height.below-8s",
    "wave.period.above-8s.peak",
    "wave.period.below-8s.peak",
    "wave.direction.above-8s.peak",
    "wave.direction.below-8s.peak",
    "wave.direction.mean",
    "wave.directional-spread",
    "wave.period.tm01.mean",
    "wave.period.tm02.mean",
    # current.*
    "current.speed.eastward.at-sea-surface",
    "current.speed.eastward.at-sea-surface-no-tide",
    "current.speed.eastward.barotropic",
    "current.speed.eastward.barotropic-no-tide",
    "current.speed.northward.at-sea-surface",
    "current.speed.northward.at-sea-surface-no-tide",
    "current.speed.northward.barotropic",
    "current.speed.northward.barotropic-no-tide",
    # sea.*
    "sea.temperature.at-surface",
    "sea.temperature.at-surface-anomaly",
]

In [4]:
class ModelResponseToWeatherQuery(BaseModel):
    """Structured response returned by WeatherBot for a user query.

    Carries the user-facing reply together with the checks the model performed
    and the location, variables and time window that either were used to
    answer or should be requested from the weather API.
    """
    # Text shown to the user: the answer, or a holding message while data is
    # being requested.
    response: str = Field(..., title='Response',
                          description='Response from WeatherBot to the user query')
    weather_query_check: bool = Field(..., title='Weather query check',
                                      description='Is the user message a weather query?')
    # False signals that more data must be requested using the fields below.
    sufficient_data_check: bool = Field(..., title='Sufficient data check',
                                        description='Does the data_store contain sufficient data to answer the query?')
    data_check_rationale: str = Field(..., title='Data check rationale',
                                      description='Reason for the result you have given for the sufficient_data_check')
    location: str = Field(..., title='Location',
                          description='Location the query refers to; empty if the query does not specify one')
    variables: List[str] = Field(..., title='Variables',
                                 description='Most relevant variables for the query, selected from the provided VARIABLES list')
    # The time format keeps the hour component (%H) so that hourly windows can
    # be expressed; only minutes and seconds are fixed at zero.
    start_time: datetime = Field(..., title='Start Time',
                                 description='The first time to request data for, on the hour, formatted as %Y-%m-%dT%H:00:00Z')
    end_time: datetime = Field(..., title='End Time',
                               description='The last time to request data for, on the hour, formatted as %Y-%m-%dT%H:00:00Z')
    interval: Literal['hour', 'day'] = Field(..., title='Time interval',
                                             description='Time interval for the data.')

In [5]:
# Wrap the synchronous OpenAI client with instructor so that
# `chat.completions.create` accepts a `response_model` argument.
# `instructor.patch` is used instead of `instructor.apatch`: `apatch` is the
# deprecated alias intended for async clients and emits a DeprecationWarning.
# The API key is read from the environment; a missing OPENAI_API_KEY raises
# KeyError here rather than failing later on the first request.
pydantic_client = instructor.patch(
    openai.OpenAI(api_key=os.environ['OPENAI_API_KEY']))

  pydantic_client = instructor.apatch(


In [6]:
# Nothing has been requested from the weather API yet, so the DATA STORE
# section of the prompt carries a placeholder message.
formatted_data = "No data has been requested yet."

system_prompt = SYSTEM_PROMPT.format(
    # The trailing "Z" designates UTC, so the timestamp is taken in UTC rather
    # than as naive local time (which would be off by the machine's UTC offset).
    current_datetime=datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    user_location="unknown",
    data_store=formatted_data,
    vars=METSERVICE_VARIABLES
)

# Conversation sent to the model: the rendered system prompt followed by a
# sample user query.
messages = [
    {
        "role": "system",
        "content": system_prompt
    },
    {
        "role": "user",
        "content": "What is the weather like in Auckland tomorrow?"
    }
]

response_model = ModelResponseToWeatherQuery

# The patched client takes `response_model` in addition to the usual
# chat-completion arguments.
response = pydantic_client.chat.completions.create(
    model="gpt-4-turbo-preview",
    response_model=response_model,
    messages=messages,
)

# Sanity check that the call returned the structured model.
assert isinstance(response, ModelResponseToWeatherQuery)
print(f"GPT response: {response}")

GPT response: response='Holding message while data is being fetched.' weather_query_check=True sufficient_data_check=False data_check_rationale='Insufficient data available to provide a direct answer.' location='Auckland' variables=[] start_time=datetime.datetime(2024, 2, 19, 0, 0, tzinfo=TzInfo(UTC)) end_time=datetime.datetime(2024, 2, 20, 0, 0, tzinfo=TzInfo(UTC)) interval='day'
