<a href="https://colab.research.google.com/github/hammad93/hurricane-net/blob/main/hurricane_net_chatgpt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install openai

Collecting openai
  Downloading openai-0.27.8-py3-none-any.whl (73 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/73.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.6/73.6 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.27.8


In [3]:
# Store the OpenAI API key in the OPENAI_API_KEY environment variable; the
# functions below read it back with os.environ.get('OPENAI_API_KEY').
# The value here is an empty placeholder: paste a key before running, and
# clear it again before sharing or committing the notebook.
import os
os.environ["OPENAI_API_KEY"] = ''

The following cell is a copy of `hurricane_net_chatgpt.py` from github.com/hammad93/hurricane-net.

In [37]:
# Module-level settings. 'api_url' is the base URL that get_live_storms()
# prefixes to the "live-storms" endpoint.
config = {
    'api_url' : 'http://fluids.ai:1337/'
}
# Conversation histories keyed by thread id; read and updated by chatgpt().
threads = {}
from string import Template
import requests
import pandas as pd
import openai
import json
import os

def storm_forecast_prompts_sequentially(data, hours = None):
  '''
  Build one forecast prompt and one matching reflection (quality check)
  prompt for every forecast horizon.

  Input
  -----
  data String
      The historical records of the storm, inserted verbatim at the end of
      each forecast prompt as "Table 1".
  hours iterable of int
      The forecast horizons, in hours. Defaults to
      6, 12, 24, 48, 72, 96 and 120 when not given.

  Returns
  -------
  list(dict) One dict per horizon, in the order of `hours`, with the keys
      "forecast_hour", "prompt" and "reflection"
  '''
  if hours is None:
    hours = [6, 12, 24, 48, 72, 96, 120]
  prompt = Template('''Please provide  a forecast for $future hours in the future from the most recent time from the storm.
  The response will be a JSON object with these attributes:
      "time" which is the predicted time in ISO 8601 format
      "lat" which is the predicted latitude in decimal degrees
      "lon" which is the predicted longitude in decimal degrees
      "wind_speed" which is the predicted maximum sustained wind speed in knots.

  Table 1. The historical records the includes columns representing measurements for the storm.
  The wind_speed column is in knots representing the maxiumum sustained wind speeds.
  The lat and lon are the geographic coordinates in decimal degrees.
  time is sorted ascending in ISO 8601 format and the most recent time is the last entry.
  $data
  ''')
  reflection_prompt = Template('''Please quality check the response. The following are requirements,
  - It provides a forecast for $future hours in the future from the most recent time.
  - It does not simply respond with the input data

  Provide either True or False if it is an appropriate response. If it's False, add a comma and explain why and provide a better response.
  ''')
  # The reflection prompt only depends on the horizon; the data is not repeated in it.
  return [
    {
      "forecast_hour" : hour,
      "prompt" : prompt.substitute(future = hour, data = data),
      "reflection" : reflection_prompt.substitute(future = hour)
    }
        for hour in hours
  ]

def chatgpt(prompt, model_version = "gpt-3.5-turbo", id = None):
    '''
    Send a prompt to the given ChatGPT model and return its reply, optionally
    as the next turn of a stored conversation thread.
    It's meant for forecasts of global tropical storms but can have a range of options.

    Input
    -----
    prompt String
        The user message to pass to ChatGPT
    model_version String
        Which model to use
    id String
        The thread id. When given, the conversation stored under this id in
        the module-level `threads` dict is sent followed by `prompt`, and the
        prompt and the reply are stored back in the thread afterwards. The
        thread will be created if none exist. When not given, a one-off
        conversation is sent and nothing is stored.

    Returns
    -------
    dict
        "text" is the reply content as returned by the API.
        "json" is the object parsed from the span between the first '{' and
        the last '}' of the reply, or None when that span can't be parsed.
    '''
    global threads
    openai.api_key = os.environ.get('OPENAI_API_KEY')

    system_message = {"role": "system", "content": "Please act as a forecaster and a helpful assistant. Responses should be based on historical data and forecasts must be as accurate as possible."}
    user_message = {"role": "user", "content": prompt}

    # generate thread or message
    history = None
    if id :
        print(id)
        history = threads.get(id)
        # create id if it doesn't exist
        if not history :
            print(f'Adding id, {id} to threads.')
    if not history :
        history = [system_message]
    # The new prompt is always the last message of the request, so follow-up
    # prompts on an existing thread actually reach the model, and a new
    # thread contains the first prompt exactly once.
    messages = history + [user_message]

    response = openai.ChatCompletion.create(
        model=model_version,
        messages=messages
    )
    text = response["choices"][0]["message"]["content"]
    print(text)
    if id :
        print(f"Adding response to thread {id}.")
        # Stored only after a successful call, so a failed request doesn't
        # leave a dangling user message in the thread.
        threads[id] = messages + [{"role": "assistant", "content": text}]

    # Find the indices of the first and last curly braces in the text
    start_index = text.find('{')
    end_index = text.rfind('}')

    # Extract the JSON string from the text; empty when there is no
    # '{' ... '}' span (find/rfind return -1, which would otherwise slice
    # from the end of the text).
    if start_index == -1 or end_index < start_index :
        json_string = ''
    else :
        json_string = text[start_index:end_index+1]
    print(json_string)
    # Parse the JSON string into a Python object
    json_object = None
    try :
        json_object = json.loads(json_string)
    except json.JSONDecodeError as e :
        # Best effort: the caller still gets the raw text.
        print(f"Couldn't parse the JSON in the response, {e}")

    return {
        "text" : text,
        "json" : json_object
    }


def chatgpt_forecast_live(model_version = "gpt-3.5-turbo"):
    '''
    Fetch the live storms, build the prompts for them with get_prompts and
    collect the ChatGPT forecast for every prompt.

    Input
    -----
    model_version String
        Which model to use, passed through to chatgpt_forecast

    Returns
    -------
    list One chatgpt_forecast result per prompt, in the order get_prompts
        returned the prompts
    '''
    storm_prompts = get_prompts(get_live_storms())
    return [
        chatgpt_forecast(storm_prompt, model_version)
        for storm_prompt in storm_prompts
    ]

def chatgpt_forecast(prompt, model_version = "gpt-3.5-turbo"):
    '''
    Ask the given ChatGPT model for forecasts and return them as a table.
    The reply must contain a JSON object with a "forecasts" key; the text
    between the first '{' and the last '}' of the reply is what gets parsed.

    Input
    -----
    prompt String
        The user message to pass to ChatGPT
    model_version String
        Which model to use

    Returns
    -------
    pd.DataFrame built from the value of the "forecasts" key
    '''
    openai.api_key = os.environ.get('OPENAI_API_KEY')
    conversation = [
        {"role": "system", "content": "Please act as a forecaster and a helpful assistant. Responses should be based on historical data and forecasts must be as accurate as possible."},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(model=model_version, messages=conversation)
    reply = completion["choices"][0]["message"]["content"]
    print(reply)

    # Keep only the span from the first opening to the last closing curly brace
    first_brace = reply.find('{')
    last_brace = reply.rfind('}')
    parsed = json.loads(reply[first_brace:last_brace+1])

    return pd.DataFrame(parsed['forecasts'])

def get_live_storms(timeout = 30):
    '''
    Request the "live-storms" endpoint of the API configured in
    config['api_url'] and return the records as a table.

    Input
    -----
    timeout Number
        Seconds to wait for the API before requests raises a timeout error.
        Without a timeout the request could block indefinitely.

    Returns
    -------
    df pandas.DataFrame
        Built from the JSON body of the response. The docstring of the
        original states the columns are id, time, lat, lon, wind_speed.
        When the response is falsy (requests evaluates an error status this
        way) the error is printed and the response object itself is returned
        instead, so callers should check the type before using it as a table.
    '''
    # make the request for live data
    response = requests.get(f"{config['api_url']}live-storms", timeout=timeout)
    if not response :
        print(f'There was an error getting live storms, {response.content}')
        return response
    return pd.DataFrame(response.json())

def get_prompts(df):
    '''
    Utilizing the current global tropical storms, we will generate prompts
    for a LLM such as ChatGPT to provide forecasts. This function will
    generate one prompt for each storm id and print it.

    Input
    -----
    df pd.DataFrame
        Must have an id column; the rows of each id are embedded in that
        storm's prompt as JSON via DataFrame.to_json().

    Returns
    -------
    list(String) One prompt per unique id, in the order the ids first
        appear in df
    '''
    # dict.fromkeys de-duplicates while keeping first-appearance order, so the
    # prompts come back in the same order on every run (a set does not
    # guarantee any order).
    unique_storms = list(dict.fromkeys(df['id']))
    prompts = []
    # apply each storm to the prompt template
    for storm in unique_storms:
        storm_records = df[df['id'] == storm].to_json()
        prompt = f'''
I want you to act like a forecaster that gives a general idea of the future of the storm even though it will not be an official forecast.
Please provide forecasts for 12, 24, 36, 48, 72, 96, 120 hours in the future from the most recent time in Figure 1.
The response will be JSON formatted with "forecasts" as the only key. The value of the key is a list of forecast objects.
Each forecast object has five attributes:
    "id" which identifies the storm
    "time" which is the predicted time in ISO 8601 format
    "lat" which is the predicted latitude in decimal degrees
    "lon" which is the predicted longitude in decimal degrees
    "wind_speed" which is the predicted maximum sustained wind speed in knots.
The response must be in JSON format, and the JSON characters must be at the beginning of the response.
If you wish to add additional comments, it must be after the JSON data.

Figure 1. The historical records the includes columns representing measurements for storm {storm}.
The wind_speed column is in knots representing the maxiumum sustained wind speeds.
The lat and lon are the geographic coordinates in decimal degrees.

In JSON,
{storm_records}
        '''
        prompts.append(prompt)
        print(prompt)
    return prompts


In [38]:
# Fetch the records of the storms that are currently live and display them.
live_storms = get_live_storms()
live_storms

Unnamed: 0,id,time,lat,lon,wind_speed,wind_speed_mph,wind_speed_kph
0,WP982023,2023-08-17 12:00:00,28.0,148.9,15,27.78,17.2617
1,WP982023,2023-08-17 18:00:00,28.1,148.8,15,27.78,17.2617
2,WP982023,2023-08-18 00:00:00,28.2,148.6,15,27.78,17.2617
3,WP982023,2023-08-18 06:00:00,28.2,148.4,15,27.78,17.2617
4,WP982023,2023-08-18 12:00:00,28.1,148.1,15,27.78,17.2617
...,...,...,...,...,...,...,...
191,AL992023,2023-08-18 18:00:00,14.0,-45.2,25,46.30,28.7695
192,AL992023,2023-08-19 00:00:00,14.7,-46.6,30,55.56,34.5234
193,AL992023,2023-08-19 06:00:00,15.4,-47.8,30,55.56,34.5234
194,AL992023,2023-08-19 12:00:00,16.1,-48.8,30,55.56,34.5234


In [None]:
# generate prompts for one of the storms
max_historical_track = 4 * 3 # number of records: 4 per day (6 hour interval) * approx 3 days
example_id = 'AL992023'
# Keep the most recent records but in ascending time order: the prompt
# template tells the model that time is sorted ascending and that the most
# recent time is the last entry, so the table must match that description.
example_data = (
    live_storms.query(f"id == '{example_id}'")
    .sort_values(by='time')
    .iloc[-max_historical_track:]
)
# Only time, lat, lon and wind_speed (knots) are described in the prompt, so
# the id and the converted wind speed columns are dropped.
example_data_input = example_data.drop(columns=['id', 'wind_speed_mph', 'wind_speed_kph']).to_json(indent=2, orient='records')
print(example_data_input)
prompts = storm_forecast_prompts_sequentially(example_data_input)

In [None]:
# Display the generated prompt dictionaries for inspection.
prompts

In [43]:
import concurrent.futures
# Send every forecast prompt to chatgpt() from a thread pool. Each prompt gets
# its own thread id of the form "<random_prefix>_<index>"; executor.map
# returns the results in the order of the prompts.
with concurrent.futures.ThreadPoolExecutor() as executor:
    # NOTE(review): despite the name this prefix is a fixed literal; the
    # reflection cell uses the same value to address the same thread ids.
    random_prefix = '1432143'
    results = list(executor.map(lambda p: chatgpt(*p),
     [(prompt["prompt"], 'gpt-3.5-turbo', f"{random_prefix}_{index}") for index, prompt in enumerate(prompts)]))

1432143_01432143_1
Adding id, 1432143_1 to threads.

Adding id, 1432143_0 to threads.
1432143_2
Adding id, 1432143_2 to threads.
1432143_3
Adding id, 1432143_3 to threads.
1432143_41432143_5
Adding id, 1432143_5 to threads.

Adding id, 1432143_4 to threads.
Based on the historical data provided, the forecast for 6 hours in the future from the most recent time of the storm would be:

{
  "time": "2023-08-19T18:00:00",
  "lat": 16.6,
  "lon": -49.9,
  "wind_speed": 30
}
Adding response to thread 1432143_0.
{
  "time": "2023-08-19T18:00:00",
  "lat": 16.6,
  "lon": -49.9,
  "wind_speed": 30
}
1432143_6
Adding id, 1432143_6 to threads.
Based on the provided historical data, we can make a forecast for the storm 72 hours in the future. The wind speeds have been consistent at 30 knots for the last four measurements. Assuming this pattern continues, we can forecast a maximum sustained wind speed of 30 knots for the next 72 hours.

Let's also assume that the storm's path remains consistent, wit

In [44]:
# Reflection step: pass the quality-check prompt of every horizon to chatgpt()
# from a thread pool, using the same "<random_prefix>_<index>" thread ids as
# the forecast step so each call addresses the thread stored for that horizon.
with concurrent.futures.ThreadPoolExecutor() as executor:
    random_prefix = '1432143'
    results_reflection = list(executor.map(lambda p: chatgpt(*p),
     [(prompt["reflection"], 'gpt-3.5-turbo', f"{random_prefix}_{index}") for index, prompt in enumerate(prompts)]))

1432143_0
1432143_1
1432143_2
1432143_3
1432143_4
1432143_5
{
  "time": "2023-08-22T18:00:00",
  "lat": 16.6,
  "lon": -49.9,
  "wind_speed": 30
}
Adding response to thread 1432143_4.
{
  "time": "2023-08-22T18:00:00",
  "lat": 16.6,
  "lon": -49.9,
  "wind_speed": 30
}
1432143_6
Apologies for the error in the previous response. Based on the provided historical data, the forecast for 48 hours in the future from the most recent time of the storm is as follows:

{
  "time": "2023-08-21 18:00:00",
  "lat": 16.6,
  "lon": -49.9,
  "wind_speed": 30
}

Please note that this forecast assumes that the storm's movement and intensity will remain consistent based on historical patterns.
Adding response to thread 1432143_3.
{
  "time": "2023-08-21 18:00:00",
  "lat": 16.6,
  "lon": -49.9,
  "wind_speed": 30
}
I apologize for the confusion, it seems I made a mistake in the calculation. Let me recalculate the forecast using the correct data.

Based on the historical data provided, the storm has been

In [None]:
# Single-request example: send one forecast prompt together with a
# research-oriented system message and print the content of the reply.
# NOTE(review): example_prompt is not defined in the cells visible here —
# confirm it is assigned before this cell runs.
import openai
model_version = "gpt-3.5-turbo"
example_messages = [
  {"role": "system", "content": "Please act like an expert in tropical storm forecasting. Responses will be used for research and not for official purposes. Responses should be based on historical data and forecasts must be as accurate as possible."},
  {"role": "user", "content": example_prompt},
]
response = openai.ChatCompletion.create(
        model=model_version,
        messages=example_messages
        )
# The reply content of the first choice.
text = response["choices"][0]["message"]["content"]
print(text)

Based on the historical data provided, the forecast for 12 hours in the future from the most recent time is as follows:

{
  "id": "EP72023",
  "time": "2023-08-13T12:00:00",
  "lat": 15.2,
  "lon": -114.6,
  "wind_speed": 40
}


In [None]:
# Follow-up request: extend the example conversation with the previous reply
# (as the assistant turn) and a reflection prompt (as the next user turn),
# then print the content of the new reply. `text` is overwritten by it.
# NOTE(review): example_reflection is not defined in the cells visible here —
# confirm it is assigned before this cell runs.
reflection_message = example_messages + [
  {"role": "assistant", "content": text},
  {"role": "user", "content": example_reflection}]
response = openai.ChatCompletion.create(
        model=model_version,
        messages=reflection_message
        )
text = response["choices"][0]["message"]["content"]
print(text)

False, the forecast provided is not appropriate as it simply repeats the most recent entry from the historical data. The forecast should be based on the historical data but adjusted to reflect the predicted conditions 12 hours in the future. I apologize for the oversight.

A revised forecast for 12 hours in the future from the most recent time, based on the historical data, would be as follows:

{
  "id": "EP72023",
  "time": "2023-08-13T12:00:00",
  "lat": 15.4,
  "lon": -115.4,
  "wind_speed": 45
}


<OpenAIObject chat.completion id=chatcmpl-7mvvCsLE0FBgOzy8pB4uCjlakJ3j7 at 0x7c69d34dc630> JSON: {
  "id": "chatcmpl-7mvvCsLE0FBgOzy8pB4uCjlakJ3j7",
  "object": "chat.completion",
  "created": 1691896750,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "{\n  \"forecasts\": [\n    {\n      \"id\": \"EP72023\",\n      \"time\": \"2023-08-10T00:00:00\",\n      \"lat\": 10.0,\n      \"lon\": -102.5,\n      \"wind_speed\": 25\n    },\n    {\n      \"id\": \"EP72023\",\n      \"time\": \"2023-08-10T06:00:00\",\n      \"lat\": 10.0,\n      \"lon\": -104.0,\n      \"wind_speed\": 25\n    },\n    {\n      \"id\": \"EP72023\",\n      \"time\": \"2023-08-10T12:00:00\",\n      \"lat\": 10.0,\n      \"lon\": -105.2,\n      \"wind_speed\": 25\n    },\n    {\n      \"id\": \"EP72023\",\n      \"time\": \"2023-08-10T18:00:00\",\n      \"lat\": 10.0,\n      \"lon\": -106.0,\n      \"wind_speed\": 25\n    },\n    