In [1]:
import sys
sys.path.append('../')

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv(), override=True)

ModuleNotFoundError: No module named 'dotenv'

---

In [4]:
import requests
import json
from typing import Any
from tqdm import tqdm
import os
import pandas as pd
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 100)

from group_data import df_pipe
from src.prompts.options import next_state_options
from src.prompts.system_prompt_quotes_v3 import sys_prompt
from src.prompts.user_prompt_quotes_v3 import (
    examples, 
    user, 
    parse_data_object, 
    build_user_message
)
from src.enums.states import States
from src.enums.tracks import track_mapping
import tiktoken
import re
from pathlib import Path

ModuleNotFoundError: No module named 'tqdm'

### Import Data + Initialize Encoder

In [5]:
# Directory holding the radio-track CSV exports. The individual file paths are
# derived from it so that only this one line changes on another machine.
data_dir = '/Users/americanthinker/Downloads/antx_data/'
data_path = os.path.join(data_dir, '2024-05-14_all_tracks.csv')
test_data = os.path.join(data_dir, '2024-05-22_all_tracks.csv')
# Path.iterdir() yields entries in arbitrary order; sorting keeps positional
# lookups such as valid_records[4] and valid_records[:3] stable across runs.
csv_paths = sorted(path for path in Path(data_dir).iterdir() if path.name.endswith('csv'))

In [6]:
def convertCSV_to_records(path: str) -> list[dict]:
    '''
    Read the CSV file at `path` and return its rows as a list of dicts,
    one dict per row keyed by column name. Missing cells are replaced
    with empty strings before the conversion.
    '''
    frame = pd.read_csv(path)
    filled = frame.fillna('')
    return filled.to_dict(orient='records')
def get_valid_states(records: list[dict]) -> list[dict]:
    '''
    Return only the records that carry a state label, preserving order.

    A string state counts as present when it contains something other than
    whitespace. (Testing a string with `any(...)` walks it character by
    character, so a cell holding only spaces would pass as a valid state.)
    Non-string state values are still tested with `any(...)`.

    Raises:
        KeyError: if a record has no 'state' key.
    '''
    def has_state(item: dict) -> bool:
        state = item['state']
        if isinstance(state, str):
            return bool(state.strip())
        return any(state)

    return [item for item in records if has_state(item)]

In [7]:
# One list of row-dicts per CSV file, in csv_paths order
all_records = list(map(convertCSV_to_records, csv_paths))
# Same nesting, keeping only the rows that get_valid_states accepts
valid_records = list(map(get_valid_states, all_records))

[13, 13, 18, 14, 26, 21, 28, 9]

In [13]:
def parse_data_object(data_object: dict[str,Any]) -> str:
    '''
    Render the track entries of a single data object as one text block.

    Every key that starts with 'track' is kept, in the dict's own order.
    Each key is translated through `track_mapping` and emitted as
    "<mapped name>: <value>"; the entries are separated by a blank line.
    '''
    return '\n\n'.join(
        f'{track_mapping[name]}: {transmission}'
        for name, transmission in data_object.items()
        if name.startswith('track')
    )

In [14]:
# Peek at the first state-bearing record of the fifth file in valid_records
record = valid_records[4][0]
record

{'start': '5/14/2024 8:42',
 'end': '5/14/2024 8:42',
 'track1': '',
 'track2': '',
 'track3': 'All stations, all stations, be advised, trial start, trial start.',
 'track4': 'All stations, all stations, be advised, trial start, trial start.',
 'state': 'Trial Start',
 'notes': '',
 'delay type': ''}

In [19]:
from src.enums.tracks import Tracks

In [20]:
Tracks

<enum 'Tracks'>

In [None]:
build_user_message(

In [71]:
# Output of the project's df_pipe helper for the reference day; later cells
# index it by a minute string such as '12:26'.
data = df_pipe(data_path, fill_state=False, merge_tracks=True)
# Reuse the notebook's CSV loader rather than repeating its read/fillna/to_dict chain
records = convertCSV_to_records(test_data)
encoder = tiktoken.get_encoding('cl100k_base')

In [73]:
# Keep only the data rows where a state is present
valid_states = get_valid_states(records)

### Peek at a sample User Message

In [8]:
# Set some constants
minute_value = '12:26'
tracks = parse_data_object(data[minute_value])
# First non-empty entry among this minute's 'state' values
current_state = [s for s in data[minute_value]['state'] if s][0]

user_message = build_user_message(examples, current_state, tracks, next_state_options[current_state]['Options'])
print(user_message)


As context you will be given a Current State and a series of radio transmissions broken out into separate tracks. 
The transmissions represent dialogue between stakeholders involved in maritime UAV test and evaluation trials.
  
Given the context perform the following two sequential tasks:
1. Decide if a change to the Current State is warranted given the transmissions. 
   - If no change is warranted simply return a json object as follows:
   - Output: {"current_state": "<current state>"}

2. Based on your decision from step 1, if a change in state is warranted then choose from among the next state options and return your output to include, current state, predicted state, and your reasoning for choosing the next predicted state.  Follow the output guidelines below:
    - Output: {"predicted_state": "<predicted state>", "reason": "<reason you chose the predicted state>"}


EXAMPLES
--------------------------
Use the following examples as your guide for predicting whether or not to chan

### View total token length of combined messages

In [9]:
# Number of tokens the encoder produces for the user message and system prompt joined together
len(encoder.encode(user_message + sys_prompt))

2590

### LLM Call setup

In [10]:
def format_response(response: requests.models.Response):
    '''
    Pull the reply text out of a chat-completion HTTP response.

    Decodes the body as JSON and returns the `content` of the first
    choice's message with leading/trailing whitespace removed.
    '''
    first_choice = response.json()['choices'][0]
    content = first_choice['message']['content']
    return content.strip()

In [11]:
def chat_completion(user_prompt: str,
                    temperature: float=0.8,
                    max_tokens: int=4096,
                    stream: bool=False,
                    raw: bool=False,
                    timeout: float=300.0
                    ) -> "str | dict | requests.models.Response | None":
    '''
    Send `user_prompt`, with the notebook-level `sys_prompt` as the system
    message, to the chat-completion endpoint and return the reply.

    The endpoint URL and bearer token are read from the LEAPFROG_URL and
    LEAPFROG_API_KEY environment variables.

    Args:
        user_prompt: Content of the user message.
        temperature: Forwarded as the payload's "temperature".
        max_tokens: Forwarded as the payload's "max_tokens".
        stream: Forwarded as the payload's "stream" flag.
        raw: When True, return the whole decoded JSON body instead of only
            the message text.
        timeout: Seconds passed to `requests.post` as its timeout. New and
            defaulted, so existing calls behave as before unless the server
            stalls for longer than this.

    Returns:
        The stripped message text (status 200, raw=False), the decoded JSON
        body (status 200, raw=True), the Response object itself when the
        status is not 200, or None when sending or decoding raised.

    Raises:
        KeyError: if either environment variable is unset (they are looked
            up before the try block).
    '''
    url = os.environ['LEAPFROG_URL']
    api_key = os.environ['LEAPFROG_API_KEY']
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }
    # Named `payload` so it cannot be confused with the notebook-level `data`
    payload = {
        "model": "vllm",
        "messages": [
            {
                "role": "system",
                "content": sys_prompt
            },
            {
                "role": "user",
                "content": user_prompt,
            }
        ],
        "stream": stream,
        "temperature": temperature,
        "max_tokens": max_tokens
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled server
        response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=timeout)
        if response.status_code != 200:
            print('Response is not 200')
            return response
        if raw:
            return response.json()
        return format_response(response)
    except Exception as e:
        # Deliberately best-effort: report the failure and hand back None so a
        # long batch of calls is not aborted by a single bad request.
        print(e)
        return None

## Test Harness

In [94]:
def original_test_harness(test_data: list[dict], temperature: float=0.5) -> list[dict]:
    '''
    Run the model over data points shaped as {minute: {...}}.

    For each data point the first key is taken as the minute, its value is
    rendered with `parse_data_object`, and the first non-empty entry of that
    value's 'state' collection is used as the expected label. The prompt's
    Current State starts at 'Trial Start' and is then advanced to each
    expected label, not to whatever the model answered.

    Args:
        test_data: Data points, each a dict whose first key is the minute.
        temperature: Sampling temperature passed to `chat_completion`.

    Returns:
        One dict per data point with keys 'response' (whatever
        `chat_completion` returned), 'label' and 'minute'.
    '''
    current_state = 'Trial Start'
    responses = []
    for data_point in tqdm(test_data):
        minute_value = list(data_point.keys())[0]
        minute_data = data_point[minute_value]
        tracks = parse_data_object(minute_data)
        label = [s for s in minute_data['state'] if s][0]
        user_message = build_user_message(examples, current_state, tracks, next_state_options[current_state]['Options'])
        responses.append({'response':chat_completion(user_message, temperature=temperature, max_tokens=250), 'label':label, 'minute': minute_value})
        # The next prompt starts from the known label
        current_state = label
    return responses

In [143]:
def new_test_harness(test_data: list[dict], temperature: float=0.5) -> list[dict]:
    '''
    Run the model over flat records (one dict per row with 'track*' and
    'state' keys).

    The prompt's Current State starts at 'Trial Start' and is advanced to
    each record's own 'state' value after that record has been processed,
    not to whatever the model answered. A record that raises is reported
    and skipped; the Current State is left untouched in that case.

    Args:
        test_data: Records to evaluate, in order.
        temperature: Sampling temperature passed to `chat_completion`.

    Returns:
        One dict per processed record with keys 'response' (whatever
        `chat_completion` returned), 'label' and 'minute'.
    '''
    current_state = 'Trial Start'
    responses = []
    for i, data_point in enumerate(tqdm(test_data)):
        try:
            # Records read straight from the CSVs carry 'start'/'end' rather
            # than 'minute'; fall back so they are not all dropped by a KeyError.
            minute_value = data_point['minute'] if 'minute' in data_point else data_point['start']
            tracks = parse_data_object(data_point)
            label = data_point['state']
            user_message = build_user_message(examples, current_state, tracks, next_state_options[current_state]['Options'])
            responses.append({'response':chat_completion(user_message, temperature=temperature, max_tokens=250), 'label':label, 'minute': minute_value})
            current_state = label
        except Exception as e:
            # Best-effort run: say which record failed and why, then move on
            print(f'Skipping record {i}: {e!r}')
            continue
    return responses

In [113]:
# Earlier experiment with the first harness, kept for reference:
# test = original_test_harness(states[1:3], temperature=0.8)
# Run the record-based harness over the state-bearing rows in valid_states
responses = new_test_harness(valid_states)

100%|███████████████████████████████████████████████████████| 13/13 [00:30<00:00,  2.35s/it]


In [173]:
# Run the harness on the first three files only; one result list per file
all_results = []
for test in valid_records[:3]:
    try:
        file_results = new_test_harness(test)
    except Exception as e:
        print(e)
    else:
        all_results.append(file_results)

100%|███████████████████████████████████████████████████████| 13/13 [00:28<00:00,  2.18s/it]
100%|███████████████████████████████████████████████████████| 13/13 [00:41<00:00,  3.18s/it]
100%|███████████████████████████████████████████████████████| 18/18 [00:40<00:00,  2.27s/it]


In [157]:
def parse_test_results(results: list[dict]) -> list[dict]:
    num_results = len(results)
    parse_errors = 0
    hits = 0
    errors = []
    for res in results:
        try:
            label = res['label'].strip()
            subbed = json.loads(re.sub("</s>", '', res["response"]))
            try:
                predicted = subbed['predicted_state'].strip()
                
            except KeyError:
                predicted = subbed['current_state'].strip()
            if predicted == label:
                hits += 1
            else: errors.append(res)
        except json.JSONDecodeError:
            parse_errors += 1
            print(f"Parse ERROR: \t{res['response']}")
    parse_error_rate = round(parse_errors/num_results, 2)
    hit_rate = round(hits/num_results, 2)
    print(f'Parse Error Rate: {parse_error_rate}')
    print(f'Hit Rate: {hit_rate}')
    print(f'Hits: {hits} out of {num_results}')
    return parse_errors, errors, hit_rate

In [122]:
# Hand-collected radio transcript snippets.
# NOTE(review): the name suggests these are transmissions the model got wrong;
# confirm. No cell shown here reads this list.
miss_verbiage = [
    'To be advised, we got a helo flying low over the north corner of the field.',
    "Yeah. Yeah? Mistrial? Mistrial. OK. All call, all call. Trial completion, trial completion. So, yeah. It's passed. So there were, Kate filled me in, there were a few frames, maybe there was something in frame, but they didn't have a lawyer now. Go for Travis.",
    "So, this will be trial two, two, one. And if you're ready, I'm gonna give the call. All call, all call. Trial 2-2-1 has begun. All call, all call. Trial 2-2-1 has begun.",
    "the trial and we'll just All calls, trial end. Let's have red vehicle reset, and we will restart. Yeah, reset those bad boys.",
    "I'm like a Mistrial was requested trial and trial and resetting for next trial"
]

In [176]:
# Score each file's results; collect the hit rate and the mismatched results per file
rates, errors = [], []
for i, atest in enumerate(all_results):
    try:
        atuple = parse_test_results(atest)
        _parse_errors, file_errors, file_hit_rate = atuple
    except Exception as e:
        print(f'ERROR at {i}: due to {e}')
    else:
        rates.append(file_hit_rate)
        errors.append(file_errors)

In [175]:
# Unweighted mean of the per-file hit rates; NaN rather than a
# ZeroDivisionError when no file was scored.
sum(rates)/len(rates) if rates else float('nan')

0.45333333333333337

In [177]:
errors

[[{'response': '{"predicted_state": "Trial End", "reason": "Radio transmissions indicate that the GARC is still idle and not moving, which suggests that the trial has not started. Therefore, it is reasonable to predict that the trial will end without any progress."}',
   'label': 'Delay Start',
   'minute': '09:33'},
  {'response': '{"predicted_state": "RTB", "reason": "Radio transmissions indicate that the boats are in transit to their base of operations, thus signaling the end of the trials."}',
   'label': 'Trial End',
   'minute': '09:40'},
  {'response': '{"current_state": "Trial End"}',
   'label': 'Trial Start',
   'minute': '10:11'},
  {'response': '{"current_state": "Trial End"}</s>',
   'label': 'Delay Start',
   'minute': '10:33'},
  {'response': '{"current_state": "Trial Start"}',
   'label': 'Trial End',
   'minute': '11:01'},
  {'response': '{"current_state": "Trial End"}</s>',
   'label': 'Trial Start',
   'minute': '11:07'},
  {'response': '{"current_state": "Trial End"