In [1]:
import redis
import pandas as pd
import json
from io import StringIO
import boto3
from util.loaders import init_outputs, push_data, get_valkey_keys, test_update
from util.loaders import push_logs, get_current_state, TIME_ZONE
from util.logs import get_logger, setup_logging
from comms.s3 import get_objects, upload_file, copy_from_s3
from comms.lfai import chat_completion

In [2]:
import os

import boto3
# To run this script, Valkey must be deployed and port-forwarded.
######################################################
#DO NOT DEPLOY THE FULL BACKEND IF RUNNING THIS SCRIPT
######################################################
# LFAI must also be up and running, with the out-of-cluster endpoints set.
# Credentials are read from the standard AWS environment variables so real keys
# never have to be saved in the notebook. If a variable is unset, the original
# placeholder string is used, which keeps the previous behavior unchanged.
access_key = os.environ.get("AWS_ACCESS_KEY_ID", "<access_key_here>")
secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY", "<secret_key_here>")
read_bucket = "antx-common-data"
write_bucket = "antx-comp-002"
endpoint = "https://s3-fips.us-gov-west-1.amazonaws.com"

# S3 client bound to the explicit endpoint and region above.
s3 = boto3.client("s3", region_name="us-gov-west-1", aws_access_key_id=access_key,
                         aws_secret_access_key=secret_key, endpoint_url=endpoint)

In [3]:
# List the objects currently in the write bucket.
resp = s3.list_objects_v2(Bucket=write_bucket)
# S3 omits the "Contents" key entirely when no objects match, so default to an
# empty list instead of raising KeyError on an empty bucket.
# NOTE(review): a single list_objects_v2 call returns at most 1000 keys; switch
# to a paginator if this bucket can grow beyond that.
files = list(resp.get("Contents", []))
files

[{'Key': '2024-7-29_all_tracks_error.csv',
  'LastModified': datetime.datetime(2024, 7, 30, 14, 23, 24, tzinfo=tzutc()),
  'ETag': '"1ee272cdbbc5fdf46f249837820d8f8b"',
  'Size': 15057,
  'StorageClass': 'STANDARD'},
 {'Key': '2024-7-29_all_tracks_partial.csv',
  'LastModified': datetime.datetime(2024, 7, 30, 3, 33, 55, tzinfo=tzutc()),
  'ETag': '"3cab13f3b82142de1810fa8da9bd191e"',
  'Size': 59123,
  'StorageClass': 'STANDARD'},
 {'Key': '2024-7-30_all_tracks_partial.csv',
  'LastModified': datetime.datetime(2024, 7, 31, 5, 52, 48, tzinfo=tzutc()),
  'ETag': '"31538ff8688411fb7a056a933c0dd62d"',
  'Size': 314665,
  'StorageClass': 'STANDARD'},
 {'Key': '2024-7-31_all_tracks.csv',
  'LastModified': datetime.datetime(2024, 7, 31, 23, 4, 20, tzinfo=tzutc()),
  'ETag': '"cd91193b26ba86ce563f354fa9a0597e"',
  'Size': 252145,
  'StorageClass': 'STANDARD'},
 {'Key': '2024_07_26_all_tracks_test.csv',
  'LastModified': datetime.datetime(2024, 7, 26, 17, 34, 52, tzinfo=tzutc()),
  'ETag': '"15

In [4]:
# Download one all-tracks CSV from the write bucket to a local file, then load
# it into a DataFrame and preview the first rows.
source_key = "2024-7-30_all_tracks_partial.csv"
local_csv = "./test.csv"
s3.download_file(Bucket=write_bucket, Key=source_key, Filename=local_csv)
df = pd.read_csv(local_csv)
df.head()

Unnamed: 0,start,end,track1,track2,track3,track4,state,notes,delay type
0,7/30/2024 6:55,7/30/2024 6:56,You You You,,,,Trial End,,
1,7/30/2024 6:55,7/30/2024 6:56,,,You You You,You You You,,,
2,7/30/2024 6:56,7/30/2024 6:57,,,You You You,You You You,,,
3,7/30/2024 6:56,7/30/2024 6:57,You You You,,,,,,
4,7/30/2024 6:57,7/30/2024 6:58,,,You You You,You You You,,,


In [5]:
# Blank out missing values, parse the start/end columns into datetimes localized
# to TIME_ZONE, and order the rows by start time with a fresh 0..n-1 index.
df = (
    df.fillna("")
    .assign(
        start=lambda frame: pd.to_datetime(frame["start"]).dt.tz_localize(tz=TIME_ZONE),
        end=lambda frame: pd.to_datetime(frame["end"]).dt.tz_localize(tz=TIME_ZONE),
    )
    .sort_values("start", ascending=True, ignore_index=True)
)
df.head()

Unnamed: 0,start,end,track1,track2,track3,track4,state,notes,delay type
0,2024-07-30 06:55:00-07:00,2024-07-30 06:56:00-07:00,You You You,,,,Trial End,,
1,2024-07-30 06:55:00-07:00,2024-07-30 06:56:00-07:00,,,You You You,You You You,,,
2,2024-07-30 06:56:00-07:00,2024-07-30 06:57:00-07:00,,,You You You,You You You,,,
3,2024-07-30 06:56:00-07:00,2024-07-30 06:57:00-07:00,You You You,,,,,,
4,2024-07-30 06:57:00-07:00,2024-07-30 06:58:00-07:00,,,You You You,You You You,,,


In [6]:
# Project helpers needed for the replay in this cell.
from api import init_run
from util.objects import MetricTracker
from util.loaders import get_valkey_keys, get_current_run, format_for_push, parse_date
from comms.valkey import get_output_frame

# Rolling state that get_real_state reads and overwrites (via `global`) as it
# walks the rows; these are the values used for the first row.
current_state = "Trial Start"
delay_type = ""

# Initialize a run for the chosen date, then look up the identifiers of the
# current run and the Valkey keys that belong to it.
# NOTE(review): "07302024" is presumably MMDDYYYY -- confirm against parse_date.
date = parse_date("07302024")
init_run(date)
prefix, run_id, status = get_current_run()
metrics = MetricTracker()
valkey_keys = get_valkey_keys(prefix, run_id)

def get_real_state(row):
    """Run one row through chat_completion and push the response.

    The request carries the row's ``start``/``end`` values, its four track
    columns, and the module-level ``current_state`` and ``delay_type``. The
    ``state`` and ``delay_type`` of the response are stored back into those
    globals, and the response is handed to ``push_data`` keyed by its
    ``start_time`` together with the module-level ``metrics`` and
    ``valkey_keys``.

    Args:
        row: Mapping-like row providing ``start``, ``end`` and
            ``track1`` .. ``track4``.

    Returns:
        None.
    """
    global current_state, delay_type

    # Build the request in the same key order: times, tracks, then prior state.
    request = {"start_time": row["start"], "end_time": row["end"]}
    for track in ("track1", "track2", "track3", "track4"):
        request[track] = row[track]
    request["state"] = current_state
    request["delay_type"] = delay_type

    response = chat_completion(request)

    # Carry the returned state forward so the next row sees it.
    current_state = response["state"]
    delay_type = response["delay_type"]
    push_data({response["start_time"]: response}, metrics, valkey_keys)

# Feed every row of df through get_real_state, in frame order. A plain loop is
# used because the call is made only for its side effects (chat_completion +
# push_data); DataFrame.apply would also build a throwaway Series of None.
for _, row in df.iterrows():
    get_real_state(row)

# Read back the output frame and name the export after its earliest start time.
new_df = get_output_frame(valkey_keys["output_key"])
earliest_start = new_df['start'].min()
if pd.isna(earliest_start):
    raise ValueError("output frame has no start times; nothing to export")
# Unpadded year-month-day (e.g. 2024-7-30). Built from the fields directly
# because the "%-m" / "%-d" strftime directives are a platform-specific
# extension and are rejected on some platforms (e.g. Windows).
start_date = f"{earliest_start.year}-{earliest_start.month}-{earliest_start.day}"
new_df = format_for_push(new_df)
file_path = f"./{start_date}_all_tracks_fixed.csv"
# Save the corrected frame (new_df); the original code wrote the raw input df
# under the "_fixed" file name.
new_df.to_csv(file_path, index=False)

[2024-08-01 16:41:06,590] INFO [valkey.py.publish_message:57] | Publishing message {'message_type': 'status'} to channel events
[2024-08-01 16:41:06,839] INFO [valkey.py.publish_message:57] | Publishing message {'message_type': 'start', 'bucket': 'antx', 'prefix': 'Distribution-Statement-D/2024/07/30/', 'run_id': 1} to channel events
[2024-08-01 16:41:06,888] DEBUG [valkey.py.set_output_frame:87] | Saving frame to Distribution-Statement-D/2024/08/01/_output:
                 start                  end        track1 track2 track3  \
0  2024-07-30T06:55:00  2024-07-30T06:56:00   You You You                 
1  2024-07-30T06:55:00  2024-07-30T06:56:00   You You You                 

  track4        state notes delay type time_to_change  
0                 RTB                           01:44  
1         Trial Start                           01:59  
[2024-08-01 16:41:06,942] DEBUG [valkey.py.set_output_frame:87] | Saving frame to Distribution-Statement-D/2024/08/01/_output:
                

In [None]:
# If you want to upload to S3:
# the key will be {start_date}_all_tracks_fixed.csv
#s3.upload_file(file_path, write_bucket, file_path[2:])

In [8]:
# Show the last rows of new_df (the frame returned by format_for_push).
tail_rows = 50
new_df.tail(tail_rows)

Unnamed: 0,start,end,track1,track2,track3,track4,state,notes,delay type
439,7/30/2024 14:16,7/30/2024 14:17,,,,,,,
440,7/30/2024 14:17,7/30/2024 14:18,,,,,Pre Trial Start,,
441,7/30/2024 14:18,7/30/2024 14:19,,,,,,,
442,7/30/2024 14:19,7/30/2024 14:20,,,,,,,
443,7/30/2024 14:20,7/30/2024 14:21,,,,I have to turn our control off of that code. ...,,,
444,7/30/2024 14:21,7/30/2024 14:22,"Zero three, Papa Charlie, am I clear to set s...",,Thank you. Thank you. Thank you. Thank you.,"C.R. 3. Papa Charlie, I'm not clear. She said...",,,
445,7/30/2024 14:22,7/30/2024 14:23,"Charlie, C.O.9, Papa Charlie, go ahead. Just ...",,Thank you. Thank you. Thank you. Thank you.,We'll all the start floating in on you after ...,Delay End,,
446,7/30/2024 14:23,7/30/2024 14:25,"Thank you. Copy Charlie, Sierra 3. Could you ...",,Thank you. Thank you. Thank you. Thank you. T...,"Yeah, we should just end of the day on the, u...",Mistrial,,
447,7/30/2024 14:25,7/30/2024 14:26,"Copy Charles, C.I. Go ahead. Can you readjust...",,Thank you. Thank you. Thank you. Thank you.,That was the rough one. Yeah. We speak not of...,Trial Start,,
448,7/30/2024 14:26,7/30/2024 14:27,We're going to be We're going to be. I don't ...,,Thank you. Thank you. Thank you. Thank you.,Remember when I started with MapG. Same thing...,,,
