# Writing Model-Agnostic LLM Applications with Centralized Observability, Testing, Evaluation, and Security
To write LLM-based applications that are intended to be productionized, there are a few important non-functional features that are critical for long-term success. This demo notebook addresses some methods for handling two of the most critical of these features: 
1. Model-agnostic code
    a. Different models need to be usable and testable by changing only configuration. This avoids model lock-in, helps optimize accuracy and cost, and provides a path to model upgrades.
2. Centralized observability
    a. Testing and Evaluation
       1. LLMs are highly sensitive to change and exhibit unpredictable side effects when anything changes. Changes to the prompt (from development, new data, RAG context, etc.) are unavoidable.
    b. Security
       1. Company-level security checks are critical
       2. Shared security checks reduce redundancy and increase accountability

- Model-agnostic code is achieved in this demo by using the open source litellm library
- Centralized observability is achieved by using the open source langfuse libraries and application

## Configuration and Basic Usage
Configuration for both libraries can mostly be done through ENV VARS

In [None]:
import os
import warnings

# API keys must be supplied through the environment (shell export, a .env
# loader, or a secrets manager) and must never be committed with the notebook.
# Any key that was previously stored in this file should be treated as leaked
# and rotated.
for _required_key in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY"):
    if not os.environ.get(_required_key):
        warnings.warn(
            f"{_required_key} is not set; calls to that provider will fail."
        )

# Model identifiers passed as `model=` to litellm's completion().
gpt35model = "gpt-3.5-turbo"
sonnet = "claude-3-sonnet-20240229"
# NOTE(review): OpenAI publishes this model as "gpt-4o-mini"; "gpt4o_mini"
# presumably only resolves if it is a custom alias/deployment name - confirm.
gpt4omini = "gpt4o_mini"

# Default model used by the demo cells; switch models by changing this line.
model = gpt4omini

### LiteLLM interface
LiteLLM provides a very simple completion interface that works with hundreds of models, and it is easy to create adapters for custom models. The interface is the same as the OpenAI completions API. It also has the features you would expect from the OpenAI interface, such as async support.

In [None]:
from litellm import completion
import os

def basic_litellm(content):
    """Send `content` as a single user message and return the reply text.

    The request goes to the module-level `model` through litellm's
    `completion`; only the text of the first choice is returned.
    """
    user_message = {"content": content, "role": "user"}
    reply = completion(model=model, messages=[user_message])
    first_choice = reply.choices[0]
    return first_choice.message.content

# Smoke-test the helper with a simple greeting and show the reply.
# `content` stays at module level because a later cell sends it again.
content = "Hello, how are you?"
response = basic_litellm(content)
print(response)

### Langfuse integration
Once you are using LiteLLM, you only need to set the Langfuse environment variables and register the `langfuse` callback to start logging

In [None]:
# Configure Langfuse
import os
import warnings

import litellm

# Langfuse credentials must come from the environment, never from the notebook
# source. Any key that was previously stored in this file should be treated as
# leaked and rotated.
for _langfuse_key in ("LANGFUSE_SECRET_KEY", "LANGFUSE_PUBLIC_KEY"):
    if not os.environ.get(_langfuse_key):
        warnings.warn(
            f"{_langfuse_key} is not set; Langfuse logging will fail."
        )

# The host is not a secret: keep the demo default, but let an existing
# environment value take precedence.
os.environ.setdefault("LANGFUSE_HOST", "http://langfuse-server:3000")

# LiteLLM only forwards calls to Langfuse once the callback is registered;
# the environment variables on their own do not enable logging.
litellm.success_callback = ["langfuse"]
litellm.failure_callback = ["langfuse"]

# Now if you run again you will see it logged if Langfuse server is running
response = basic_litellm(content)

In [None]:
# There are some langfuse wrapper classes that allow for basic integration just by changing
# the import to use the langfuse wrapper
from langfuse.openai import openai 

def story():
    """Request a short story continuation and return its text.

    Uses the `openai` module in scope (this cell imports it from
    `langfuse.openai`) with `gpt35model` and `max_tokens=100`, and returns
    the content of the first choice.
    """
    prompt_messages = [
        {"role": "system", "content": "You are a great storyteller."},
        {"role": "user", "content": "Once upon a time in a galaxy far, far away..."},
    ]
    result = openai.chat.completions.create(
        model=gpt35model,
        max_tokens=100,
        messages=prompt_messages,
    )
    first_choice = result.choices[0]
    return first_choice.message.content

# Run the wrapped OpenAI call once and show the generated text.
print(story())

In [None]:
# Using the observe decorator means we can just use the completion API for all models
from langfuse.decorators import observe

# Called form `@observe()` matches the other decorated functions in this
# notebook and is the form shown in the Langfuse decorator documentation.
@observe()
def get_response():
    """Send a fixed greeting to the configured `model` inside an observed call.

    Returns:
        The complete response object from litellm's `completion` (not just
        the message text).
    """
    response = completion(
        model=model,
        messages=[{"content": "Hello, how are you?", "role": "user"}],
    )

    return response

# Print the full response object returned by the observed call.
print(get_response())

In [None]:
# A trace can contain all the steps leading up to the request from the LLM as well

# Imported here so this cell runs on its own; story() below needs
# `langfuse_context`, which is otherwise only imported by a later cell.
from langfuse.decorators import langfuse_context, observe


@observe()
def run_larger_trace():
    """Run a nested chain of observed functions ending in one LLM call.

    The chain is run_larger_trace -> main -> story; each level is decorated
    with `@observe()`. The LLM call passes the current trace id in its
    metadata.

    Returns:
        The story text produced by the innermost call (the content of the
        first choice).
    """

    @observe()
    def story():
        response = completion(
            model=model,
            max_tokens=50,
            messages=[
                {"role": "system", "content": "You are a great storyteller."},
                {"role": "user", "content": "Once upon a time in a galaxy far, far away..."},
            ],
            # Pass the current trace id along with the LLM request.
            metadata={"trace_id": langfuse_context.get_current_trace_id()},
        )
        return response.choices[0].message.content

    @observe()
    def main():
        return story()

    # Return the result instead of discarding it so callers can use the text.
    return main()

#run_larger_trace()

In [None]:
# Metadata can be associated with each request
from langfuse.decorators import langfuse_context
import litellm

# Register "langfuse" as LiteLLM's callback for both successful and failed
# calls. Each attribute gets its own list object.
litellm.success_callback = ["langfuse"]
litellm.failure_callback = ["langfuse"]

# Called form `@observe()` matches the other decorated functions in this
# notebook and is the form shown in the Langfuse decorator documentation.
@observe()
def update_then_call():
    """Attach custom metadata to the current trace, then run the nested trace.

    Sets the trace name, session id, user id, tags and public flag through
    `langfuse_context.update_current_trace` before calling
    `run_larger_trace()`. Returns None.
    """
    langfuse_context.update_current_trace(
        name="My Custom Name",
        session_id="99",
        user_id="3",
        tags=["story", "once upon a time"],
        public=True,
    )

    run_larger_trace()

# Run the demo: sets the trace metadata, then runs the nested story trace.
update_then_call()

In [None]:
from langfuse.decorators import observe
from langfuse.decorators import langfuse_context
from litellm import completion

# Called form `@observe()` matches the other decorated functions in this
# notebook and is the form shown in the Langfuse decorator documentation.
@observe()
def request(system: str, msg: str, trace_metadata: dict = None, model=gpt35model):
    """Send one system + user message pair to `model` and return the reply text.

    Args:
        system: Content of the system message.
        msg: Content of the user message.
        trace_metadata: Optional keyword arguments forwarded to
            `langfuse_context.update_current_trace`; skipped when None or empty.
        model: Model identifier passed to litellm's `completion`.

    Returns:
        The content of the first choice in the response.
    """
    if trace_metadata:
        langfuse_context.update_current_trace(**trace_metadata)

    response = completion(
        model=model,
        max_tokens=200,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": msg},
        ],
        # Pass the current trace id along with the LLM request.
        metadata={"trace_id": langfuse_context.get_current_trace_id()},
    )
    return response.choices[0].message.content

# Called form `@observe()` matches the other decorated functions in this
# notebook and is the form shown in the Langfuse decorator documentation.
@observe()
def request_allmsgs(msgs: list, trace_metadata: dict = None, model=gpt35model):
    """Send a prepared message list to `model`, requesting a JSON object reply.

    Args:
        msgs: Chat messages passed unchanged as `messages=`.
        trace_metadata: Optional keyword arguments forwarded to
            `langfuse_context.update_current_trace`; skipped when None or empty.
        model: Model identifier passed to litellm's `completion`.

    Returns:
        The content of the first choice in the response.
    """
    if trace_metadata:
        langfuse_context.update_current_trace(**trace_metadata)

    response = completion(
        model=model,
        # NOTE(review): 15 tokens is a very small budget for a JSON reply;
        # longer answers will presumably be cut off - confirm this is intended.
        max_tokens=15,
        response_format={"type": "json_object"},
        messages=msgs,
        # Pass the current trace id along with the LLM request.
        metadata={"trace_id": langfuse_context.get_current_trace_id()},
    )
    return response.choices[0].message.content


# Sample user requests for the classification demo; filled in by a later cell.
# NOTE(review): this name would shadow the third-party `requests` package if
# that package is ever imported in this notebook.
requests: list = []

In [None]:
# System prompt for the dispatcher demo: lists the allowed categories and the
# JSON reply shape. It contains literal braces, so it must not be passed
# through str.format().
classification_template_system = """You are a dynamic dispatcher that categorizes all requests into one of these categories:
["answer question", "write code", "write sql", "summarize", "extract"]
All responses should be in JSON format like this:
{"category": "answer question", "response": "The sky is blue because of the refraction of partiles in the atmosphere."}
"""

# User prompt template; `{request_content}` is the placeholder to fill in.
classification_template_user = "Categorize and respond to this request: {request_content}"

# Sample requests added to the shared list in one call, in the same order.
requests.extend(
    [
        "Please tell me why the moon is full only every 28 days",
        "Write a python function to calculate the lunar cycle",
        "write a query to return all of the accounts with over $10k",
        "Provide me with a synopsis of war and peace",
        "who is being spoken to in these lyrics: Panda show\nTrees and balloons\nIce cream snow\nSee you in June",
    ]
)

# Trace settings (tags, public flag, name) for the classification runs.
trace_meta = {
    "tags": ["tclass"],
    "public": True,
    "name": "categorize_test",
}

# for r in requests[1:]:
#     system_msg = classification_template_system 
#     user_msg = classification_template_user.format(request_content=r)
#     response = request(system_msg, user_msg, trace_meta)
#     print(response)
    


In [None]:
from langfuse import Langfuse
import openai
 
# init
# Langfuse() is built with no arguments; presumably it reads the LANGFUSE_*
# environment variables set earlier - confirm against the SDK documentation.
# NOTE(review): the `import openai` in this cell rebinds the name `openai`
# that an earlier cell imported from `langfuse.openai`, so story() will use
# the plain module if it is called after this cell runs - verify intent.
langfuse = Langfuse()

# Fetch the dataset named "CategorizeTest", print its item count, then each item.
dataset = langfuse.get_dataset("CategorizeTest")
print(len(dataset.items))
for item in dataset.items:
    print(item)

In [None]:
# Two-stage classification prompt: the model first picks a "mode", then (for
# mode "function") a function name, and explains its choice in "mode_reason".
# Two fixes to the prompt text: the function list now opens with a properly
# quoted first entry, and the closing instruction names the same three fields
# used by every example instead of a non-existent "class" field.
system_msg12 = """You are a truck dispatch and management assistant helping with an application called TTY. The TTY application is used to manage 
a fleet of delivery vehicles. Each vehicle has a driver that is deliverying items to locations by a route. Locations are also called points.
All vehicles must be maintained with regular service or anytime time the vehicle has a problem.

Users will make requests and your job is to first correctly classify the request's mode, which is one of these values: help, unrelated, greeting, continuation
or function. 

The 3rd field of the JSON to complete is the "mode_reason", which contains the reason why you choose the mode.

Mode descriptions
unrelated: Questions that are unrelated to the business or the use of the application.
help: Questions that are related to the business or the application but do not have a function that can be used to fulfill the request.
greeting: Any standard type of greeting
continuation: Questions that are relate to the previous question
function: There is a specific function that can be used to fulfill this request.

The second classification occurs if the mode is 'function' and these are the possible function names:
["GetMaintenanceTimeFrame", "CreateNotification", "GetDrivenRoute", "GetDriverSafety", "GetMaintenance", 
"CreatePoint", "GetPointActivityTimeFrame", "CompareTruckActivity", "GetTruckActivity", "CreatePointWithAddress", "GetPointActivity"]

Function descriptions
GetMaintenance: Reports on potential required or recent maintence to a vehicle
GetMaintenanceTimeFrame: Generate a report about vehicle maintenence within some time frame

CreatePoint: Create a new location
CreatePointWithAddress: Create a new location when an address has been provided in the request

GetPointActivity: Get activity related to a specific point
GetPointActivityTimeFrame: Get activity related to a specific point over a specified time frame

CreateNotification: Notifications are generated for certain activities associated with a vehicle
GetDrivenRoute: Provides the route or location for a vehicle or driver 
GetDriverSafety: Reports on specific types of driving or actions by a driver that indicate the safety level of their driving
CompareTruckActivity: Report deetails about a specific truck or group of trucks


Examples
INPUT: {"role": "user", "content": "Who is the president of the USA?"}
OUTPUT: {"mode": "unrelated", "function_name": "", "mode_reason": "political offices are not realted to the trucking nad delivery business or application" }

INPUT: {"role": "user", "content": "What is the meaning of the yellow icon?"}
OUTPUT: {"mode": "help", "function_name": "", "mode_reason": "I assume they are referring to an icon in the application so need to llok in the help documents"}

INPUT: {"role": "user", "content": "Add a new geofence at <POINT_ADDRESS>"}
OUTPUT: {"mode": "function", "function_name": "CreatePointWithAddress", "mode_reason": "The CreatePointWithAddress function can be used to answer this question" }

INPUT: {"role": "user", "content": "should I send the Truck for maintenance"}
OUTPUT: {"mode": "function", "function_name": "GetMaintenance",  "mode_reason": "The GetMaintenance function can be used to know if maintenance is required"}

Only respond in valid JSON format with the fields mode, function_name and mode_reason in this format: {"mode": "function", "function_name": "GetMaintenance", "mode_reason": "The GetMaintenance function can be used to know if maintenance is required"}
"""

# Single-label classification prompt: the model assigns each request exactly
# one category from the list and replies as {"class": "<category>"}.
# NOTE(review): "GetTruckActivity" is in the category list but has no entry in
# the descriptions below - confirm whether one should be added.
system_msg = """You are a truck dispatch and management assistant helping with an application called TTY. The TTY application is used to manage 
a fleet of delivery vehicles. Each vehicle has a driver that is deliverying items to locations by a route. Locations are also called points.
All vehicles must be maintained with regular service or anytime time the vehicle has a problem.
Users will make requests and your job is to correctly classify the request into one of these categories: ["Z-NotFleet", "GetMaintenanceTimeFrame", "CreateNotification", "GetDrivenRoute", "GetDriverSafety", "GetMaintenance", 
"CreatePoint", "GetPointActivityTimeFrame", "CompareTruckActivity", "Continuation", "GetTruckActivity", "CreatePointWithAddress",
"Greeting", "GetPointActivity", "Help"]

Here is a brief description of each category and how it is used. Note that the terms point and location mean the same thing.

GetMaintenance: Reports on potential required or recent maintence to a vehicle
GetMaintenanceTimeFrame: Generate a report about vehicle maintenence within some time frame

CreatePoint: Create a new location
CreatePointWithAddress: Create a new location when an address has been provided in the request

GetPointActivity: Get activity related to a specific point
GetPointActivityTimeFrame: Get activity related to a specific point over a specified time frame

CreateNotification: Notifications are generated for certain activities associated with a vehicle
GetDrivenRoute: Provides the route or location for a vehicle or driver 
GetDriverSafety: Reports on specific types of driving or actions by a driver that indicate the safety level of their driving
CompareTruckActivity: Report deetails about a specific truck or group of trucks

Z-NotFleet: Not related to any of the business functions of TTY
Greeting: Not related to specific TTY actions, just a conversational greeting
Help: Request that is related TTY business or the TTY application but where the answer would be found in help manuals and not by some specific action category
Continuation: The request is another question about the previous request


Examples
INPUT: {"role": "user", "content": "Add a new geofence at <POINT_ADDRESS>"}
OUTPUT: {"class": "CreatePointWithAddress"}


INPUT: {"role": "user", "content": "should I send the Truck for maintenance"}
OUTPUT: {"class": "GetMaintenance"}

Only respond in valid JSON format with the field class in this format: {"class": "GetPointActivityTimeFrame"}
"""

system2_msg = """You are a truck dispatch and management assistant helping with an application called TTY. The TTY application is used to manage 
a fleet of delivery vehicles. Each vehicle has a driver that is deliverying items to locations by a route. Locations are also called points.
All vehicles must be maintained with regular service or anytime time the vehicle has a problem.
Users will make requests and your job is to correctly classify the request into one of these categories: ["Z-NotFleet", "GetMaintenanceTimeFrame", "CreateNotification", "GetDrivenRoute", "GetDriverSafety", "GetMaintenance", 
"CreatePoint", "GetPointActivityTimeFrame", "CompareTruckActivity", "Continuation", "GetTruckActivity", "CreatePointWithAddress",
"Greeting", "GetPointActivity", "Help"]

Examples - Format Input: {'class': 'value'}
Add <EMPLOYEE_FIRST_NAME> - New Lenox at <POINT_ADDRESS>: {'class': 'CreatePointWithAddress'}
I want to add a location called <POINT_NAME> at <POINT_ADDRESS>: {'class': 'CreatePointWithAddress'}
Add a new geofence at <POINT_ADDRESS>: {'class': 'CreatePointWithAddress'}
when do the brake pads on <TRUCK_NAME> need replacement: {'class': 'GetMaintenance'}
New <POINT_CLASS_NAME> at <POINT_ADDRESS>: {'class': 'CreatePointWithAddress'}
Create a new point: {'class': 'CreatePoint'}
should I send the Truck for maintenance: {'class': 'GetMaintenance'}
is <TRUCK_NAME> due for an oil change?: {'class': 'GetMaintenance'}
how many more miles before <EMPLOYEE_FIRST_NAME> Truck needs maintenance: {'class': 'GetMaintenance'}
I need to know information about two vehicles...does any part need replacement in the next month on <TRUCK_NAME> or <TRUCK_NAME>: {'class': 'GetMaintenanceTimeFrame'}
How long did <EMPLOYEE_FIRST_NAME> stay at Khulna Office?: {'class': 'GetPointActivity'}
What time did <EMPLOYEE_FIRST_NAME><EMPLOYEE_LAST_NAME> get to The Liner Specialists?: {'class': 'GetPointActivity'}
Which driver goes to <POINT_NAME> the most?: {'class': 'GetPointActivity'}
Did our <TRUCK_CLASS_NAME> stay at <POINT_NAME><POINT_CLASS_NAME><POINT_NAME><POINT_NAME> for more than an hour?: {'class': 'GetPointActivity'}
I'm trying to figure out who are all the drivers are that make trips to <POINT_NAME>: {'class': 'GetPointActivity'}
Have our deliveries to <POINT_NAME> been ontime for the last few weeks: {'class': 'GetPointActivityTimeFrame'}
What are all the stops <EMPLOYEE_LAST_NAME> made yesterday and was he on time based on his schedule?: {'class': 'GetPointActivityTimeFrame'}
Did <EMPLOYEE_LAST_NAME> speed when he was in the <EMPLOYEE_FIRST_NAME><EMPLOYEE_LAST_NAME><POINT_NAME> lot at <POINT_NAME> last week?: {'class': 'GetPointActivityTimeFrame'}
What's our on-time rate for <POINT_NAME> for February?: {'class': 'GetPointActivityTimeFrame'}
What's been happening at <POINT_NAME> for the last 10 days?: {'class': 'GetPointActivityTimeFrame'}
i want to add a new geofenced location to my map: {'class': 'CreatePoint'}
add a location to my list of deliveries: {'class': 'CreatePoint'}
Add point for the loading docks at Peppercorn Restaurant: {'class': 'CreatePoint'}
need to add a new point for my driver: {'class': 'CreatePoint'}
create a new location for my fleet: {'class': 'CreatePoint'}
help me create a new <POINT_CLASS_NAME> for <POINT_NAME>: {'class': 'CreatePoint'}
we have a new delivery location and I need to add it to the map the address is <POINT_ADDRESS>: {'class': 'CreatePointWithAddress'}
find <POINT_NAME> and add it to the map: {'class': 'CreatePoint'}
add two points to the map one at <POINT_ADDRESS> and one at <POINT_ADDRESS>: {'class': 'CreatePointWithAddress'}
add two new Pickup locations to the map one at <POINT_NAME> and one at <POINT_NAME>: {'class': 'CreatePoint'}
Tell me when a vehicle is in stand still or idling: {'class': 'CreateNotification'}
give me the location for <POINT_NAME> and add it to the map: {'class': 'CreatePoint'}
find <POINT_ADDRESS> and add it: {'class': 'CreatePoint'}
when does <TRUCK_NAME> need maintenance: {'class': 'GetMaintenance'}
Explain the different icon colors: {'class': 'Help'}
add an icon to the map at the corner of western and w 22nd street: {'class': 'CreatePointWithAddress'}
add new location at <POINT_ADDRESS>: {'class': 'CreatePointWithAddress'}
new delivery <POINT_CLASS_NAME> for 5629 main street: {'class': 'CreatePointWithAddress'}
need to add a new <TRUCK_CLASS_NAME> location STATE ST &: {'class': 'CreatePointWithAddress'}
driver needs to map location at <POINT_ADDRESS>: {'class': 'CreatePointWithAddress'}
Make new stop location for <POINT_ADDRESS>: {'class': 'CreatePointWithAddress'}
Make a polygon around the building at <POINT_ADDRESS>: {'class': 'CreatePointWithAddress'}
New geofence for <POINT_ADDRESS> with 10 meter radius: {'class': 'CreatePointWithAddress'}
add 3 points on map at <POINT_ADDRESS> <POINT_ADDRESS> <POINT_ADDRESS>: {'class': 'CreatePointWithAddress'}
where do our drivers usually park at <POINT_NAME>: {'class': 'GetPointActivity'}
When did <EMPLOYEE_FIRST_NAME> <TRUCK_STATUS_NAME> at <POINT_NAME> and when did he <TRUCK_STATUS_NAME>: {'class': 'GetPointActivity'}
what time did Client - 3886 leave <POINT_NAME> and get to <POINT_NAME>: {'class': 'GetPointActivity'}
Did <TRUCK_NAME> stay within the geofence at <POINT_NAME> to make the delivery: {'class': 'GetPointActivity'}
when is the last time we were more than 30 minutes late for <POINT_NAME>: {'class': 'GetPointActivity'}
which location sees the most activity: {'class': 'GetPointActivity'}
Did <EMPLOYEE_FIRST_NAME> show up at <POINT_NAME> yesterday: {'class': 'GetPointActivityTimeFrame'}
where did <EMPLOYEE_FIRST_NAME> park when she went to <POINT_NAME> last week: {'class': 'GetPointActivityTimeFrame'}
did <EMPLOYEE_LAST_NAME> go to <POINT_NAME> twice on October 12?: {'class': 'GetPointActivityTimeFrame'}
did we skip a delivery to <POINT_NAME> two weeks ago: {'class': 'GetPointActivityTimeFrame'}
how many times does fleet go to <POINT_NAME> in a typical month: {'class': 'GetPointActivityTimeFrame'}
in the last six weeks how many times has a driver been late for <POINT_NAME>: {'class': 'GetPointActivityTimeFrame'}
show me our on time rates for <POINT_NAME> for the past month: {'class': 'GetPointActivityTimeFrame'}
How can I get different alerts?: {'class': 'Help'}
Can I get an alert if a driver is tailgating?: {'class': 'Help'}
How do I know if a driver isn't wearing his seatbelt?: {'class': 'Help'}
Can I get notified if a driver is regularly speeding?: {'class': 'Help'}
I think one of the drivers is on the phone all the time can I get a notification about this: {'class': 'Help'}
tell me what the different icons on the map mean: {'class': 'Help'}
Will I be notified for distracted driving: {'class': 'Help'}
How does the system know if someone is falling asleep or on the phone: {'class': 'Help'}
What does a red truck icon mean?: {'class': 'Help'}
Is a rolling stop considered a violation or will I get an alert on it?: {'class': 'Help'}
What are some driver actions that send alerts?: {'class': 'Help'}
need help understanding yelow icon color: {'class': 'Help'}
What does a driver have to do that makes the system send an alert?: {'class': 'Help'}
Are notifications for distracted driving sent to all managers?: {'class': 'Help'}
Add a new map <POINT_CLASS_NAME> for the closest mcdonalds to the OFFICE: {'class': 'CreatePoint'}
Put our new Home base on the map: {'class': 'CreatePoint'}
Add an icon to the map next to the one I just added: {'class': 'CreatePoint'}
Have we been more than 30 minutes late for Mitali Housing in the last six months?: {'class': 'GetPointActivityTimeFrame'}
which driver was speeding near <POINT_NAME> last week: {'class': 'GetPointActivityTimeFrame'}
two days ago the client says a driver was late for a pickup at <POINT_NAME> who was the driver: {'class': 'GetPointActivityTimeFrame'}
how many trucks need maintenance: {'class': 'GetMaintenance'}
is Steve's <TRUCK_CLASS_NAME> due for any maintenance?: {'class': 'GetMaintenance'}
When does <TRUCK_NAME> need to go in for air filter change oil change etc: {'class': 'GetMaintenance'}
Does Alauddin's <TRUCK_CLASS_NAME> have any issues it needs to take care of for maintenance?: {'class': 'GetMaintenance'}
Are any trucks overdue for maintenance?: {'class': 'GetMaintenance'}
how often does the fleet schedule oil changes: {'class': 'GetMaintenance'}
how many more miles can <EMPLOYEE_FIRST_NAME> go before <TRUCK_CLASS_NAME> neeeds maintenance?: {'class': 'GetMaintenance'}
Do we need to replace brake pads on any of the trucks?: {'class': 'GetMaintenance'}
Which trucks windshield wipers need replacing?: {'class': 'GetMaintenance'}
When did we take the blue <TRUCK_CLASS_NAME> in for maintenance?: {'class': 'GetMaintenance'}
summarize activity of my truck: {'class': 'GetTruckActivity'}
how was my fleet idling  or stop time: {'class': 'GetTruckActivity'}
want to look up vehicle activity like hours driven, total miles and operating time: {'class': 'GetTruckActivity'}
I want to find truck arrival and departure times at my factory: {'class': 'GetPointActivity'}
how many trucks need an oil change in the next two weeks: {'class': 'GetMaintenanceTimeFrame'}
show me the maintenance for all the trucks for the last two months: {'class': 'GetMaintenanceTimeFrame'}
show me the maintenance for all the trucks for the last two months: {'class': 'GetMaintenanceTimeFrame'}
which trucks had oil changes last month?: {'class': 'GetMaintenanceTimeFrame'}
Have any trucks had their windshield wipers replaced in the last 12 months?: {'class': 'GetMaintenanceTimeFrame'}
Did <EMPLOYEE_FIRST_NAME> take their <TRUCK_CLASS_NAME> in for maintenance in the last 10 days?: {'class': 'GetMaintenanceTimeFrame'}
<EMPLOYEE_FIRST_NAME> said they took their <TRUCK_CLASS_NAME> in for an oil change last week Is this true?: {'class': 'GetMaintenanceTimeFrame'}
Confirm that <EMPLOYEE_FIRST_NAME> got their oil changed and new brake pads two weeks ago: {'class': 'GetMaintenanceTimeFrame'}
Is <TRUCK_NAME> scheduled for any maintenance before August 7?: {'class': 'GetMaintenanceTimeFrame'}
What's the status of the brake pads for <TRUCK_NAME> and <TRUCK_NAME>: {'class': 'GetMaintenanceTimeFrame'}
What are all the maintenance issues we need to adddress in the next 5 weeks: {'class': 'GetMaintenanceTimeFrame'}
Has <TRUCK_NAME> been scheduled for maintenance for next week?: {'class': 'GetMaintenanceTimeFrame'}
Has <TRUCK_NAME> skipped any maintenance appointments?: {'class': 'GetMaintenanceTimeFrame'}
Which trucks in the fleet need maintenance urgently like within 1 or 2 days?: {'class': 'GetMaintenanceTimeFrame'}
Any immediate maintenance issues in the next week?: {'class': 'GetMaintenanceTimeFrame'}
i need to look up point activity: {'class': 'GetPointActivity'}
tell me about my trucks arrivals at <POINT_NAME>: {'class': 'GetPointActivity'}
Look up truck arrival times.: {'class': 'GetPointActivity'}
help: {'class': 'Help'}
Where is <TRUCK_NAME>24/7-PinPoint1x-Lynchburg-KORE-CDMA--129DELPHI</TRUCK NAME>?: {'class': 'GetDrivenRoute'}
Where is <TRUCK_NAME> now: {'class': 'GetDrivenRoute'}
Where did <TRUCK_NAME> go today?: {'class': 'GetDrivenRoute'}
Show <TRUCK_NAME> route for the last two days: {'class': 'GetDrivenRoute'}
Map <TRUCK_NAME> for yesterday: {'class': 'GetDrivenRoute'}
Location of <TRUCK_NAME>: {'class': 'GetDrivenRoute'}
where has <TRUCK_NAME> been for the last 3 hours: {'class': 'GetDrivenRoute'}
Trying to figure out if the right routes are being  driven specifically did <TRUCK_NAME> do the right route two days ago?: {'class': 'GetDrivenRoute'}
What route did <TRUCK_NAME> drive on 7/14?: {'class': 'GetDrivenRoute'}
Show <TRUCK_NAME> routes for the past week: {'class': 'GetDrivenRoute'}
Map <TRUCK_NAME> last wednesday: {'class': 'GetDrivenRoute'}
Find <TRUCK_NAME>: {'class': 'GetDrivenRoute'}
Show <TRUCK_NAME> map last 2 weeks: {'class': 'GetDrivenRoute'}
See map of <TRUCK_NAME> from 3/12 - 3/15: {'class': 'GetDrivenRoute'}
Route <TRUCK_NAME> 12:00 - 4:00 on 4/11/24: {'class': 'GetDrivenRoute'}
Compare the driving patterns of <TRUCK_NAME> and <TRUCK_NAME> yesterday: {'class': 'CompareTruckActivity'}
How does <TRUCK_NAME> compare to <TRUCK_NAME>: {'class': 'CompareTruckActivity'}
Tell me there is any difference in how <TRUCK_NAME> drives vs <TRUCK_NAME>: {'class': 'CompareTruckActivity'}
Is there any difference <TRUCK_NAME> <TRUCK_NAME> mileage and times in the last two days: {'class': 'CompareTruckActivity'}
Difference mileage between <TRUCK_NAME> and <TRUCK_NAME> on 12/13: {'class': 'CompareTruckActivity'}
How does <TRUCK_NAME> driving compare to <TRUCK_NAME> today: {'class': 'CompareTruckActivity'}
I want to compare how long it tooks two trucks to complete the same route so show me how long it took <TRUCK_NAME> to complete the route vs <TRUCK_NAME> in the past month: {'class': 'CompareTruckActivity'}
<TRUCK_NAME> versus <TRUCK_NAME> milage for the last four hours Thursday: {'class': 'CompareTruckActivity'}
How long did <TRUCK_NAME> idle vs <TRUCK_NAME> this month so far: {'class': 'CompareTruckActivity'}
Compare idle times for <TRUCK_NAME> and <TRUCK_NAME> past week: {'class': 'CompareTruckActivity'}
When did <TRUCK_NAME> and <TRUCK_NAME> <TRUCK_STATUS_NAME> this morning: {'class': 'CompareTruckActivity'}
what times do trucks <TRUCK_NAME> <TRUCK_NAME> and <TRUCK_NAME> normally disconnect: {'class': 'CompareTruckActivity'}
where do <TRUCK_NAME> and <TRUCK_NAME> idle the longest?: {'class': 'CompareTruckActivity'}
when did <TRUCK_NAME> and <TRUCK_NAME> disconnect on 5/24/24: {'class': 'CompareTruckActivity'}
which truck had lower milage yesterday <TRUCK_NAME> or <TRUCK_NAME>: {'class': 'CompareTruckActivity'}
show safety alerts for <EMPLOYEE_FIRST_NAME> for the past week: {'class': 'GetDriverSafety'}
how many safety violations for <EMPLOYEE_FIRST_NAME> since January 1: {'class': 'GetDriverSafety'}
video of <EMPLOYEE_FIRST_NAME><EMPLOYEE_LAST_NAME> tailgating yesterday: {'class': 'GetDriverSafety'}
Are any of the drivers talking on the phone and can you tell me which ones were on the phone last monday through today: {'class': 'GetDriverSafety'}
Any drivers distracted this week: {'class': 'GetDriverSafety'}
Speeding drivers for the last 2 weeks: {'class': 'GetDriverSafety'}
Show me any critical or major incidents ths month: {'class': 'GetDriverSafety'}
All driver violations 3/12 - 4/12: {'class': 'GetDriverSafety'}
<EMPLOYEE_LAST_NAME><EMPLOYEE_FIRST_NAME><EMPLOYEE_LAST_NAME> violation on Jun 7: {'class': 'GetDriverSafety'}
<EMPLOYEE_LAST_NAME><EMPLOYEE_FIRST_NAME><EMPLOYEE_LAST_NAME> safety july 25: {'class': 'GetDriverSafety'}
did <EMPLOYEE_FIRST_NAME> almost hit someone yesterday: {'class': 'GetDriverSafety'}
<EMPLOYEE_LAST_NAME><EMPLOYEE_FIRST_NAME><EMPLOYEE_LAST_NAME> traffic violations 4-4: {'class': 'GetDriverSafety'}
Who was driving distracted on 11/4/24: {'class': 'GetDriverSafety'}
When did <EMPLOYEE_FIRST_NAME> speed over the past month: {'class': 'GetDriverSafety'}
Show <EMPLOYEE_FIRST_NAME><EMPLOYEE_LAST_NAME> safety stats for May: {'class': 'GetDriverSafety'}
Add a new point: {'class': 'CreatePoint'}
add a new geofence location: {'class': 'CreatePoint'}
tell me about miles from yesterday. And may be about hours of operation: {'class': 'GetTruckActivity'}
vehicle or truck activites for fleet: {'class': 'GetTruckActivity'}
looking for truck activity: {'class': 'GetTruckActivity'}
what were the activities for my fleet : {'class': 'GetTruckActivity'}
what was the hours of operation and miles driven: {'class': 'GetTruckActivity'}
looking or tell me miles summary of the fleet: {'class': 'GetTruckActivity'}
<TRUCK_NAME> at <POINT_NAME> over weekend: {'class': 'GetTruckActivity'}
Arrivals to <POINT_NAME> last week: {'class': 'GetPointActivityTimeFrame'}
<POINT_NAME> activity last 7 days: {'class': 'GetPointActivity'}
What trucks were at <POINT_NAME> last month: {'class': 'GetMaintenanceTimeFrame'}
Deliveries to <POINT_NAME> in last 2 days: {'class': 'GetPointActivity'}
All trucks <POINT_NAME> previous week: {'class': 'GetPointActivity'}
I want to know arrivals and departures of my fleet: {'class': 'GetPointActivity'}
create a new location or point: {'class': 'CreatePoint'}
Report <POINT_NAME> arrivals 10/2 to 10/13: {'class': 'GetPointActivity'}
Times for <POINT_NAME> arrivals departures 12/1/24: {'class': 'GetPointActivity'}
Where is <TRUCK_NAME> now: {'class': 'GetDrivenRoute'}
<TRUCK_NAME> location now: {'class': 'GetDrivenRoute'}
Is <TRUCK_NAME> going to <POINT_NAME> now: {'class': 'GetDrivenRoute'}
Where did <EMPLOYEE_FIRST_NAME> go yesterday: {'class': 'GetDrivenRoute'}
Where is <TRUCK_NAME> now: {'class': 'GetDrivenRoute'}
What point did <EMPLOYEE_FIRST_NAME> go to on Thursday: {'class': 'GetDrivenRoute'}
Show <EMPLOYEE_FIRST_NAME> route 2/25: {'class': 'GetDrivenRoute'}
<EMPLOYEE_FIRST_NAME> yesterday from 3 - 5 pm: {'class': 'GetDrivenRoute'}
<EMPLOYEE_FIRST_NAME> Roy map 2 days ago during shift: {'class': 'GetDrivenRoute'}
<EMPLOYEE_FIRST_NAME> after <POINT_NAME> on 6/12: {'class': 'GetDrivenRoute'}
<EMPLOYEE_FIRST_NAME> after 10:00 on 7/16: {'class': 'GetDrivenRoute'}
summarize activities at <POINT_NAME>: {'class': 'GetPointActivity'}
tell me anything i need to know with <POINT_NAME>: {'class': 'GetPointActivity'}
How are you today: {'class': 'Greeting'}
What's the weather today: {'class': 'Z-NotFleet'}
Weather in NYC today: {'class': 'Z-NotFleet'}
How was your weekend: {'class': 'Z-NotFleet'}
Where are my car keys: {'class': 'Z-NotFleet'}
What time did I leave yesterday: {'class': 'Z-NotFleet'}
I need to go to the doctor: {'class': 'Z-NotFleet'}
Covid symptoms: {'class': 'Z-NotFleet'}
Notify me when vehicle has no messages for 2 days: {'class': 'CreateNotification'}
When a vehicle is gone for lunch greater than 2 hours, send me a message: {'class': 'CreateNotification'}
How many vacation days do I get: {'class': 'Z-NotFleet'}
How do I know if I need to go to the doctor: {'class': 'Z-NotFleet'}
Is it going to rain today?: {'class': 'Z-NotFleet'}
Best cake store near me: {'class': 'Z-NotFleet'}
what is AI?: {'class': 'Z-NotFleet'}
Can you help me with homework: {'class': 'Z-NotFleet'}
tell me a joke: {'class': 'Z-NotFleet'}
What's the latest news: {'class': 'Z-NotFleet'}
Can you recommend a good book?: {'class': 'Z-NotFleet'}
What should I do this weekend?: {'class': 'Z-NotFleet'}
What should I make for dinner?: {'class': 'Z-NotFleet'}
Who do I talk to in HR?: {'class': 'Z-NotFleet'}
Hobbies to try: {'class': 'Z-NotFleet'}
Most recent person hired here: {'class': 'Z-NotFleet'}
Problem with my paycheck: {'class': 'Z-NotFleet'}
Are aliens real?: {'class': 'Z-NotFleet'}
Who's going to win the election?: {'class': 'Z-NotFleet'}
What do I need to do today?: {'class': 'Z-NotFleet'}
I need to change my banking info: {'class': 'Z-NotFleet'}
Do sick days roll over to the next year?: {'class': 'Z-NotFleet'}
Restaurant recommendations for a steak dinner: {'class': 'Z-NotFleet'}
Play some music: {'class': 'Z-NotFleet'}
Hello: {'class': 'Greeting'}
Hi: {'class': 'Greeting'}
Hey: {'class': 'Greeting'}
Greetings: {'class': 'Greeting'}
What's up: {'class': 'Greeting'}
Whassup: {'class': 'Greeting'}
Yo: {'class': 'Greeting'}
Good morning: {'class': 'Greeting'}
Good morning: {'class': 'Greeting'}
Morning: {'class': 'Greeting'}
Good afternoon: {'class': 'Greeting'}
Good evening: {'class': 'Greeting'}
Namaste: {'class': 'Greeting'}
Hola: {'class': 'Greeting'}
How are you: {'class': 'Greeting'}
How are you today: {'class': 'Greeting'}
How ya doing: {'class': 'Greeting'}
Howdy: {'class': 'Greeting'}
Good day: {'class': 'Greeting'}
Hiya: {'class': 'Greeting'}
How's it going: {'class': 'Greeting'}
What's new: {'class': 'Greeting'}
Let's get started: {'class': 'Greeting'}
Tell me more: {'class': 'Continuation'}
More please: {'class': 'Continuation'}
What else can you show: {'class': 'Continuation'}
What other options do I have: {'class': 'Continuation'}
Can you give me more details?: {'class': 'Continuation'}
Summarize that answer: {'class': 'Continuation'}
Highlight parts of that answer: {'class': 'Continuation'}
Show me something else: {'class': 'Continuation'}
Tell me more about that: {'class': 'Continuation'}
I want to see something similar but with different details: {'class': 'Continuation'}
Filter that answer: {'class': 'Continuation'}
Show me just the important parts: {'class': 'Continuation'}
Besides that I also want to see other information: {'class': 'Continuation'}
I only want to see certain parts: {'class': 'Continuation'}
Show something different: {'class': 'Continuation'}
What browsers can I use?: {'class': 'Help'}
Can I use safari?: {'class': 'Help'}
Why is my truck icon yellow?: {'class': 'Help'}
Where do I log in?: {'class': 'Help'}
How do I change my login and password?: {'class': 'Help'}
How do I track a vehicle?: {'class': 'Help'}
How do I track a vehicle?: {'class': 'Help'}
How do I find a vehicle?: {'class': 'Help'}
What does it mean if a vehicle is green?: {'class': 'Help'}
What if a vehicle is yellow?: {'class': 'Help'}
What if a truck is red?: {'class': 'Help'}
How to locate vehicle on map: {'class': 'Help'}
How to center map: {'class': 'Help'}
How do I refresh the map: {'class': 'Help'}
How do I track a vehicle on the map?: {'class': 'Help'}
How do I change the icons I see on the map?: {'class': 'Help'}
How do I rename a vehicle?: {'class': 'Help'}
Can I customize the map?: {'class': 'Help'}
What is a geofence?: {'class': 'Help'}
What can I do with a geofence?: {'class': 'Help'}
How do I start making a geofence?: {'class': 'Help'}
How do I change a point's location on the map?: {'class': 'Help'}
How do I create an alert for a point?: {'class': 'Help'}
How do I create an alert for a geofence?: {'class': 'Help'}
What do I click on to change the vehicle name?: {'class': 'Help'}
How do I save a new vehicle name?: {'class': 'Help'}
How to create a maintenance schedule?: {'class': 'Help'}
What do I click on to see the maintenance schedules?: {'class': 'Help'}
Can you do that for me?: {'class': 'Continuation'}
How can I see all the vehicles that require maintenance?: {'class': 'Help'}
How can i see a service history for a specific vehicle?: {'class': 'Help'}
Can I change the service history for a vehicle?: {'class': 'Help'}
How do I mark a service as completed?: {'class': 'Help'}
Scheduled maintenance was done so how do I record this?: {'class': 'Help'}
How do I save information about servies completed?: {'class': 'Help'}
How do I create a new maintenance schedule?: {'class': 'Help'}
How do I create a new maintenance template?: {'class': 'Help'}
How do I add multiple vehicles to a maintenance template?: {'class': 'Help'}
What is a maintenance template?: {'class': 'Help'}
What does a maintenance template do?: {'class': 'Help'}
I can't find how to start a new maintenance template: {'class': 'Help'}
Let me know when truck stays at <POINT_NAME> for mlore than 2 hours: {'class': 'CreateNotification'}
How do I copy a template to create a new one?: {'class': 'Help'}
How can I see which vehicles are on the maintenance template?: {'class': 'Help'}
Which vehicle has the most miles?: {'class': 'GetTruckActivity'}
Help: {'class': 'Help'}
Looking for help: {'class': 'Help'}
Show me how to look up help: {'class': 'Help'}

Every response must be in JSON format
"""
  

In [None]:
import csv
import json
from langfuse import Langfuse
import openai
from collections import defaultdict
import traceback

# Langfuse client shared by the helpers in this notebook.
langfuse = Langfuse()

# CSV of labelled example utterances (first column text, second column label).
path = "./fleet-output.csv"

# Every label accepted as a valid class.
valid_classes = [
    "Z-NotFleet",
    "GetMaintenanceTimeFrame",
    "CreateNotification",
    "GetDrivenRoute",
    "GetDriverSafety",
    "GetMaintenance",
    "CreatePoint",
    "GetPointActivityTimeFrame",
    "CompareTruckActivity",
    "Continuation",
    "GetTruckActivity",
    "CreatePointWithAddress",
    "Greeting",
    "GetPointActivity",
    "Help",
]

# Lower-cased labels that load_examples() maps to the 'function' mode.
valid_classes_with_modes = [
    label.lower()
    for label in (
        "GetMaintenanceTimeFrame",
        "CreateNotification",
        "GetDrivenRoute",
        "GetDriverSafety",
        "GetMaintenance",
        "CreatePoint",
        "GetPointActivityTimeFrame",
        "CompareTruckActivity",
        "GetTruckActivity",
        "CreatePointWithAddress",
        "GetPointActivity",
    )
]
valid_modes = ['function', 'help', 'continuation', 'not applicable']

# Each ordered pair of distinct classes is one possible (expected, guessed) mismatch.
num_classes = len(valid_classes)
print(f"{len(valid_classes)} valid classes which means there are {num_classes*(num_classes-1)} potential mismatches. Creating mismatch tracker")

def class_key(expected, guess):
    """Build the mismatch-tracker key "<expected>-<guess>" for a class pair."""
    return "{}-{}".format(expected, guess)
    
# Count of misclassifications keyed by class_key(expected_name, guessed_name).
# Seed every ordered pair of distinct class names with 0. The keys must be
# built from the class NAMES (not their list positions) so they line up with
# the name-based keys incremented when a wrong class is guessed.
mismatched_classes = defaultdict(int)
for expected_class in valid_classes:
    for guessed_class in valid_classes:
        if expected_class != guessed_class:
            mismatched_classes[class_key(expected_class, guessed_class)] = 0

def _parse_model_json(raw):
    """Parse a raw model reply as JSON, tolerating single-quoted output.

    Strict JSON is tried first so that valid replies containing apostrophes
    are not corrupted; only when that fails are single quotes swapped for
    double quotes and the parse retried.

    Raises:
        ValueError: the reply is not JSON under either interpretation
            (json.JSONDecodeError is a ValueError subclass).
        TypeError, AttributeError: the reply is not text (e.g. None).
    """
    try:
        return json.loads(raw)
    except ValueError:
        return json.loads(raw.replace("'", '"'))


# Called form `observe()` works in SDK versions where the decorator takes
# keyword-only options as well as in those that accept the bare form.
@observe()
def test_examples(max_tests: int = 2):
    """Classify rows from the CSV at `path` and print accuracy statistics.

    Each data row supplies the user message (column 0) and the expected class
    (column 1). The message is sent together with `system_msg` through
    `request_allmsgs`; the reply is parsed as JSON and its "class" value is
    compared with the expected class. Wrong guesses are tallied in the
    module-level `mismatched_classes` tracker; replies that cannot be parsed
    into a JSON object are counted separately.

    Args:
        max_tests: stop after this many rows have been evaluated. The check
            runs after a row is processed, so at least one row is evaluated
            whenever the file has data.

    Returns:
        None. Results are printed.
    """
    total = 0
    total_correct = 0
    total_json_fails = 0
    with open(path) as fi:
        reader = csv.reader(fi)
        next(reader, None)  # skip the header row; tolerate an empty file
        for row in reader:
            if len(row) < 2:
                # Blank or truncated line: nothing to evaluate.
                continue
            user_msg = row[0]
            correct_class = row[1]
            msgs = [{"role": "user", "content": user_msg},
                    {"role": "system", "content": system_msg}]

            raw_response = request_allmsgs(msgs)
            print(f"response: {raw_response}")
            correct = 0
            try:
                response = _parse_model_json(raw_response)
                # A non-dict JSON value has no .get and is treated as a failure.
                guessed = response.get("class")
            except (ValueError, TypeError, AttributeError) as ex:
                print("failed to load into JSON", ex)
                # Print the raw reply: the parsed value does not exist here.
                print(correct, user_msg, raw_response, correct_class)
                total_json_fails += 1
            else:
                if guessed == correct_class:
                    correct = 1
                else:
                    mismatched_classes[class_key(correct_class, guessed)] += 1
                print(correct, user_msg, response, correct_class)

            total += 1
            total_correct += correct
            if total >= max_tests:
                break

    # Avoid ZeroDivisionError when the CSV held no usable rows.
    accuracy = total_correct / total if total else 0.0
    print(f"total requests {total}, # correct {total_correct}, % correct {accuracy} with {total_json_fails} failed to return JSON")
    for key, count in mismatched_classes.items():
        if count > 0:
            print(f"{key}: {count}")

def load_examples(path):
    """Read the labelled CSV at `path` and append a 'mode' column.

    Returns a list of rows. The first is the CSV header with 'mode' appended;
    each following row is [user message, class label, mode]. The mode is
    'function' for labels listed in `valid_classes_with_modes`, a fixed value
    for the help / continuation / z-notfleet / greeting labels, and '' for
    anything else. Labels are compared case-insensitively.
    """
    # Modes for labels that are not function-style classes.
    fixed_modes = {
        'help': 'help',
        'continuation': 'continuation',
        'z-notfleet': 'z-notfleet',
        'greeting': 'z-notfleet',
    }

    def mode_for(label):
        lowered = label.lower()
        if lowered in valid_classes_with_modes:
            return 'function'
        return fixed_modes.get(lowered, '')

    with open(path) as fi:
        reader = csv.reader(fi)
        header = next(reader) + ['mode']
        return [header] + [
            [record[0], record[1], mode_for(record[1])] for record in reader
        ]

def create_dataset(path):
    """Create the "CategorizeTTY" Langfuse dataset from the CSV at `path`.

    After skipping the header row, one dataset item is created per CSV row:
    the input is the user message (column 0) followed by `system_msg`, and
    the expected output is {"class": <column 1>}.
    """
    dataset_name = "CategorizeTTY"
    langfuse.create_dataset(dataset_name)
    with open(path) as fi:
        records = csv.reader(fi)
        next(records)  # header row
        for record in records:
            user_msg = record[0]
            correct_class = record[1]
            langfuse.create_dataset_item(
                dataset_name=dataset_name,
                input=[
                    {"role": "user", "content": user_msg},
                    {"role": "system", "content": system_msg},
                ],
                expected_output={"class": correct_class},
            )
        

In [None]:
import pandas as pd

# load_examples() returns the header row first, followed by the data rows.
path = "./fleet-output.csv"
data = load_examples(path)
df = pd.DataFrame(data[1:], columns=data[0])

In [None]:
# Row counts per mode, then per label.
for column in ('mode', 'label'):
    print(df.groupby(column).count())

In [None]:
# Evaluate rows from the CSV, stopping once 400 have been processed.
test_examples(400)

Object `test_examples` not found.


In [None]:
# Display the mismatch tracker ordered from highest count to lowest.
dict(sorted(mismatched_classes.items(), key=lambda item: item[1], reverse=True))

In [None]:
# Load the dataset and test responses
# Create a run
# 

"""
from datetime import datetime
 
def run_my_custom_llm_app(input, system_prompt):
  messages = [
      {"role":"system", "content": system_prompt},
      {"role":"user", "content": input["country"]}
  ]
 
  generationStartTime = datetime.now()
 
  openai_completion = openai.chat.completions.create(
      model="gpt-3.5-turbo",
      messages=messages
  ).choices[0].message.content
 
  langfuse_generation = langfuse.generation(
    name="guess-countries",
    input=messages,
    output=openai_completion,
    model="gpt-3.5-turbo",
    start_time=generationStartTime,
    end_time=datetime.now()
  )
 
  return openai_completion, langfuse_generation
  
def run_experiment(experiment_name, system_prompt):
  dataset = langfuse.get_dataset("capital_cities")
 
  for item in dataset.items:
    completion, langfuse_generation = run_my_custom_llm_app(item.input, system_prompt)
 
    item.link(langfuse_generation, experiment_name) # pass the observation/generation object or the id
 
    langfuse_generation.score(
      name="exact_match",
      value=simple_evaluation(completion, item.expected_output)
    )
"""

from litellm import completion
from datetime import datetime

dataset_name = "CategorizeTTY"


def run_llm_request(messages: list):
    """Send `messages` to the configured model and log the call to Langfuse.

    Returns a tuple (response, langfuse_generation): the text content of the
    first completion choice, and the Langfuse generation object recorded for
    the call with its input, output, model and start/end timestamps.
    """
    started_at = datetime.now()

    llm_result = completion(model=model, messages=messages)
    response = llm_result.choices[0].message.content

    langfuse_generation = langfuse.generation(
        name="classify_tty",
        input=messages,
        output=response,
        model=model,
        start_time=started_at,
        end_time=datetime.now(),
    )

    return response, langfuse_generation

# The experiment loop stops once MORE than `stop` items have been processed,
# i.e. it handles at most stop + 1 dataset items.
# NOTE(review): confirm whether exactly `stop` items was intended.
stop = 1


def _predicted_class(raw_response):
    """Return the "class" value of a raw model reply, or None if unparseable.

    Strict JSON is tried first; if that fails, single quotes are swapped for
    double quotes and the parse retried, since models sometimes reply with
    single-quoted, dict-style output.
    """
    try:
        try:
            parsed = json.loads(raw_response)
        except ValueError:
            parsed = json.loads(raw_response.replace("'", '"'))
    except (ValueError, TypeError, AttributeError):
        return None
    return parsed.get("class") if isinstance(parsed, dict) else None


def run_experiment(experiment_name):
    """Run the model over the Langfuse dataset and score each answer.

    For each item of the dataset named by `dataset_name`, the item input is
    sent through `run_llm_request`, the resulting generation is linked to the
    item under `experiment_name`, and a "Categorize" score of 1 (match) or
    0 (no match) is attached to the generation.

    The reply is raw text while the expected output may be stored as a dict
    like {"class": ...}; comparing the two directly can never match. The
    reply is therefore parsed and the "class" values are compared. An exact
    match of the raw reply against the expected output still counts.

    Args:
        experiment_name: run name passed to `item.link`.
    """
    dataset = langfuse.get_dataset(dataset_name)

    for counter, item in enumerate(dataset.items, start=1):
        response, langfuse_generation = run_llm_request(item.input)

        item.link(langfuse_generation, experiment_name)  # pass the observation/generation object or the id

        expected = item.expected_output
        expected_class = expected.get("class") if isinstance(expected, dict) else expected
        predicted = _predicted_class(response)
        matched = response == expected or (
            predicted is not None and predicted == expected_class
        )

        langfuse_generation.score(
            name="Categorize",
            value=1 if matched else 0,
        )
        if counter > stop:
            break
      
# Run the dataset experiment; generations are linked to items under this name.
experiment_name = "test1"
run_experiment(experiment_name)

In [None]:
# JSON requires double-quoted strings, so json.loads raises
# json.JSONDecodeError on this single-quoted, dict-style text.
js = "{'class': 'Help'}"
json.loads(js)