# Open Telemetry

Now that we're pushing our system into production, how can we ensure we're working with a well oiled system? 

# Logfire

In [3]:
import logfire

!logfire auth 

You are already logged in. [1m([0mYour credentials are stored in 
[1;35m/Users/ivanleo/.logfire/[0m[1;95mdefault.toml[0m[1m)[0m


In [6]:
import logfire
from datetime import date

logfire.configure()

In [8]:
logfire.info('Hello, {name}!', name='world')  

with logfire.span('Asking the user their {question}', question='age'):  
    user_input = input('How old are you [YYYY-mm-dd]? ')
    dob = date.fromisoformat(user_input)  
    logfire.debug('{dob=} {age=!r}', dob=dob, age=date.today() - dob)

08:27:41.402 Hello, world!
08:27:41.403 Asking the user their age


How old are you [YYYY-mm-dd]?  1997-06-04


## Exceptions

What happens when we see errors?

In [9]:

logfire.info('Hello, {name}!', name='world')  

with logfire.span('Asking the user their {question}', question='age'):  
    user_input = input('How old are you [YYYY-mm-dd]? ')
    dob = date.fromisoformat(user_input)  
    logfire.debug('{dob=} {age=!r}', dob=dob, age=date.today() - dob)

08:29:40.943 Hello, world!
08:29:40.946 Asking the user their age


How old are you [YYYY-mm-dd]?  22


ValueError: Invalid isoformat string: '22'

![CleanShot 2024-06-17 at 16.29.19.png](attachment:91710bee-e4f3-48a1-90e9-7af36c810735.png)

![CleanShot 2024-06-17 at 16.30.00.png](attachment:8af7f5a6-819b-4917-841c-8ca565cfca43.png)

## Pydantic Validation

Pydantic is useful for us in production to prevent untyped objects from flying around and wrecking havoc. 

In [10]:
from datetime import date
import logfire
from pydantic import BaseModel

logfire.configure()

class User(BaseModel):
    name: str
    country_code: str
    dob: date

user = User(name='Anne', country_code='USA', dob='2000-01-01')
logfire.info('user processed: {user!r}', user=user) 

08:31:19.052 user processed: User(name='Anne', country_code='USA', dob=datetime.date(2000, 1, 1))


![CleanShot 2024-06-17 at 16.39.23.png](attachment:33a6d066-ebe6-4494-9723-2fc54f5fb7ac.png)

But what happens when we have invalid validation?

In [11]:
class User(BaseModel):
    name: str
    country_code: str
    dob: date

user = User(name='Anne', country_code='USA', dob='12344')

ValidationError: 1 validation error for User
dob
  Datetimes provided to dates should have zero time - e.g. be exact dates [type=date_from_datetime_inexact, input_value='12344', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/date_from_datetime_inexact

Turns out in order to log these validations, we need to configure the integration with Pydantic

In [12]:
logfire.configure(pydantic_plugin=logfire.PydanticPlugin(record='all'))  

class User(BaseModel):
    name: str
    country_code: str
    dob: date

user = User(name='Anne', country_code='USA', dob='12344')

08:42:31.401 Pydantic User validate_python


ValidationError: 1 validation error for User
dob
  Datetimes provided to dates should have zero time - e.g. be exact dates [type=date_from_datetime_inexact, input_value='12344', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/date_from_datetime_inexact

![CleanShot 2024-06-17 at 16.42.46.png](attachment:7267ad67-23af-4ab9-aeae-c2c7b28aefd1.png)

![CleanShot 2024-06-17 at 16.43.00.png](attachment:cb2bb7f6-5abf-44b2-80d4-fa0a5126b4fe.png)

## I Don't have an integration

Just use manual logging

In [13]:
import time

import logfire

logfire.configure()

with logfire.span('This is a span'):
    time.sleep(1)
    logfire.info('This is an info log')
    time.sleep(2)

08:52:46.824 This is a span
08:52:47.830   This is an info log


![CleanShot 2024-06-17 at 16.53.01.png](attachment:96e28926-82ba-4c9f-8e51-fd8f61d52249.png)

# Topic Modelling in Production

Why do we care about Open Telemetry

In [5]:
import random

def topic_model(query):
    return random.choice([1,2,3])

topic_model("This is a frog")

3

In [6]:
# Capability
from pydantic import BaseModel,Field
from typing import Literal

class Capability(BaseModel):
    capabilities: list[Literal['Web Search','Search Email','Retrieve Calendar']] = Field(...,description="This \
    is a list of capabilities that must be provided in order to execute/respond to the user's query")
    missing_capabilities: list[str] = Field(...,description="These are capabilities that are missing from the provided list \
    that you think should be provided")

In [7]:
import instructor
from openai import OpenAI

client = instructor.from_openai(OpenAI())

resp = client.chat.completions.create(
    model = "gpt-4o",
    response_model = Capability,
    messages = [
        {
            "role":"system",
            "content": "You are an advanced tagging system that excels at extracting what capabilites from the provided list \
            need to be used in order to answer a user's query."
        },
        {
            "role":"user",
            "content": "The query is 'Can you schedule a meeting with Daniel later this week?'"
        }
    ]
)
resp

Capability(capabilities=['Retrieve Calendar'], missing_capabilities=['Calendar Scheduling'])

In [18]:
class Capability(BaseModel):
    """
    This is a model representing an evaluation of the required capabilities to execute a task.

    A capability here is a brief 2-3 word phrase that describes a tool or function that is needed to fulfil a user request
    """
    capabilities: list[Literal['Web Search','Search Email','Retrieve Calendar']] = Field(...,description="This \
    is a list of capabilities that must be provided in order to execute/respond to the user's query")
    missing_capabilities: list[str] = Field(...,description="These are capabilities that are missing from the provided list \
    that you think should be provided")

class QueryTagger(BaseModel):
    capabilities: list[str]
    topic_model: int

def tag_query(query):
    resp = client.chat.completions.create(
    model = "gpt-3.5-turbo",
    response_model = Capability,
    messages = [
            {
                "role":"system",
                "content": "You are an advanced tagging system that excels at extracting what capabilites from the provided list \
                need to be used in order to answer a user's query."
            },
            {
                "role":"user",
                "content": f"The query is '{query}'"
            }
        ]
    )
    return QueryTagger(**{
        "capabilities": resp.capabilities + resp.missing_capabilities,
        "topic_model": topic_model(query)
    })

# tag_query("Can you schedule a chat with Daniel in the Tanjong Pagar room later in the week?")

What can we do with this? Well, it's extremely useful when we can segregate by topic and capabilities to see what to prioritise in our roadmap

In [22]:
from lib.data import get_labels

test_data = get_labels("../data/queries_single_label.jsonl")

In [23]:
len(test_data)

1066

In [9]:
tagged_queries = [
   tag_query(item['query']) for item in test_data
]
tagged_queries

[QueryTagger(capabilities=['Web Search'], topic_model=1),
 QueryTagger(capabilities=['Web Search'], topic_model=3),
 QueryTagger(capabilities=['Web Search'], topic_model=2),
 QueryTagger(capabilities=['Web Search'], topic_model=3),
 QueryTagger(capabilities=['Web Search'], topic_model=3),
 QueryTagger(capabilities=['Web Search'], topic_model=2),
 QueryTagger(capabilities=['Web Search'], topic_model=1),
 QueryTagger(capabilities=['Web Search'], topic_model=1),
 QueryTagger(capabilities=['Web Search'], topic_model=3),
 QueryTagger(capabilities=['Biology Knowledge'], topic_model=1)]

In [54]:
from openai import AsyncOpenAI
import instructor
from tqdm.asyncio import tqdm_asyncio as asyncio
from tenacity import retry, stop_after_attempt, wait_random_exponential
from asyncio import Semaphore

client = instructor.from_openai(AsyncOpenAI())
sem = Semaphore(20)

class Capability(BaseModel):
    """
    This is a model representing an evaluation of the required capabilities to execute a task.

    A capability here is a brief 2-3 word phrase that describes a tool or function that is needed to fulfil a user request
    """
    capabilities: list[Literal['Product Information Retrieval','Flight Information Retrieval','Restaurant Recomendations',\
    'Search Email','Retrieve Calendar','Latest News','Historical Price Information']] = Field(...,description="This \
    is a list of capabilities that must be provided in order to execute/respond to the user's query")

class QueryTagger(BaseModel):
    capabilities: list[str]
    topic_model: int
    query:str

@retry(
        wait=wait_random_exponential(multiplier=1, min=10, max=90),
        stop=stop_after_attempt(3),
    )
async def tag_query(query):
    async with sem:
        resp = await client.chat.completions.create(
        model = "gpt-3.5-turbo",
        response_model = Capability,
        messages = [
                {
                    "role":"system",
                    "content": "You are an advanced tagging system that excels at extracting what capabilites from the provided list \
                    need to be used in order to answer a user's query."
                },
                {
                    "role":"user",
                    "content": f"The query is '{query}'"
                }
            ],
            max_retries=3
        )
        return QueryTagger(**{
            "capabilities": resp.capabilities,
            "topic_model": topic_model(query),
            "query": query
        })

async def process_queries(data:list[str]):
    coros = [tag_query(item['query']) for item in data]
    res = await asyncio.gather(*coros)
    return res

processed = await process_queries(test_data[:100])

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.36it/s]


In [55]:
import pandas as pd
import json

df = pd.DataFrame([json.loads(item.model_dump_json()) for item in processed])
df['capabilities'] = df['capabilities'].astype(str)
df['capabilities'] = df['capabilities'].str.strip('[]').str.split(', ')
df = df.explode('capabilities')
df.groupby(["topic_model","capabilities"]).size()

topic_model  capabilities                   
1            'Flight Information Retrieval'      2
             'Historical Price Information'     15
             'Latest News'                       9
             'Product Information Retrieval'    29
             'Restaurant Recomendations'         2
             'Retrieve Calendar'                 6
             'Search Email'                     11
2            'Flight Information Retrieval'      1
             'Historical Price Information'     10
             'Latest News'                       5
             'Product Information Retrieval'    13
             'Restaurant Recomendations'         1
             'Retrieve Calendar'                 4
             'Search Email'                      6
3            'Flight Information Retrieval'      3
             'Historical Price Information'     19
             'Latest News'                       6
             'Product Information Retrieval'    26
             'Restaurant Recomendatio

## Average Cos Distance ( Track with Logfire perhaps )

In [14]:
from lib.data import get_labels
from lib.query import full_text_search
from lib.models import EmbeddedPassage
from lib.db import get_table
from lib.string_helpers import strip_punctuation
from lib.openai_helpers import generate_embeddings
from tqdm import tqdm
import lancedb

db = lancedb.connect("../lance")

candidates = {
    "Full Text Search": full_text_search,
}

test_data = get_labels("../data/queries_single_label.jsonl")[:2]
table = get_table(db, "ms_marco", EmbeddedPassage)


query_embeddings = generate_embeddings(test_data, 20)

100%|██████████| 1/1 [00:00<00:00, 7096.96it/s]


In [17]:
from sklearn.metrics.pairwise import cosine_similarity

cosine_similarity(query_embeddings,query_embeddings)


[[-0.015156371518969536,
  0.0004377496661618352,
  -0.016145572066307068,
  0.014712936244904995,
  0.038567449897527695,
  0.051211025565862656,
  0.02396821416914463,
  0.06394556164741516,
  -0.04352482408285141,
  0.03174537792801857,
  0.009994336403906345,
  0.020386626943945885,
  0.011722594499588013,
  0.022672021761536598,
  0.018624257296323776,
  -0.03565669804811478,
  0.007163176313042641,
  -0.01920413412153721,
  -0.01973853074014187,
  0.028857367113232613,
  -0.04461635276675224,
  0.009965910576283932,
  0.005326901562511921,
  -0.012029588222503662,
  0.03949980065226555,
  -0.013917027972638607,
  0.0028652704786509275,
  -0.0036981317680329084,
  0.004772608168423176,
  -0.029812457039952278,
  0.013484963215887547,
  -0.03347363695502281,
  0.017657797783613205,
  -0.002978971693664789,
  0.037521399557590485,
  -0.02027292549610138,
  0.02476412244141102,
  -0.06885745376348495,
  -0.0009984386852011085,
  0.013860177248716354,
  -0.0017254158155992627,
  -0.03

In [16]:
from lancedb.rerankers import CohereReranker
from scipy.spatial.distance import cosine
import numpy as np
import pandas as pd

evaluation_k = 10


data = []
for query,query_embed in tqdm(zip(test_data,query_embeddings)):
    reranker = CohereReranker(model_name="rerank-multilingual-v3.0")
    items = table.search(strip_punctuation(query["query"]), query_type="fts").limit(50).to_list()
    reranked_items = table.search(strip_punctuation(query["query"]), query_type="fts").limit(50).rerank(reranker=reranker).to_list()

    item_vectors = [item['vector'] for item in items][:evaluation_k]
    reranked_item_vectors = [item['vector'] for item in reranked_items][:evaluation_k]

    item_chunk_ids = [item['chunk_id'] for item in items]
    reranked_item_chunk_ids = [item['chunk_id'] for item in reranked_items]

    # Now we calculate the average cosine distance between each list and the query
    item_cos_similarities = cosine_similarity(item_vectors, [query_embed]).flatten()
    reranked_item_cos_similarities = cosine_similarity(reranked_item_vectors, [query_embed]).flatten()

    # evaluation_metrics = score(item_chunk_ids, query["selected_chunk_id"])
    

    data.append({
        "reranked_cos_distance": reranked_item_cos_distance,
        "cos_distance": item_cos_distance
    })



df = pd.DataFrame(data)
df['improvement'] = df['cos_distance']-df['reranked_cos_distance']
df

2it [00:01,  1.67it/s]


Unnamed: 0,reranked_cos_distance,cos_distance,improvement
0,0.449987,0.417194,-0.032792
1,0.473011,0.473011,0.0


count    5.000000e+01
mean     0.000000e+00
std      5.934392e-17
min     -1.110223e-16
25%      0.000000e+00
50%      0.000000e+00
75%      0.000000e+00
max      1.110223e-16
Name: improvement, dtype: float64