In [1]:

from langchain_chroma.vectorstores import Chroma
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

from openevals.llm import create_llm_as_judge
from openevals.prompts import RETRIEVAL_HELPFULNESS_PROMPT

import pandas as pd
import time




In [2]:
# Initialize embeddings model (ensure the same model used during creation)
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Load the existing Chroma vector store
persist_directory = "../db/swim_rules_semantic"
vector_store = Chroma(persist_directory=persist_directory, embedding_function=embeddings)




## Helper Functions


In [3]:

# Function to perform semantic search
def semantic_search(query, k=10):
    """
    Perform a semantic search on the Chroma vector store.

    Args:
        query (str): The query string to search for.
        k (int): The number of top results to retrieve.

    Returns:
        list: A list of documents matching the query.
    """
    results = vector_store.similarity_search_with_score(query, k=k,)
    chunks = [{"title": f"chunk_{chunk+1}", "content": result[0].page_content} for chunk, result in enumerate(results)]

    relevance = chunk_evaluator(inputs=query, outputs=chunks)

    return relevance




chunk_evaluator = create_llm_as_judge(
    prompt=RETRIEVAL_HELPFULNESS_PROMPT,
    feedback_key="retrieval_helpfulness",
    model="openai:gpt-4o",
)




## Test chunk retrieval


In [4]:
%%time
# Example usage
queries = [
    "Prior to the start signal the swimmer is seen rocking back and forth.  Is this a disqualification?",
    "upon the command 'take your mark', the swimmer takes their starting position and just prior to the start signal, the swimmer is seen moving forward.",
    "What happens if a swimmer touches the wall with one hand in freestyle?",
    "what happens if a swimmer touches the wall with one hand in breaststroke?",
    "A swimmer is wearing a wetsuit in a swimming competition?",
    "During a freestyle event, a swimmer does a flip turn and touches the wall with only one foot.",
    "A breaststroke swimmer moves their hands in a sculling or flipper movement at the end of the first arm stroke, both after the start and after the turn Should they be disqualified?",
    "In a 9-10 100 yard breaststroke event, a swimmer completes 50 yards with a simulatenous two-hand touch and, thinking that the race is over, pushes back from the wall to read the scoreboard. At this point, realizing that the race is only halfway over, the swimmer returns to the wall, pushes off on the breast, and completes the required distance in good form. What call, if any, should be made?",
    "In a 9-10 100 yard breaststroke event, a swimmer completes 50 yards with a simulatenous two-hand touch and, thinking that the race is over, pushes back from the wall on their back to read the scoreboard. At this point, realizing that the race is only halfway over, the swimmer returns to the wall, pushes off on the breast, and completes the required distance in good form. What call, if any, should be made?",
    "A swimmer swims the breaststroke in a way that their hands are completely underwater when they are pushed forward together from their breast.",
    "A swimmer swims the breaststroke in a way that their elbows are above the water line during the forward movement of the arms.",
    "A breaststroker’s head breaks the surface of the water during each cycle, however, the swimmer does not take a breath even if the head breaks the surface.",
    "During a freestyle event, a swimmer starts in the water such that the swimmwer is facing the starting end of the pool with one hand on the wall.",
    "During a freestyle event, a swimmer starts in the water such that the swimmer is facing the other end of the pool with one hand on the wall.",
    "During a butterfly event, a swimmer starts in the water such that the swimmer is facing the other end of the pool with one hand on the wall.",
    "During a butterfly event, a swimmer starts in the water such that the swimmer has one hand on the wall looking away from the starting block.",
    "During a butterfly event, a swimmer starts in the water such that the swimmer is looking at the opposite end of the pool.",
    "In a 200m individual medley event, a swimmer swims the first 50m in butterfly, the second 50m in backstroke, the third 50m starts swimming freestyle but then switches to breaststroke, and the last 50m in freestyle.",
    "In a 100y freestyle event, the swimmer swims the first 50y in butterfly and the last 50y in backstroke.",
]

relevance_list = []
for i, query in enumerate(queries):
    print(f"\nQuery: {query}")
    start_time = time.time()
    relevance = semantic_search(query)
    end_time = time.time()

    relevance["query"] = query
    relevance["evaluation_time"] = end_time - start_time
    relevance_list.append(relevance)

    print(f"completed {i+1} out of {len(queries)} queries")





Query: Prior to the start signal the swimmer is seen rocking back and forth.  Is this a disqualification?
completed 1 out of 19 queries

Query: upon the command 'take your mark', the swimmer takes their starting position and just prior to the start signal, the swimmer is seen moving forward.
completed 2 out of 19 queries

Query: What happens if a swimmer touches the wall with one hand in freestyle?
completed 3 out of 19 queries

Query: what happens if a swimmer touches the wall with one hand in breaststroke?
completed 4 out of 19 queries

Query: A swimmer is wearing a wetsuit in a swimming competition?
completed 5 out of 19 queries

Query: During a freestyle event, a swimmer does a flip turn and touches the wall with only one foot.
completed 6 out of 19 queries

Query: A breaststroke swimmer moves their hands in a sculling or flipper movement at the end of the first arm stroke, both after the start and after the turn Should they be disqualified?
completed 7 out of 19 queries

Query: I

In [5]:
relevance_df = pd.DataFrame(relevance_list)
relevance_df = relevance_df[["key", "score", "evaluation_time", "query", "comment"]]
relevance_df

Unnamed: 0,key,score,evaluation_time,query,comment
0,retrieval_helpfulness,False,11.807663,Prior to the start signal the swimmer is seen ...,The input question asks whether a swimmer rock...
1,retrieval_helpfulness,False,8.947018,"upon the command 'take your mark', the swimmer...",To evaluate the relevance of the retrieved out...
2,retrieval_helpfulness,True,3.378093,What happens if a swimmer touches the wall wit...,"Upon evaluating the provided outputs, the resp..."
3,retrieval_helpfulness,True,6.45091,what happens if a swimmer touches the wall wit...,The input asks what happens if a swimmer touch...
4,retrieval_helpfulness,False,11.367125,A swimmer is wearing a wetsuit in a swimming c...,To evaluate the relevance of the outputs relat...
5,retrieval_helpfulness,False,9.52866,"During a freestyle event, a swimmer does a fli...",To assess the relevance of the retrieved outpu...
6,retrieval_helpfulness,True,27.378261,A breaststroke swimmer moves their hands in a ...,Based on the information provided in the outpu...
7,retrieval_helpfulness,True,11.946889,"In a 9-10 100 yard breaststroke event, a swimm...","In this scenario, we need to evaluate whether ..."
8,retrieval_helpfulness,True,9.412799,"In a 9-10 100 yard breaststroke event, a swimm...",The situation in the input queries about a swi...
9,retrieval_helpfulness,False,9.771419,A swimmer swims the breaststroke in a way that...,Let's evaluate the retrieved outputs for their...


In [6]:
relevance_df.evaluation_time.sum()

191.65793538093567

In [7]:
relevance_df.describe()

Unnamed: 0,evaluation_time
count,19.0
mean,10.08726
std,4.840351
min,3.378093
25%,8.200002
50%,9.52866
75%,11.346206
max,27.378261


In [8]:
relevance_df.score.value_counts()

score
False    11
True      8
Name: count, dtype: int64

In [9]:
import textwrap
for relevance in relevance_list:
    print(f"\nQuery: {textwrap.fill(relevance['query'], width=90,replace_whitespace=False)}")
    print(f">>>Evaluation Time: {relevance['evaluation_time']:.2f} seconds")
    print(f">>>Relevance: {relevance['key']} {relevance['score']},\n{textwrap.fill(relevance['comment'], width=90,replace_whitespace=False)}")



Query: Prior to the start signal the swimmer is seen rocking back and forth.  Is this a
disqualification?
>>>Evaluation Time: 11.81 seconds
>>>Relevance: retrieval_helpfulness False,
The input question asks whether a swimmer rocking back and forth prior to the start signal
would lead to disqualification. This involves understanding the rules surrounding starts
and potential disqualifications:

1. **Relevant Information from Retrieved Outputs:**
   -
**Chunk 1 & 2:** Define false starts and starter responsibilities, but don't explicitly
state that rocking is a reason for disqualification (False Starts sections). They mention
starting cues and false starts, which could imply the relevance of a swimmer's actions at
the block.
   - **Chunk 1:** Mentions that swimmers must be stationary before the start
signal is given, which indirectly suggests that rocking might be problematic as it implies
lack of stillness.
   - **Chunk 2:** Discusses starter and referee responsibilities,
touching upon