In [1]:

import textwrap
import time

import pandas as pd
from langchain_chroma.vectorstores import Chroma
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from openevals.llm import create_llm_as_judge
from openevals.prompts import RETRIEVAL_HELPFULNESS_PROMPT




In [2]:
# Relative path of the directory the Chroma collection is persisted in.
persist_directory = "../db/swim_rules_semantic"

# Embedding model used for queries. It must be the same model that was used
# when the collection was created.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Open the vector store backed by that directory.
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory,
)




## Helper Functions


In [3]:

# Function to perform semantic search
def semantic_search(query, k=10):
    """
    Retrieve the top-k chunks for a query and grade them with the LLM judge.

    Despite its name, this function does not return the retrieved documents.
    It runs a similarity search against the module-level ``vector_store``,
    packages the hits as titled chunks, and returns whatever the module-level
    ``chunk_evaluator`` produces for them.

    Args:
        query (str): The query string to search for.
        k (int): The number of top results to retrieve.

    Returns:
        The result of ``chunk_evaluator(inputs=query, outputs=chunks)``.
        The rest of this notebook treats it as a mutable mapping with
        ``key``, ``score`` and ``comment`` entries.
    """
    # Each hit is a (document, score) pair; the score is not forwarded to the
    # judge, only the chunk text is.
    hits = vector_store.similarity_search_with_score(query, k=k)

    # Titles are 1-based ("chunk_1", "chunk_2", ...) in retrieval order.
    chunks = [
        {"title": f"chunk_{rank}", "content": document.page_content}
        for rank, (document, _score) in enumerate(hits, start=1)
    ]

    return chunk_evaluator(inputs=query, outputs=chunks)




# LLM-as-judge grader called by semantic_search; its results are reported under
# the "retrieval_helpfulness" feedback key.
chunk_evaluator = create_llm_as_judge(
    model="openai:gpt-4o",
    feedback_key="retrieval_helpfulness",
    prompt=RETRIEVAL_HELPFULNESS_PROMPT,
)




## Test chunk retrieval


In [4]:
%%time
# Test scenarios (swimming-rules questions and situations) that are each passed
# to semantic_search below.
# NOTE(review): a few entries contain typos (e.g. "simulatenous", "swimmwer").
# They are part of the text that gets searched, so correcting them may change
# the retrieval results recorded in this notebook.
queries = [
    "Prior to the start signal the swimmer is seen rocking back and forth.  Is this a disqualification?",
    "upon the command 'take your mark', the swimmer takes their starting position and just prior to the start signal, the swimmer is seen moving forward.",
    "What happens if a swimmer touches the wall with one hand in freestyle?",
    "what happens if a swimmer touches the wall with one hand in breaststroke?",
    "A swimmer is wearing a wetsuit in a swimming competition?",
    "During a freestyle event, a swimmer does a flip turn and touches the wall with only one foot.",
    "A breaststroke swimmer moves their hands in a sculling or flipper movement at the end of the first arm stroke, both after the start and after the turn Should they be disqualified?",
    "In a 9-10 100 yard breaststroke event, a swimmer completes 50 yards with a simulatenous two-hand touch and, thinking that the race is over, pushes back from the wall to read the scoreboard. At this point, realizing that the race is only halfway over, the swimmer returns to the wall, pushes off on the breast, and completes the required distance in good form. What call, if any, should be made?",
    "In a 9-10 100 yard breaststroke event, a swimmer completes 50 yards with a simulatenous two-hand touch and, thinking that the race is over, pushes back from the wall on their back to read the scoreboard. At this point, realizing that the race is only halfway over, the swimmer returns to the wall, pushes off on the breast, and completes the required distance in good form. What call, if any, should be made?",
    "A swimmer swims the breaststroke in a way that their hands are completely underwater when they are pushed forward together from their breast.",
    "A swimmer swims the breaststroke in a way that their elbows are above the water line during the forward movement of the arms.",
    "A breaststroker’s head breaks the surface of the water during each cycle, however, the swimmer does not take a breath even if the head breaks the surface.",
    "During a freestyle event, a swimmer starts in the water such that the swimmwer is facing the starting end of the pool with one hand on the wall.",
    "During a freestyle event, a swimmer starts in the water such that the swimmer is facing the other end of the pool with one hand on the wall.",
    "During a butterfly event, a swimmer starts in the water such that the swimmer is facing the other end of the pool with one hand on the wall.",
    "During a butterfly event, a swimmer starts in the water such that the swimmer has one hand on the wall looking away from the starting block.",
    "During a butterfly event, a swimmer starts in the water such that the swimmer is looking at the opposite end of the pool.",
    "In a 200m individual medley event, a swimmer swims the first 50m in butterfly, the second 50m in backstroke, the third 50m starts swimming freestyle but then switches to breaststroke, and the last 50m in freestyle.",
    "In a 100y freestyle event, the swimmer swims the first 50y in butterfly and the last 50y in backstroke.",
]

# Run every query through retrieval + LLM grading and record how long each one
# takes. The list keeps the spelling "revelance_list" because later cells refer
# to it under that name.
revelance_list = []
total_queries = len(queries)
for query_number, query in enumerate(queries, start=1):
    print(f"\nQuery: {query}")

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()
    relevance = semantic_search(query)
    end_time = time.perf_counter()

    # Annotate the evaluator result so every record carries its own query and
    # elapsed seconds.
    relevance["query"] = query
    relevance["evaluation_time"] = end_time - start_time
    revelance_list.append(relevance)

    print(f"completed {query_number} out of {total_queries} queries")





Query: Prior to the start signal the swimmer is seen rocking back and forth.  Is this a disqualification?
completed 1 out of 19 queries

Query: upon the command 'take your mark', the swimmer takes their starting position and just prior to the start signal, the swimmer is seen moving forward.
completed 2 out of 19 queries

Query: What happens if a swimmer touches the wall with one hand in freestyle?
completed 3 out of 19 queries

Query: what happens if a swimmer touches the wall with one hand in breaststroke?
completed 4 out of 19 queries

Query: A swimmer is wearing a wetsuit in a swimming competition?
completed 5 out of 19 queries

Query: During a freestyle event, a swimmer does a flip turn and touches the wall with only one foot.
completed 6 out of 19 queries

Query: A breaststroke swimmer moves their hands in a sculling or flipper movement at the end of the first arm stroke, both after the start and after the turn Should they be disqualified?
completed 7 out of 19 queries

Query: I

In [5]:
# One row per evaluated query, restricted to the columns of interest and put in
# a fixed display order.
revelance_df = pd.DataFrame(revelance_list)[
    ["key", "score", "evaluation_time", "query", "comment"]
]
revelance_df

Unnamed: 0,key,score,evaluation_time,query,comment
0,retrieval_helpfulness,False,13.684895,Prior to the start signal the swimmer is seen ...,The input question asks if a swimmer rocking b...
1,retrieval_helpfulness,True,12.065718,"upon the command 'take your mark', the swimmer...",To evaluate the relevance of the retrieved out...
2,retrieval_helpfulness,True,7.063066,What happens if a swimmer touches the wall wit...,The input asks about the implications for a fr...
3,retrieval_helpfulness,True,9.013899,what happens if a swimmer touches the wall wit...,The desired information from the input query r...
4,retrieval_helpfulness,False,4.157364,A swimmer is wearing a wetsuit in a swimming c...,To evaluate the relevance of the retrieved inf...
5,retrieval_helpfulness,False,15.483246,"During a freestyle event, a swimmer does a fli...",The input focuses on a swimmer performing a fl...
6,retrieval_helpfulness,False,5.710802,A breaststroke swimmer moves their hands in a ...,The input asks whether a breaststroke swimmer ...
7,retrieval_helpfulness,False,12.068385,"In a 9-10 100 yard breaststroke event, a swimm...",The input scenario describes a moment during a...
8,retrieval_helpfulness,False,13.7479,"In a 9-10 100 yard breaststroke event, a swimm...",The input scenario outlines a possible infract...
9,retrieval_helpfulness,False,6.833106,A swimmer swims the breaststroke in a way that...,The input describes a swimming technique in th...


In [6]:
# Total seconds spent in semantic_search (retrieval + LLM grading) over all queries.
revelance_df["evaluation_time"].sum()

242.9420804977417

In [7]:
# Summary statistics; only the numeric evaluation_time column (seconds) is summarized.
revelance_df.describe()

Unnamed: 0,evaluation_time
count,19.0
mean,12.786425
std,6.943906
min,4.157364
25%,7.730208
50%,11.665691
75%,14.615573
max,29.076185


In [8]:
# How many queries the judge scored True versus False.
revelance_df["score"].value_counts()

score
True     10
False     9
Name: count, dtype: int64

In [12]:
def _wrap_paragraphs(text, width=90):
    """
    Wrap ``text`` to ``width`` columns one line at a time.

    The judge's comments contain their own line breaks. Passing such text to a
    single ``textwrap.fill(..., replace_whitespace=False)`` call counts the
    embedded newlines inside the wrap width, which produces ragged output with
    breaks in the middle of lines. Wrapping each original line separately keeps
    the existing line breaks and wraps only the lines that are too long.

    Args:
        text (str): The text to wrap; may contain newlines.
        width (int): Maximum line length.

    Returns:
        str: The wrapped text.
    """
    return "\n".join(
        textwrap.fill(line, width=width, replace_whitespace=False)
        for line in text.splitlines()
    )


# Print a readable report: query, elapsed time, verdict and the judge's reasoning.
for relevance in revelance_list:
    print(f"\nQuery: {_wrap_paragraphs(relevance['query'])}")
    print(f">>>Evaluation Time: {relevance['evaluation_time']:.2f} seconds")
    print(f">>>Relevance: {relevance['key']} {relevance['score']},\n{_wrap_paragraphs(relevance['comment'])}")



Query: Prior to the start signal the swimmer is seen rocking back and forth.  Is this a
disqualification?
>>>Evaluation Time: 13.68 seconds
>>>Relevance: retrieval_helpfulness False,
The input question asks if a swimmer rocking back and forth prior to the start signal
constitutes a disqualification. To answer this, relevant information on specific swimming
rules and regulations concerning starts is necessary.

1. **Relevant Information for
Evaluation**:
   - Rules on starting procedures for swimmers.
   - Regulations regarding
movements on the starting platform before the start signal is given.
   - Consideration of
actions that can lead to disqualification due to misconduct or not adhering to start
commands.

2. **Findings in the Retrieved Outputs**:
   - **Relevant Information**:
     -
**Chunk 1 & 2**: Discusses procedures around starting commands, actions like "take your
mark," and potential disqualification rules for actions like false starts and deliberate
delays or misconduct d