In [5]:
from langchain_chroma.vectorstores import Chroma
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

from openevals.llm import create_llm_as_judge
from openevals.prompts import RETRIEVAL_HELPFULNESS_PROMPT

In [6]:


# Embedding model for queries -- this has to be the same model that was used
# when the persisted store was originally built.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Open the Chroma vector store that already exists on disk.
persist_directory = "../db/swim_rules_semantic"
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory,
)


## Helper Functions

In [7]:

# Function to perform semantic search
def semantic_search(query: str, k: int = 4) -> list:
    """
    Run a similarity search on the Chroma vector store and judge each hit.

    Every retrieved chunk is passed, together with the query, to the
    module-level ``chunk_evaluator``; a one-line report (rank, store score,
    evaluator result, page, chunk text) is printed per chunk.

    Args:
        query (str): The query string to search for.
        k (int): The number of top results to retrieve.

    Returns:
        list: The ``(document, score)`` pairs exactly as returned by
        ``vector_store.similarity_search_with_score`` (not bare documents).
    """
    results = vector_store.similarity_search_with_score(query, k=k)

    for rank, (doc, score) in enumerate(results, start=1):
        relevance = chunk_evaluator(inputs=query, outputs=doc.page_content)
        # A chunk may lack a "page" entry; use a placeholder rather than
        # raising KeyError and aborting the report half-way through.
        page = (doc.metadata or {}).get("page", "n/a")
        print(f"\n>>>Chunk: {rank} score: {score:0.4f} {relevance} Page {page}: {doc.page_content}...")
    return results


# LLM-as-judge used by semantic_search: called once per retrieved chunk with
# the query as `inputs` and the chunk text as `outputs`.
chunk_evaluator = create_llm_as_judge(
    model="openai:gpt-4o",
    feedback_key="retrieval_helpfulness",
    prompt=RETRIEVAL_HELPFULNESS_PROMPT,
)

## Test chunk retrieval

In [9]:

# Example usage: run one query through retrieval and per-chunk judging.
queries = [
    "During a butterfly event, a swimmer starts in the water such that the swimmer is facing away from the starting block.",
]

for query in queries:
    print(f"\nQuery: {query}")
    # semantic_search already prints a report for every chunk; printing its
    # return value too would dump the raw (document, score) list a second time.
    semantic_search(query)


Query: During a butterfly event, a swimmer starts in the water such that the swimmer is facing away from the starting block.

>>>Chunk: 1 score: 0.6893 {'key': 'retrieval_helpfulness', 'score': True, 'comment': 'The input describes a specific aspect of a butterfly swimming event involving a swimmer starting in the water facing away from the starting block. \n\nTo accurately answer the input, the relevant pieces of information needed include:\n1. **Starting rules for butterfly events:** Understanding whether starting in water facing away from the wall is permissible or how it aligns with current rules.\n2. **Any specific regulations regarding swimmer positioning in the water at start.**\n\nThe output provides the following relevant information:\n- "An in-water forward start is allowed for butterfly, breaststroke, and freestyle."\n- "For an in-water forward start, the swimmer shall have at least one hand in contact with the wall."\n\n**Evaluation:**\n- **Directly relevant information:**

In [4]:

# Example usage: the full set of rule scenarios used to probe retrieval.
queries = [
    "Prior to the start signal the swimmer is seen rocking back and forth.  Is this a disqualification?",
    "upon the command 'take your mark', the swimmer takes their starting position and just prior to the start signal, the swimmer is seen moving forward.",
    "What happens if a swimmer touches the wall with one hand in freestyle?",
    "what happens if a swimmer touches the wall with one hand in breaststroke?",
    "A swimmer is wearing a wetsuit in a swimming competition?",
    "During a freestyle event, a swimmer does a flip turn and touches the wall with only one foot.",
    "A breaststroke swimmer moves their hands in a sculling or flipper movement at the end of the first arm stroke, both after the start and after the turn Should they be disqualified?",
    "In a 9-10 100 yard breaststroke event, a swimmer completes 50 yards with a simulatenous two-hand touch and, thinking that the race is over, pushes back from the wall to read the scoreboard. At this point, realizing that the race is only halfway over, the swimmer returns to the wall, pushes off on the breast, and completes the required distance in good form. What call, if any, should be made?",
    "A swimmer swims the breaststroke in a way that their hands are completely underwater when they are pushed forward together from their breast.",
    "A swimmer swims the breaststroke in a way that their elbows are above the water line during the forward movement of the arms.",
    "A breaststroker’s head breaks the surface of the water during each cycle, however, the swimmer does not take a breath even if the head breaks the surface.",
    "During a freestyle event, a swimmer starts in the water such that the swimmwer is facing the starting end of the pool.",
    "During a freestyle event, a swimmer starts in the water such that the swimmer is facing away from the starting block.",
    "During a butterfly event, a swimmer starts in the water such that the swimmer is facing away from the starting block.",
    "In a 200m individual medley event, a swimmer swims the first 50m in butterfly, the second 50m in backstroke, the third 50m starts swimming freestyle but then switches to breaststroke, and the last 50m in freestyle.",
]

# The slice [-4:-3] selects exactly one query (the fourth from the end);
# widen or drop the slice to evaluate more of the list.
for query in queries[-4:-3]:
    print(f"\nQuery: {query}")
    # semantic_search already prints a report for every chunk; printing its
    # return value too would dump the raw (document, score) list a second time.
    semantic_search(query)


Query: During a freestyle event, a swimmer starts in the water such that the swimmwer is facing the starting end of the pool.

>>>Chunk: 1 score: 0.7462 {'key': 'retrieval_helpfulness', 'score': False, 'comment': 'The prompt\'s input concerns the start of a freestyle swimming event, particularly focusing on a scenario where a swimmer begins the race in the water, facing the end of the pool. The assessor\'s job is to evaluate whether the retrieved outputs provide relevant information that addresses this specific scenario.\n\n1. **Relevant Contextual Information Needed:**\n   - Rules governing the start of a freestyle event\n   - Any stipulations for starting in the water versus starting outside\n   - Relevant swimming techniques or requirements related to the starting phase in an event labeled freestyle\n \n2. **Output Analysis:**\n   - **101.5 FREESTYLE Section:**\n     - .1  "Start — The forward start shall be used."\n     - This clause indicates that a forward start is required in f