In [215]:
import os
import re

from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import PyPDFLoader, PDFMinerLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain.retrievers.multi_query import MultiQueryRetriever

from langsmith import traceable

from ragas import EvaluationDataset
import json
import pandas as pd

# openAI embeddings
from langchain_openai.embeddings import OpenAIEmbeddings

# vector store
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# load api keys
from dotenv import load_dotenv
load_dotenv()

True

In [216]:
#load document
file = "../documents/2025-26_iihf_rulebook.pdf"
loader = PyPDFLoader(file)
docs = loader.load()

#drop TOC and appendix
docs_cropped = docs[15:160]

# sticth docs back together
parts = []
for i, d in enumerate(docs_cropped, start=1):
    parts.append(f"\n\n<<<PAGE {i}>>>\n{d.page_content.strip()}")

merged_text = "".join(parts)

# Create a Document and update metadata
merged_doc = [Document(
    page_content=merged_text,
    metadata={
        "source": "IIHF Rulebook 2025-26",
        "page_count": len(docs_cropped),
    }
)]


In [None]:
# for spltting
main_rule_re = r"RULE[ \u00A0]+(?P<main_id>\d{1,3})[ \u00A0]+(?P<main_name>[A-Z-']+(?:[ \u00A0][A-Z-']+)*)"
sub_rule_re = r"(?P<sub_id>\d{1,3}\.\d{1,2})[.\u00A0 ]*[ \u00A0]+(?P<sub_name>[A-Z-']+(?:[ \u00A0][A-Z-']+)*)"


# for metadata
MAIN_RE = re.compile(main_rule_re)
SUB_RE = re.compile(sub_rule_re)


def normalize_ocr(text: str) -> str:
    # collapse weird spaces (regular + non-breaking)
    text = re.sub(r"[ \u00A0]+", " ", text)

    # fix error for 'penalty' which somtimes is 'penal ty'
    text = re.sub(r"\bPENAL\s*TY\b", "PENALTY", text)

    return text



#match regex pattern for metadata. Extracts main-rule, main-rule-id, main-rule-name. Same for sub-rule.
def rule_metadata(doc):
    doc.page_content = normalize_ocr(doc.page_content)
    text = doc.page_content.lstrip()

    m_main = MAIN_RE.search(text)
    if m_main:
        doc.metadata["main_rule"] = f"RULE {m_main['main_id']} {m_main['main_name'].strip()}"
        doc.metadata["main_rule_id"] = m_main.group("main_id")
        doc.metadata["main_rule_name"] = m_main.group("main_name")

    m_sub = SUB_RE.search(text)
    if m_sub:
        doc.metadata["sub_rule"] = f"{m_sub['sub_id']} {m_sub['sub_name'].strip()}"
        doc.metadata["sub_rule_id"] = m_sub.group("sub_id")
        doc.metadata["sub_rule_name"] = m_sub.group("sub_name")

    return doc


# splits text by the rules given in the regex.
def split_rule_text(docs, separator, chunk_size):
    splitter = RecursiveCharacterTextSplitter(
        add_start_index=True,
        keep_separator=True,
        is_separator_regex=True,
        separators=[separator],
        chunk_size=chunk_size,
        chunk_overlap=0
    )
    chunks = splitter.split_documents(docs)
    for chunk in chunks:
        rule_metadata(chunk)
        
    return chunks

    
main_rule_split = split_rule_text(docs=merged_doc, separator=main_rule_re, chunk_size=4000)
sub_rule_split = split_rule_text(docs=main_rule_split, separator=sub_rule_re, chunk_size=600)
print(len(main_rule_split))
print(len(sub_rule_split))



#embeddings
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")


# vector store
vectorstore = FAISS.from_documents(documents=sub_rule_split, embedding=embeddings)
vectorstore.save_local("../vectorstore_old")


93
604


In [232]:
# Generation step

# formatting for llm context
def format_docs(docs):
    parts = []
    for doc in docs:
        meta = {
            "main_rule_id": doc.metadata.get("main_rule_id", "N/A"),
            "main_rule_name": doc.metadata.get("main_rule_name", "N/A"),
            "sub_rule_id": doc.metadata.get("sub_rule_id", "N/A"),
            "sub_rule_name": doc.metadata.get("sub_rule_name", "N/A"),
            "source": doc.metadata.get("source", "N/A")
        }
        parts.append(f"Metadata: {meta}\n{doc.page_content.strip()}")
    return "\n\n".join(parts)


# defineing the system template
system_template = """You are an ice hockey rule assistant.

Follow these rules:
- Answer ONLY using the provided context below. If the answer is unknown or not in the context, say "I don't know".
- Use bulletpoints. After each bullet, include a citation using the metadata main rule and sub rule


Rule citation format:
[<sub_rule_id> <main_rule_name> - <sub_rule_name>]


Context (use only what is inside the markers):
---
{context}
---"""

# defining the prompt template
prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_template),
    ("user", "Original question: {question}"),
])


# retriever
retriever = vectorstore.as_retriever(search_kwargs={"k": 6})

# llm
llm = init_chat_model("gpt-4o-mini", model_provider="openai", temperature=0)




multi_query_system_template = """You are an AI language model assistant. Your task is
    to generate 3 different versions of the given user
    question to retrieve relevant documents from a vector database.
    By generating multiple perspectives on the user question,
    your goal is to help the user overcome some of the limitations
    of distance-based similarity search. Provide these alternative
    questions separated by newlines. The questions are all about ice hockey.
"""

multi_query_prompt_template = ChatPromptTemplate.from_messages([
    ("system", multi_query_system_template),
    ("user", "{question}")
])


multi_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=llm, include_original=True, prompt=multi_query_prompt_template)


rag_chain = (
    prompt_template
    | llm
    | StrOutputParser()
)

@traceable
def rag_bot(question: str):
    # retrieve
    docs = retriever.invoke(question)
    
    # build context and format docs
    context = format_docs(docs)
    
    # call rag_chain
    answer = rag_chain.invoke({"question": question, "context": context})
    
    return {"answer": answer, "documents": docs}

In [233]:
# Set logging for the queries
import logging

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

In [234]:
# rag test
result = rag_bot("If the puck is shot before the red line, is touched after the red line and the goalie freezes it, can the defensive team change?")
#result = rag_bot("When has a player cleared the zone during delayed offside?")

In [237]:
print(result.get('answer'))
result.get('documents')

- The defending team shall not be permitted to make any Player substitutions prior to the “face-off” if the puck is shot into the end zone and the opposing goalkeeper freezes the puck. [81.1 ICING]
- However, a team is permitted to make a Player substitution to replace an injured Player, or when a penalty has been assessed which affects the “on-ice strength” of either team. [82.1 LINE CHANGE] 
- Since the situation involves the goalkeeper freezing the puck, the defending team cannot change players before the face-off. [81.1 ICING]


[Document(id='1c63d403-fbfa-469e-8c2b-b2747d0376f0', metadata={'source': 'IIHF Rulebook 2025-26', 'page_count': 145, 'start_index': 8262, 'sub-rule': '63.1 DELAYING THE GAME', 'sub_rule_id': '63.1', 'sub_rule_name': 'DELAYING THE GAME'}, page_content='NO LINE CHANGE\nIn the event the goal post is displaced accidentally by a defending Player causing a stoppage in play, the ensuing “face-off” shall \nbe conducted at one of the end zone face-off spots in the defending zone. The offending team shall not be permitted to make any \nPlayer substitutions prior to the “face-off”. However, a team shall be permitted to make a Player substitution to replace a goalkeeper \nwho had been substituted for an extra attacker, to replace an injured Player, or when a penalty has been assessed which affects the \n“on-ice strength” of either team.\nIn the event the puck is shot into the end zone by the attacking team from their own side of the center line, and the opposing goal- \nkeeper freezes the puck res

In [None]:
#- No, the defensive team shall not be permitted to make any Player substitutions prior to the “face-off” in this situation. <63.1 DELAYING THE GAME> 
#- However, they can substitute a Player to replace an injured Player or when a penalty has been assessed which affects the “on-ice strength” of either team. <82.1 LINE CHANGE>

- A player has cleared the zone during a delayed off-side when both skates are off the ice and the Linesperson judges the player to have left the playing surface. <83.3 OFF-SIDE - DELAYED OFF-SIDE>
- If their replacement comes onto the ice in the attacking zone while the delayed off-side is still in effect, they too must clear the attacking zone. <83.3 OFF-SIDE - DELAYED OFF-SIDE>

In [51]:
# setup data for ragas testing

# load file
test_questions = []
with open("eval_questions.jsonl", "r") as f:
    for line in f:
        test_questions.append(json.loads(line))
             
dataset = []
for example in test_questions:
    # extract from eval_questions
    id = example.get('id')
    question = example.get('question')
    ground_truth = example.get('ground truth')
    inner_rule = example.get('inner rule')
    
    # run the ragbot and get output answer
    rag_bot_output = rag_bot(question)
    answer = rag_bot_output.get('answer')
    
    # get context
    docs = rag_bot_output.get('documents')
    contexts = [doc.page_content for doc in docs]
    
    dataset.append({
        "user_input": question,
        "retrieved_contexts": contexts,
        "response": answer,
        "reference": ground_truth,
    })

    


In [52]:
from ragas import EvaluationDataset, evaluate
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness

evaluation_dataset = EvaluationDataset.from_list(dataset)
result = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness()],
    llm=llm)
result

Evaluating:   0%|          | 0/63 [00:00<?, ?it/s]

{'context_recall': 0.8571, 'faithfulness': 0.7579, 'factual_correctness(mode=f1)': 0.6886}

In [None]:
{'context_recall': 0.8571, 'faithfulness': 0.8571, 'factual_correctness(mode=f1)': 0.6186} # single query with top 4 docs retrieved
{'context_recall': 0.8571, 'faithfulness': 0.8608, 'factual_correctness(mode=f1)': 0.6614} # single query with top 6 docs retrieved
{'context_recall': 0.8571, 'faithfulness': 0.7579, 'factual_correctness(mode=f1)': 0.6886} # single query with top 8 docs retrieved
{'context_recall': 0.8571, 'faithfulness': 0.7540, 'factual_correctness(mode=f1)': 0.6400} # multi query with top 4 docs retrieved 
