In [2]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

import os
import glob
from pathlib import Path

In [11]:
# get the pdf files path
docs_dir = "../documents"
pdf_files = glob.glob(f"{docs_dir}/*.pdf")


# add pdf files to docs after loading with pdfLoader.
docs = []
for f in pdf_files:
    docs += PyPDFLoader(f).load()
    

In [24]:
# split text into chunks

HEADING_SEPARATORS = [
    r"\nSITUATION\s+\d+\.\d+[^\n]*\n",     # Situation Handbook entries
    r"\nRULE\s+\d+[^\n]*\n",               # Rulebook: "RULE 60 High-sticking"
    r"\nSECTION\s+\d+[^\n]*\n",            # "SECTION 08. STICK INFRACTIONS"
    r"\n[A-Z][A-Z &/’'–\-]{4,}\n",         # ALL-CAPS headings like "EQUIPMENT"
    r"\n{2,}",                             # blank lines (paragraph breaks)
    r"\n",                                 # single newline
    r" "                                   # as last resort
]

coarse_text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1800,
    chunk_overlap=50,
    separators=HEADING_SEPARATORS,
    keep_separator=True,
    is_separator_regex=True
    )


fine_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=120,
    separators=[r"\n{2,}", r"\n", r" "],
    keep_separator=False,
    is_separator_regex=True
)

coarse_chunks = coarse_text_splitter.split_documents(docs)
final_chunks = []
for d in coarse_chunks:
    # split content of each coarse chunk further
    sub_docs = fine_splitter.split_documents([d])
    final_chunks.extend(sub_docs)



print(f"Coarse chunks: {len(coarse_chunks)} | Final chunks: {len(final_chunks)}")


Coarse chunks: 843 | Final chunks: 1685


In [25]:
# get embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [26]:
# Vector Store (Chroma)
vector_store = Chroma.from_documents(final_chunks, embeddings, persist_directory="chroma")

In [29]:
# retriever
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k":4})

In [38]:
llm = OllamaLLM(model="llama3.1:8b")

prompt = ChatPromptTemplate.from_template(
    """You are a hockey rules assistant.
    Use the context to answer. Cite sources as (page/section) from metadata.
    Question: {question}
    Context: {context}
    Answer with bullet points and citations."""
)


def format_docs(docs):
    """Format a list of documents into a readable string with sources and page numbers."""
    formatted_docs = []

    for d in docs:
        content = d.page_content
        source = d.metadata.get("source", "?")
        page = d.metadata.get("page", "?")

        formatted_doc = f"- {content}\n  [source: {source}, page {page}]"
        formatted_docs.append(formatted_doc)

    return "\n\n".join(formatted_docs)


rag_chain = (
    {"context": (lambda x: x["question"]) | retriever | format_docs, "question": lambda x: x["question"]}
    | prompt
    | llm
    | StrOutputParser()
)

In [39]:
q = "Does a player have to play the puck if possible during an icing situation?"
print(rag_chain.invoke({"question": q}))


Here are the bullet points answering your question:

• A player does not necessarily have to play the puck in order for it not to be considered icing.
• If a team is making substitutions during an icing situation and has the opportunity to play the puck, but chooses not to do so, icing shall not be called. (Rule 81.6 [page 138])
• The potential icing call can be avoided by playing the puck or skating in the direction of the puck at any time, which will continue play without a stoppage. (Rule 81.3 [page 135])

Note that the rules emphasize the importance of allowing play to continue and not calling icing if the opposing team has the opportunity to play the puck, rather than requiring them to play it.


• If the puck goes outside the playing area directly off a face-off, the face-off remains in the same spot (page 146).
• No penalty is assessed to either team for delaying the game if the puck goes outside the playing area directly off a face-off.
• If the puck is shot and makes contact with the gloves or body of a player hanging over the bench, or enters the players' bench through an open bench door, the face-off takes place at the nearest face-off spot in the zone from [../documents\2025-26_iihf_rulebook_30062025-v1.pdf, page 146].

Note: There is no specific rule stating where the face-off should take place after a puck out of bounds, other than the conditions mentioned above. The general principle is to give the offending team the least amount of "territorial advantage" (page 146).