In [2]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

import os
import glob
from pathlib import Path

In [11]:
# get the pdf files path
docs_dir = "../documents"
pdf_files = glob.glob(f"{docs_dir}/*.pdf")


# add pdf files to docs after loading with pdfLoader.
docs = []
for f in pdf_files:
    docs += PyPDFLoader(f).load()
    

In [None]:
# split text into chunks

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 800, chunk_overlap=120)
all_splits = text_splitter.split_documents(docs)

print(f"Split documents into {len(all_splits)} sub-documents.")


Split documents into 1429 sub-documents.


In [14]:
# get embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [15]:
# Vector Store (Chroma)
vector_store = Chroma.from_documents(all_splits, embeddings, persist_directory="chroma")

In [7]:
# retriever
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k":4})

In [16]:
llm = OllamaLLM(model="llama3.1:8b")

prompt = ChatPromptTemplate.from_template(
    """You are a hockey rules assistant.
    Use the context to answer. Cite sources as (page/section) from metadata.
    Question: {question}
    Context: {context}
    Answer with bullet points and citations."""
)


def format_docs(docs):
    return "\n\n".join(
        f"- {d.page_content}\n  [source: {d.metadata.get('source','?')}, page {d.metadata.get('page','?')}]"
        for d in docs
    )

rag_chain = (
    {"context": (lambda x: x["question"]) | retriever | format_docs, "question": lambda x: x["question"]}
    | prompt
    | llm
    | StrOutputParser()
)

In [20]:
q = "Where is the faceoff taken after a puck out of bounds?"
print(rag_chain.invoke({"question": q}))


• If the puck goes outside the playing area directly off a face-off, the face-off remains in the same spot (page 146).
• No penalty is assessed to either team for delaying the game if the puck goes outside the playing area directly off a face-off.
• If the puck is shot and makes contact with the gloves or body of a player hanging over the bench, or enters the players' bench through an open bench door, the face-off takes place at the nearest face-off spot in the zone from [../documents\2025-26_iihf_rulebook_30062025-v1.pdf, page 146].

Note: There is no specific rule stating where the face-off should take place after a puck out of bounds, other than the conditions mentioned above. The general principle is to give the offending team the least amount of "territorial advantage" (page 146).
