In [5]:
import os

from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# openAI embeddings
from langchain_openai.embeddings import OpenAIEmbeddings

# vector store
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Load API keys: load_dotenv() reads variables from a .env file into the process
# environment. Presumably this supplies the OpenAI key used by the embedding and
# chat clients below — confirm against the local .env file.
from dotenv import load_dotenv
load_dotenv()

True

In [9]:
# Document loading: read the rulebook PDF and attach the metadata used for citations.
file = "../documents/2025-26_iihf_rulebook.pdf"

loader = PyPDFLoader(file)

docs = loader.load()

for i, d in enumerate(docs, start=1):
    # Derive the 1-based page number from the loader's own 0-based "page" index so it
    # stays correct even if the list ever stops matching the PDF pages one-to-one
    # (filtered, reordered or differently-loaded documents). Fall back to the list
    # position when the loader recorded no page index.
    d.metadata['page_number'] = d.metadata.get('page', i - 1) + 1
    # Replace the file path stored by the loader with a human-readable citation label.
    d.metadata['source'] = "IIHF Rulebook 2025-26"
    

In [26]:
# Sanity check: how many Document objects loader.load() returned.
len(docs)



228

In [23]:
# Chunking configuration, kept in one place so the values are easy to tune.
# add_start_index=True asks the splitter to record where each chunk starts.
splitter_options = {
    "chunk_size": 1600,
    "chunk_overlap": 200,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split the loaded documents into overlapping chunks; the cell displays the chunk count.
all_splits = text_splitter.split_documents(docs)
len(all_splits)

461

In [27]:
# Embedding client passed to the vector store when the index is built.
embedding_model_name = "text-embedding-3-large"
embeddings = OpenAIEmbeddings(model=embedding_model_name)

In [28]:
# Vector store: embed every chunk into a FAISS index, then persist it to disk.
# NOTE(review): each run of this cell re-embeds all chunks and overwrites the saved
# index — consider loading the saved copy instead when the chunks have not changed.
faiss_index_dir = "vs_faiss"
vector_store = FAISS.from_documents(embedding=embeddings, documents=all_splits)
vector_store.save_local(faiss_index_dir)


In [None]:
# Retriever smoke test: fetch the single best match (k=1) for a few sample questions
# and print each retrieved chunk followed by a separator line.
retriever = vector_store.as_retriever(search_kwargs={"k": 1})

sample_questions = [
    "What is a slashing penalty?",
    "What is a holding penalty?",
    "What is a tripping penalty?"
]
results = retriever.batch(sample_questions)

separator = "-----------------------------"
# `results` holds one list of documents per question; walk them in question order.
for doc in (hit for query_results in results for hit in query_results):
    print(doc.page_content)
    print(separator)


In [45]:
# Retriever (default search settings) and chat model for the rule assistant.
# NOTE(review): no cell visible here passes this retriever's results into the prompt;
# the prompt check below supplies hand-written context — confirm it is wired in later.
retriever = vector_store.as_retriever()
llm = init_chat_model("gpt-4o-mini", model_provider="openai")

# System instructions. This text is sent verbatim to the model, so the spelling
# mistakes in the grounding rule were corrected (concisely / the / unknown).
system_template = """
    You are an ice hockey rule assistant.
     - You answer questions about ice hockey rules in a bullet format
     - You give citations to used rules and cite used pages
     - You answer concisely and you answer ONLY using the provided context. If the answer is unknown, say you don't know.
    """


# Template variables: {context} (rule text to answer from) and {question} (user query).
# The context message is labeled so "the provided context" in the instructions has an
# unambiguous referent.
prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_template),
    ("system", "Context:\n{context}"),
    ("user", "{question}"),
])



In [50]:
# Manual prompt check with hand-written context: the question is about boarding,
# while the supplied context only contains the tripping rule.
# NOTE(review): presumably this verifies the model declines when the context does
# not cover the question — confirm that is the intent.
test_question = "What is a boarding?"
tripping_rule_context = """RULE 57 TRIPPING
57.1. TRIPPING
A Player shall not place the stick, or any part of their body in such a manner that causes their opponent to trip or fall.
Accidental trips which occur simultaneously with a completed play will not be penalized.
Accidental trips occurring simultaneously with or after a stoppage of play will not be penalized.
57.2. MINOR PENALTY
The Referee shall, at their discretion, assess a minor penalty, based on the severity of the infraction, to any Player who place their
stick or any part of their body in such a manner that it shall cause their opponent to trip and fall."""

# Fill the template, then send the resulting messages to the chat model.
prompt = prompt_template.invoke(
    {"question": test_question, "context": tripping_rule_context}
)
response = llm.invoke(prompt)

In [51]:
# Show the text content of the reply returned by llm.invoke(prompt).
print(response.content)

I don't know.
