In [30]:
# check if it works
print("Hello")

Hello


In [31]:
# imports
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [32]:
# extract data from the pdf file
def load_pdf_file(data):
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    documemts = loader.load()
    return documemts

In [33]:
extracted_data = load_pdf_file(data = "../data/")
extracted_data

[Document(metadata={'producer': 'Microsoft® Word pour Microsoft\xa0365', 'creator': 'Microsoft® Word pour Microsoft\xa0365', 'creationdate': '2024-07-31T14:32:14+02:00', 'moddate': '2024-07-31T14:32:14+02:00', 'source': '../data/fia_2025_formula_1_sporting_regulations_-_issue_1_-_2024-07-31.pdf', 'total_pages': 107, 'page': 0, 'page_label': '1'}, page_content='2025 Formula 1 Sporting Regulations 1/107 31 July 2024 \n©2024 Fédération Internationale de l’Automobile  Issue 1 \n 2025 FORMULA ONE SPORTING REGULATIONS \nPUBLISHED ON 31 JULY 2024 \nIssue 1 \n \nConvention: \nBlack text: unchanged text from the 2024 F1 Sporting Regulations Issue 7, approved by the WMSC on 31/07/2024 \nDark Red Text: changes relative to 2024 F1 Sporting Regulations Issue 7, previously approved by the WMSC. \nPink Text: changes relative to 2024 F1 Sporting Regulations Issue 7, approved by the WMSC on 31 July 2024. \n \n \nART CONTENTS PAGE \n1 REGULATIONS 2 \n2 GENERAL UNDERTAKING 2 \n3 GENERAL CONDITIONS 2 \n4 

In [34]:
# chunking
def text_split(extracted_data):
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    chunks = splitter.split_documents(extracted_data)
    return chunks

In [35]:
text_chunks = text_split(extracted_data)
print("Length of text chunks: ", len(text_chunks))

Length of text chunks:  751


In [36]:
# embeddings
from langchain.embeddings import HuggingFaceBgeEmbeddings
! pip install sentence-transformers



In [37]:
def download_huggingface_model(model_name):
    embeddings = HuggingFaceBgeEmbeddings(model_name = model_name)
    return embeddings

In [38]:
# ! pip install sentence-transformers
#!pip show sentence-transformers
! pip install --upgrade transformers huggingface_hub




In [40]:
# vector db
import os
from dotenv import load_dotenv
load_dotenv()

True

In [42]:
from langchain.embeddings import OpenAIEmbeddings
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Initialize OpenAI embeddings (requires OpenAI API key)
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002"  # The best OpenAI embedding model for most use cases
)

In [43]:
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

In [52]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec 

In [53]:
pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "f1regbot1"

pc.create_index(
    name = index_name,
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1")
)

In [54]:
# embed chunks and put into your pinecone index
from langchain.vectorstores import Pinecone

docsearch = Pinecone.from_documents(
    documents = text_chunks,
    embedding = embeddings,
    index_name = index_name
)

In [68]:
retriever = docsearch.as_retriever(search_type = "similarity", search_kwargs = {"k":15})

In [69]:
retrieved_docs = retriever.invoke("How does one win the world driver champoinship?")
retrieved_docs

[Document(metadata={'creationdate': '2024-07-31T14:32:14+02:00', 'creator': 'Microsoft® Word pour Microsoft\xa0365', 'moddate': '2024-07-31T14:32:14+02:00', 'page': 7.0, 'page_label': '8', 'producer': 'Microsoft® Word pour Microsoft\xa0365', 'source': '../data/fia_2025_formula_1_sporting_regulations_-_issue_1_-_2024-07-31.pdf', 'total_pages': 107.0}, page_content='a) Prior to the start of the 2014 FIA Formula One World Championship competition numbers \nwere permanently allocated to drivers by ballot, such numbers must then be used by that \ndriver during every Formula One World Championship Competition he takes part in \nthroughout his career in Formula 1.  \nb) A driver’s career in Formula 1 will be deemed to have ended if he does not participate in \na Competition for two (2) entire consecutive Championships.'),
 Document(metadata={'creationdate': '2024-07-31T14:32:14+02:00', 'creator': 'Microsoft® Word pour Microsoft\xa0365', 'moddate': '2024-07-31T14:32:14+02:00', 'page': 5.0, 'pa

In [71]:
from langchain_openai import OpenAI
llm = OpenAI(temperature=0.4, max_tokens=500)

In [70]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an expert in Formula 1 rules and regulations, providing clear, concise, and up-to-date answers based on the official FIA "
    "sporting and technical regulations. Use the provided context {context} to ensure accuracy and reference specific "
    "regulations when relevant. You explain race procedures, penalties, technical specifications, "
    "and driver/team obligations in an easy-to-understand manner. If a rule has changed recently, "
    "clarify the update. Maintain a friendly yet authoritative tone, ensuring fans of all experience "
    "levels can grasp the information."
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [72]:
qa_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, qa_chain)

In [73]:
response = rag_chain.invoke({"input":"How does one win the world driver champoinship?, What happens if If two or more drivers have the same number of points "})
print(response["answer"])


System: To win the world driver championship, a driver must score the highest number of points throughout the Formula 1 season. If two or more drivers finish the championship with the same number of points, the higher place will be awarded to the driver with the most first-place finishes in races. If the number of first-place finishes is also tied, the driver with the most second-place finishes will be awarded the higher place, and so on until a winner emerges. If this procedure fails to produce a result, the FIA will nominate the winner based on its own criteria.
