In [1]:
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment before reading them.
load_dotenv()

# API keys for the two hosted services used below. Each is None when its
# variable is not set, so a missing key only surfaces when the client is used.
groq_api_key = os.environ.get("GROQ_API_KEY")
nomic_api_key = os.environ.get("NOMIC_API_KEY")


In [2]:
from nomic import embed, login
from langchain.embeddings.base import Embeddings

class NomicEmbedText(Embeddings):
    """LangChain ``Embeddings`` adapter that calls Nomic's ``embed.text`` API.

    Stored chunks and look-up strings are embedded with different
    ``task_type`` values (``search_document`` vs ``search_query``), which is
    how the Nomic text-embedding models are meant to be used for retrieval.
    """

    def __init__(self, api_key: str, model: str = "nomic-embed-text-v1"):
        """Authenticate with Nomic and remember which model to request.

        Args:
            api_key: Nomic API token, handed to ``nomic.login``.
            model: Identifier of the Nomic embedding model to call.
        """
        login(api_key)  # this sets the global token
        self.model = model

    def _embed(self, texts: list[str], task_type: str) -> list[list[float]]:
        """Embed ``texts`` with the given Nomic task type, one vector per text."""
        texts = list(texts)
        if not texts:
            # Nothing to embed: skip the API round-trip entirely.
            return []
        response = embed.text(
            texts=texts,
            model=self.model,
            task_type=task_type,
        )
        return response["embeddings"]

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed chunks that will be stored and searched over.

        Args:
            texts: Chunk texts to embed.

        Returns:
            One embedding per input text, in the same order; ``[]`` for empty input.
        """
        return self._embed(texts, task_type="search_document")

    def embed_query(self, text: str) -> list[float]:
        """Embed a single search query.

        Uses the ``search_query`` task type so the query vector is matched
        against vectors produced by ``embed_documents``.
        """
        return self._embed([text], task_type="search_query")[0]


In [3]:
from langchain_community.vectorstores.pgvector import PGVector

# Name of the pgvector collection that holds the embedded PDF chunks.
COLLECTION_NAME = "pdf_chunks"

# Database URL for the pgvector instance. Set PGVECTOR_CONNECTION_STRING
# (e.g. in .env) to keep real credentials out of the notebook; the fallback
# is the local development database.
CONNECTION_STRING = os.getenv(
    "PGVECTOR_CONNECTION_STRING",
    "postgresql+psycopg2://ai:ai@localhost:5532/ai",
)

# Embedding backend used both when storing chunks and when searching.
nomic_embeddings = NomicEmbedText(api_key=nomic_api_key)

vectorstore = PGVector(
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
    embedding_function=nomic_embeddings,
)


  vectorstore = PGVector(
  vectorstore = PGVector(


In [4]:
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

def read_and_split_pdf(file_path, chunk_size=1000, chunk_overlap=200):
    """Extract the text of a PDF and cut it into overlapping chunks.

    Args:
        file_path: Location of the PDF, passed straight to ``PdfReader``.
        chunk_size: Chunk length forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Overlap between consecutive chunks, forwarded likewise.

    Returns:
        A list of ``Document`` objects, one per chunk, in splitter order.
    """
    reader = PdfReader(file_path)

    # Pages whose extraction yields nothing (None or "") are skipped; every
    # kept page is terminated with a newline. Joining once avoids rebuilding
    # the whole string on each page.
    page_texts = (page.extract_text() for page in reader.pages)
    text = "".join(f"{page_text}\n" for page_text in page_texts if page_text)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return [Document(page_content=chunk) for chunk in splitter.split_text(text)]


In [5]:
from langchain_community.vectorstores.pgvector import PGVector

# NOTE: this cell repeats the vector-store setup from an earlier cell and
# rebinds the same names; it is kept so the cell can be run on its own.

# Name of the pgvector collection that holds the embedded PDF chunks.
COLLECTION_NAME = "pdf_chunks"

# Database URL for the pgvector instance. Set PGVECTOR_CONNECTION_STRING
# (e.g. in .env) to keep real credentials out of the notebook; the fallback
# is the local development database.
CONNECTION_STRING = os.getenv(
    "PGVECTOR_CONNECTION_STRING",
    "postgresql+psycopg2://ai:ai@localhost:5532/ai",
)

# Embedding backend used both when storing chunks and when searching.
nomic_embeddings = NomicEmbedText(api_key=nomic_api_key)

vectorstore = PGVector(
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
    embedding_function=nomic_embeddings,
)


  vectorstore = PGVector(


In [6]:
def store_pdf_in_vectorstore(file_path):
    """Chunk the PDF at ``file_path`` and add the chunks to ``vectorstore``.

    Prints how many chunks were handed to the store; returns nothing.
    """
    docs = read_and_split_pdf(file_path)
    vectorstore.add_documents(docs)

    # Report only after the insert call has returned.
    summary = f" Stored {len(docs)} chunks in pgvector."
    print(summary)


In [7]:
# Ingest the sample PDF: split it into chunks and add them to the vector store.
# NOTE(review): every run calls add_documents again, so re-executing this cell
# presumably stores the same chunks a second time — confirm before re-running.
store_pdf_in_vectorstore("sample.pdf") 


 Stored 192 chunks in pgvector.


In [8]:
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

def read_and_split_pdf(file_path, chunk_size=1000, chunk_overlap=200):
    """Extract the text of a PDF and cut it into overlapping chunks.

    NOTE: this redefines the function of the same name from an earlier cell;
    whichever cell ran last is the definition in effect.

    Args:
        file_path: Location of the PDF, passed straight to ``PdfReader``.
        chunk_size: Chunk length forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Overlap between consecutive chunks, forwarded likewise.

    Returns:
        A list of ``Document`` objects, one per chunk, in splitter order.
    """
    reader = PdfReader(file_path)

    # Pages whose extraction yields nothing (None or "") are skipped; every
    # kept page is terminated with a newline. Joining once avoids rebuilding
    # the whole string on each page.
    page_texts = (page.extract_text() for page in reader.pages)
    text = "".join(f"{page_text}\n" for page_text in page_texts if page_text)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return [Document(page_content=chunk) for chunk in splitter.split_text(text)]


In [9]:
from groq import Groq

# Module-level Groq client used by ask_llm below; it is given the key that
# was read from the GROQ_API_KEY environment variable in the first cell.
groq_client = Groq(api_key=groq_api_key)

def ask_llm(question, context, model="llama3-70b-8192"):
    """Ask the Groq chat model a question, grounded in the supplied context.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        context: Text the model should answer from (here: retrieved PDF chunks).
        model: Groq model identifier to request.
            NOTE(review): confirm this model id is still served by Groq.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string when the reply carries no text content.
    """
    prompt = f"""You are a helpful assistant answering questions based on the provided PDF context.

Context:
{context}

Question:
{question}

Answer:"""

    chat_completion = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )

    # The SDK types message.content as optional; calling .strip() on None
    # would raise AttributeError, so a missing body is treated as "".
    content = chat_completion.choices[0].message.content
    return (content or "").strip()


In [10]:
def answer_question_from_pdf(query, top_k=5):
    """Answer ``query`` from the stored PDF chunks.

    Runs a similarity search for ``query`` on ``vectorstore`` asking for
    ``top_k`` results, joins their text with blank lines, and passes that
    as context to ``ask_llm``. Returns whatever ``ask_llm`` returns.
    """
    matches = vectorstore.similarity_search(query, k=top_k)
    passages = (match.page_content for match in matches)
    return ask_llm(query, "\n\n".join(passages))

In [11]:
# Example query: retrieve the closest stored chunks for the question and let
# the LLM answer from them.
question = "What is the easiest dish in this pdf?"
answer = answer_question_from_pdf(question)

# Echo both so the cell output can be read on its own.
print(" Question:", question)
print(" Answer:", answer)


 Question: What is the easiest dish in this pdf?
 Answer: I'm happy to help! However, I must point out that there are no specific dish recipes mentioned in the provided PDF context. The context appears to be related to a cooking course or textbook, listing various herbs and spices, and outlining learning objectives for a chapter on introduction to worldwide cuisines.

Therefore, it is not possible to identify the easiest dish in this PDF, as there are no dishes mentioned. If you have any further questions or if there's anything else I can assist you with, please feel free to ask!
