In [None]:
import PyPDF2
from langchain.llms import Ollama
from langchain.prompts import PromptTemplate


# File name of a sample PDF (a Solana whitepaper, going by the name).
# NOTE(review): `path` is not referenced anywhere else in the visible code;
# the example run uses its own `pdf_path` - confirm whether this is still needed.
path = "solana-whitepaper-en.pdf"
def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_file_path: Path of the PDF to read; the file is opened in
            binary mode.

    Returns:
        The text extracted from each page, joined in page order with no
        separator. A PDF with no pages yields an empty string.
    """
    with open(pdf_file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Join once instead of growing a string with `+=` per page.
        # `or ""` is a defensive guard: if a page's extraction yields a
        # falsy/non-string result, concatenation would raise TypeError.
        # The join stays inside the `with` block so the pages are read
        # while the underlying file is still open.
        return "".join(page.extract_text() or "" for page in reader.pages)

def format_llm_prompt(pdf_text, user_query, max_context_chars=2000):
    """Build the prompt string that is sent to the LLM.

    The prompt consists of fixed system instructions, a context section
    taken from the start of ``pdf_text``, and the user's query.

    Args:
        pdf_text: Text to use as context (e.g. text extracted from a PDF).
        user_query: The question to ask about the context.
        max_context_chars: Maximum number of characters of ``pdf_text`` to
            include, to keep the prompt short. Defaults to 2000. ``None``
            disables the limit.

    Returns:
        The formatted prompt string.
    """
    # System instructions for the LLM. The layout (leading newline and
    # four-space indentation) is part of the prompt text itself.
    system_instructions = (
        "\n"
        "    System Instructions:\n"
        "    - You are an advanced language model trained to answer questions based on the provided context.\n"
        "    - Provide accurate, relevant, and concise responses using the given context.\n"
        "    - If the context does not provide the necessary information, state this clearly in your response.\n"
        "    "
    )

    # Limit the context for prompt length. The "..." marker is appended
    # only when text was actually cut, so an un-truncated context is not
    # presented to the model as if something were missing.
    excerpt = pdf_text[:max_context_chars]
    truncation_marker = "..." if len(excerpt) < len(pdf_text) else ""
    context = f"Context: {excerpt}{truncation_marker}"

    # Explicit pieces keep the whitespace-only separator lines visible
    # instead of relying on trailing spaces inside a triple-quoted string.
    return (
        "\n"
        "    <LLM Prompt>\n"
        "    <System Instructions>\n"
        f"    {system_instructions}\n"
        "    \n"
        "    <Context>\n"
        f"    {context}\n"
        "    \n"
        "    <User Query>\n"
        f"    {user_query}\n"
        "    "
    )

def initialize_ollama_model(model="llama2"):
    """Create the LangChain Ollama LLM wrapper.

    Args:
        model: Name of the Ollama model to use. Defaults to "llama2",
            the model this function previously hard-coded.

    Returns:
        An ``Ollama`` instance configured for ``model``.
    """
    return Ollama(model=model)

def main(pdf_path, user_query):
    """Answer a question about a PDF with the Ollama LLM and print the reply.

    Extracts the PDF's text, builds the prompt, sends it to the model and
    prints the response to standard output.

    Args:
        pdf_path: Path of the PDF file to read.
        user_query: The question to ask about the document.
    """
    # Step 1: Extract text from the PDF
    pdf_text = extract_text_from_pdf(pdf_path)

    # Step 2: Format the LLM prompt
    formatted_prompt = format_llm_prompt(pdf_text, user_query)

    # Step 3: Initialize LangChain Ollama
    llm = initialize_ollama_model()

    # Step 4: Query the model. `invoke` is used rather than calling the
    # LLM object directly, since the direct-call form is deprecated in
    # LangChain.
    response = llm.invoke(formatted_prompt)

    # Output the response from the LLM
    print("Response from LLM:")
    print(response)

# Demonstration inputs: swap in the path of your own PDF and your question.
pdf_path, user_query = (
    'example_document.pdf',
    "What are the key highlights of this document?",
)

# Extract the text, build the prompt, query the model and print the answer.
main(pdf_path=pdf_path, user_query=user_query)