In [10]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate  # Added this import

import key
from dotenv import load_dotenv


In [11]:

## Load environment variables (for OpenAI API key)
load_dotenv()

def process_pdfs(pdf_directory):
    print("Processing PDFs...")
    """Process all PDFs in the specified directory and create a vector store."""
    documents = []
    
    # Load all PDFs from the directory
    for file in os.listdir(pdf_directory):
        if file.endswith('.pdf'):
            print(f"Processing {file}...")
            pdf_path = os.path.join(pdf_directory, file)
            loader = PyPDFLoader(pdf_path)
            documents.extend(loader.load())
    
    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=8000,
        chunk_overlap=200,
        length_function=len
    )
    splits = text_splitter.split_documents(documents)
    
    # Create embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )
    
    # Create and persist vector store
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embeddings,
        persist_directory="./chroma_db"
    )
    
    return vectorstore



In [12]:
def create_qa_chain_openai(vectorstore, key):
    """Create a question-answering chain using the vector store."""
    # Initialize language model
    llm = ChatOpenAI(api_key=key,temperature=0)
    print("Querying the vector store...")
    
    # Create retrieval chain
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(search_kwargs={"k": 10}),
        return_source_documents=True
    )
    
    return qa_chain

In [13]:
def create_qa_chain(vectorstore):
    """Create a question-answering chain using the vector store."""
    # Initialize local Gemma model through Ollama
    llm = Ollama(
        model="gemma2b",
        temperature=0.1,
        top_k=10,
        num_ctx=16000  # Context window size,
        
    )
    
    # Create retrieval chain with custom prompt
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True,
        chain_type_kwargs={
            "prompt": PromptTemplate(
                template="""You are a helpful AI assistant. Use the following pieces of context to answer the question at the end. 
                If you don't know the answer, just say that you don't know. Don't try to make up an answer.

                Context: {context}

                Question: {question}

                Answer:""",
                input_variables=["context", "question"]
            )
        }
    )
    
    return qa_chain

In [14]:
# Directory containing PDF files
pdf_dir = "./pdf_documents"

# Process PDFs and create vector store
vectorstore = process_pdfs(pdf_dir)



Processing PDFs...
Processing state of the art small spacecraft.pdf...


In [15]:
# Load vector Store
#
embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    ) 
vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)


# Create QA chain
api_key = key.api_key
qa_chain = create_qa_chain_openai(vectorstore, api_key)
# qa_chain = create_qa_chain_openai(vectorstore)

# Example query
query = "What is ESPA class satellite? What is the peak power range in watts and which satellite has the highest peak power? Can you write a report on it?"
result = qa_chain({"query": query})
print("Query:", query)

print("Answer:", result["result"])
print("\nSource Documents:")
for doc in result["source_documents"]:
    print(f"- {doc.metadata['source']}, Page {doc.metadata['page']}")

Querying the vector store...
Query: What is ESPA class satellite? What is the peak power range in watts and which satellite has the highest peak power? Can you write a report on it?
Answer: An ESPA-class satellite refers to a type of satellite that utilizes the Evolved Expendable Launch Vehicle (EELV) Secondary Payload Adapter (SPA) or similar configurations. The ESPA ring, which is a key component of ESPA-class satellites, allows for the separation of the primary payload from the upper stage of the launch vehicle, enabling additional mounting allocations for secondary payloads. These secondary payloads can be mounted on the ESPA ring, providing opportunities for rideshare missions. ESPA-class satellites are designed to accommodate secondary payloads with masses typically under 500 kg, although some variants of the ESPA ring can support higher masses.

In terms of peak power range in watts, ESPA-class satellites have a variety of power outputs depending on the specific satellite. The p

In [16]:
result

{'query': 'What is ESPA class satellite? What is the peak power range in watts and which satellite has the highest peak power? Can you write a report on it?',
 'result': 'An ESPA-class satellite refers to a type of satellite that utilizes the Evolved Expendable Launch Vehicle (EELV) Secondary Payload Adapter (SPA) or similar configurations. The ESPA ring, which is a key component of ESPA-class satellites, allows for the separation of the primary payload from the upper stage of the launch vehicle, enabling additional mounting allocations for secondary payloads. These secondary payloads can be mounted on the ESPA ring, providing opportunities for rideshare missions. ESPA-class satellites are designed to accommodate secondary payloads with masses typically under 500 kg, although some variants of the ESPA ring can support higher masses.\n\nIn terms of peak power range in watts, ESPA-class satellites have a variety of power outputs depending on the specific satellite. The peak power output 

In [17]:
import textwrap
wrapped_text = textwrap.fill(result["result"], width=100,replace_whitespace=False, break_on_hyphens=False, drop_whitespace=True, fix_sentence_endings=True)
print(wrapped_text)

An ESPA-class satellite refers to a type of satellite that utilizes the Evolved Expendable Launch
Vehicle (EELV) Secondary Payload Adapter (SPA) or similar configurations.  The ESPA ring, which is a
key component of ESPA-class satellites, allows for the separation of the primary payload from the
upper stage of the launch vehicle, enabling additional mounting allocations for secondary payloads.
These secondary payloads can be mounted on the ESPA ring, providing opportunities for rideshare
missions.  ESPA-class satellites are designed to accommodate secondary payloads with masses
typically under 500 kg, although some variants of the ESPA ring can support higher masses.

In terms
of peak power range in watts, ESPA-class satellites have a variety of power outputs depending on the
specific satellite.  The peak power output can range from as low as 80 watts to as high as 4,500
watts.  Among the satellites listed in the provided information, the satellite with the highest peak
power output is