In [2]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load environment variables from the local .env file (the generation step
# later expects GROQ_API_KEY to be defined there).
load_dotenv()

# --- 1. Document Loading ---
# Read "Starship.pdf" with PyPDFLoader; load() returns a list of LangChain
# "Document" objects.
file_path = "Starship.pdf"
loader = PyPDFLoader(file_path)
documents = loader.load()

# --- 2. Document Chunking ---
# Split the loaded documents into overlapping chunks of at most 1000 characters
# (150 characters of overlap) so retrieval can return focused passages.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=150,
    length_function=len
)

# split_documents processes every loaded document and returns the chunks.
doc_splits = text_splitter.split_documents(documents)

# Fail fast when nothing could be extracted: without this guard the cell would
# report "0 chunks" as a success and the problem would only surface later,
# when the vector store is built from an empty list.
if not doc_splits:
    raise ValueError(
        f"No text chunks were produced from '{file_path}'. "
        "The PDF may be empty or contain only scanned images without extractable text."
    )

# Report how many chunks were created.
print(f"Your PDF has been split into {len(doc_splits)} chunks.")

# You can inspect a chunk to see its content and metadata
# print(doc_splits[5].page_content)
# print(doc_splits[5].metadata)

Your PDF has been split into 31 chunks.


In [3]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# --- 3. Indexing and Storage (Creating the Vector Store) ---

# Embedding model: the open-source HuggingFace "all-MiniLM-L6-v2", executed on the CPU.
# NOTE(review): the recorded output of this cell shows a warning pointing at this
# constructor call - presumably a deprecation notice; confirm and migrate if so.
embedding_model = HuggingFaceEmbeddings(
    model_kwargs={"device": "cpu"},
    model_name="all-MiniLM-L6-v2",
)

# Build the FAISS vector store in one call: every chunk in doc_splits is embedded
# with embedding_model and the resulting vectors are stored in a FAISS index that
# can be searched by similarity.
print("Creating the vector store... This may take a moment.")
vector_store = FAISS.from_documents(doc_splits, embedding_model)
print("Vector store created successfully!")

# Optional persistence, so the PDF does not have to be re-processed on every run.
# Save the index to disk:
# vector_store.save_local("faiss_starship_index")

# ...and restore it in a later session:
# vector_store = FAISS.load_local("faiss_starship_index", embedding_model, allow_dangerous_deserialization=True)

  embedding_model = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


Creating the vector store... This may take a moment.
Vector store created successfully!


In [4]:
# --- 4. Retrieval ---

# Expose the FAISS vector store through LangChain's retriever interface
# (documents in response to a free-text query), using the default settings.
retriever = vector_store.as_retriever()

# Smoke test: run a similarity search for a sample question. The retriever
# returns the chunks whose embeddings are closest to the query's embedding.
sample_query = "What is the purpose of the Starship's heat shield?"
relevant_docs = retriever.invoke(sample_query)

# relevant_docs is a list of Document objects; report how many came back.
print(f"Retrieved {len(relevant_docs)} documents for the query: '{sample_query}'\n")

# Show each hit with its text and where it came from in the PDF.
divider = "\n" + "="*50 + "\n"
for position, hit in enumerate(relevant_docs, start=1):
    source_name = hit.metadata.get('source', 'N/A')
    page_number = hit.metadata.get('page', 'N/A')
    print(f"--- Document {position} ---\n")
    print(hit.page_content)
    print(f"\nSource: {source_name}, Page: {page_number}")
    print(divider)

Retrieved 4 documents for the query: 'What is the purpose of the Starship's heat shield?'

--- Document 1 ---

Starship System Starship Spacecraft
• Starship's heat shield, composed of thousands of 
hexagonal black tiles that can withstand temperatures of 
2,600 °F, is designed to be used many times without 
maintenance between flights 
• The tiles are made of silica and are attached with pins 
rather than glued with small gaps in between to allow 
for thermal  expansion
• Tiles  hexagonal shape facilitate mass production and 
prevent hot plasma from causing severe damage to the 
vehicle
OLLI Fall 2023 11

Source: Starship.pdf, Page: 10


--- Document 2 ---

Super Heavy Booster and Starship Construction
• The stainless steel rings that make up the Starship 
system’s structure are welded together using laser 
welding or TIP-TIG welding
• Friction-stir welding is used on the aluminum 
material in the Falcon 9 
• Shapes of SS nose one panels has changed on recent 
prototypes as have confi

In [5]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder # <-- CORRECT IMPORT

# --- Implement a Reranker ---

# Cross-encoder used for reranking, loaded through LangChain's
# HuggingFaceCrossEncoder wrapper and run on the CPU.
cross_encoder_model = HuggingFaceCrossEncoder(
    model_kwargs={"device": "cpu"},
    model_name='cross-encoder/ms-marco-MiniLM-L-6-v2',
)

# CrossEncoderReranker re-sorts the documents returned by the first-stage
# retriever and keeps only the top 2.
reranker = CrossEncoderReranker(top_n=2, model=cross_encoder_model)

# ContextualCompressionRetriever chains the two stages: the FAISS retriever
# (defined in the retrieval step) fetches candidates, then the reranker filters them.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=reranker,
)

# Try the two-stage retrieval on the sample question.
print("--- Testing Retrieval with Reranker ---")
sample_query = "What is the purpose of the Starship's heat shield?"
reranked_docs = compression_retriever.invoke(sample_query)

# Report how many documents survived reranking.
print(f"Reranked and retrieved {len(reranked_docs)} documents for the query: '{sample_query}'\n")

# Show each surviving document with its text and its origin in the PDF.
divider = "\n" + "="*50 + "\n"
for rank, hit in enumerate(reranked_docs, start=1):
    source_name = hit.metadata.get('source', 'N/A')
    page_number = hit.metadata.get('page', 'N/A')
    print(f"--- Reranked Document {rank} ---\n")
    print(hit.page_content)
    print(f"\nSource: {source_name}, Page: {page_number}")
    print(divider)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


--- Testing Retrieval with Reranker ---
Reranked and retrieved 2 documents for the query: 'What is the purpose of the Starship's heat shield?'

--- Reranked Document 1 ---

Starship System Starship Spacecraft
• Starship's heat shield, composed of thousands of 
hexagonal black tiles that can withstand temperatures of 
2,600 °F, is designed to be used many times without 
maintenance between flights 
• The tiles are made of silica and are attached with pins 
rather than glued with small gaps in between to allow 
for thermal  expansion
• Tiles  hexagonal shape facilitate mass production and 
prevent hot plasma from causing severe damage to the 
vehicle
OLLI Fall 2023 11

Source: Starship.pdf, Page: 10


--- Reranked Document 2 ---

Starship System Super Heavy
• The booster is equipped with four electrically 
actuated grid fins  ~ 6,600 lb each 
• Adjacent  pairs of grid fins are spaced sixty degrees apart
• The grid fins do not retract and remain extended during 
ascent
• The booster has c

In [8]:
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# --- 6. Generation ---

# LLM used for answer generation: Llama 3 served through Groq.
# GROQ_API_KEY must be set in your .env file.
# NOTE(review): hosted model ids get retired over time - confirm that
# "llama3-8b-8192" is still served by Groq before relying on this cell.
llm = ChatGroq(
    model="llama3-8b-8192",
    # 0 keeps answers deterministic and factual, which is what a RAG bot that
    # must answer ONLY from the retrieved context needs. (This was previously 1,
    # contradicting both the accompanying comment and the prompt below.)
    temperature=0
)

# Prompt template that tells the LLM how to behave. It exposes two input
# variables, "context" (retrieved text) and "question" (the user's query).
prompt_template = """
Use the following pieces of retrieved context to answer the question.
If you don't know the answer about Starship, just say that you don't know.
Keep the answer concise and based ONLY on the provided context.

Context: {context}

Question: {question}

Answer:
"""

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)

# Now, we build the final RAG chain using LangChain Expression Language (LCEL).
# This chain will orchestrate the entire process from retrieval to generation.

def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    blank_line = "\n\n"
    return blank_line.join(page_texts)

# Assemble the RAG chain with LCEL, stage by stage:
# 1. The user's question goes to the reranking retriever.
# 2. format_docs joins the retrieved documents into one context string.
# 3. The context and the untouched question fill the prompt.
# 4. The filled prompt is sent to the LLM.
# 5. StrOutputParser turns the LLM output into a plain string.
retrieve_context = compression_retriever | format_docs
chain_inputs = {"context": retrieve_context, "question": RunnablePassthrough()}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# Ask the sample question again, this time through the complete chain.
final_answer = rag_chain.invoke(sample_query)

# Show the answer generated by the LLM.
print("--- Final Answer ---")
print(final_answer)

--- Final Answer ---
The purpose of the Starship's heat shield is to withstand temperatures of 2,600 °F and be used many times without maintenance between flights.


In [10]:
# --- 7. User Interaction ---

# Minimal chat loop: read a question, answer it through rag_chain, and repeat
# until the user types 'exit'.
print("RAG-Starship_ChatBot is ready. Type 'exit' to end the chat.")

while True:
    try:
        # Strip surrounding whitespace so input such as " exit " still ends the chat.
        user_query = input("\nAsk a question about SpaceX's Starship: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or an interrupt at the prompt ends the chat cleanly
        # instead of leaving a traceback in the cell output.
        print("\nThank you for using the Starship ChatBot!")
        break

    # Blank input is not a question: ask again rather than spending a
    # retrieval and an LLM call on it.
    if not user_query:
        continue

    if user_query.lower() == 'exit':
        print("Thank you for using the Starship ChatBot!")
        break

    # Get the answer from the RAG chain
    answer = rag_chain.invoke(user_query)

    print("\n--- Answer ---")
    print(answer)

RAG-Starship_ChatBot is ready. Type 'exit' to end the chat.

--- Answer ---
There is no answer to the question "hey" since it is not a specific question about Starship.
Thank you for using the Starship ChatBot!
