In [6]:
import os
from langchain_openai import ChatOpenAI

# Pull the API key from the environment so it is never hard-coded in the notebook.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Chat model used by the chains and the history-aware retriever defined in later cells.
chat_model = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model="gpt-5-nano-2025-08-07",
    temperature=0,
)

#### Initialize Embedding Model

In [7]:
from langchain_openai import OpenAIEmbeddings

# Embedding model handed to the vector store; authenticates with the key read from the environment.
embeddings_model = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
    model="text-embedding-3-small",
)

#### Load PDF Document

In [8]:
from langchain_community.document_loaders import PyMuPDFLoader

# "Research.pdf" is a relative path, so it is resolved against the current working directory.
loader = PyMuPDFLoader(file_path="Research.pdf")
# `docs` holds the Document objects produced by the loader.
docs = loader.load()

In [4]:
# Number of Document objects returned by loader.load().
len(docs)

3

In [5]:
# Inspect the first loaded Document (metadata plus page_content).
docs[0]

Document(metadata={'producer': 'Skia/PDF m140', 'creator': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36', 'creationdate': '2025-09-23T15:50:49+00:00', 'source': 'Research.pdf', 'file_path': 'Research.pdf', 'total_pages': 3, 'format': 'PDF 1.4', 'title': 'Neural Network Pruning Research Proposal', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2025-09-23T15:50:49+00:00', 'trapped': '', 'modDate': "D:20250923155049+00'00'", 'creationDate': "D:20250923155049+00'00'", 'page': 0}, page_content='Neural Network Pruning Research Proposal\n1. Introduction & Context\nThe Challenge of Model Efficiency\nModern deep neural networks have achieved remarkable performance across various domains, from\ncomputer vision to natural language processing. However, their success comes at a significant\ncomputational cost. State-of-the-art models like GPT-4, BERT-Large, or ResNet-152 contain millions to\nbillions of parameters, requir

#### Split Document into chunks

In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking settings: chunks of up to 400 with an overlap of 50 between neighbours
# (measured by the splitter's length function — characters by default).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=400,
)
# Split every loaded Document into smaller Documents for embedding.
splits = text_splitter.split_documents(docs)

In [7]:
# Number of chunks produced by the text splitter.
len(splits)

15

In [8]:
# Inspect the first chunk (metadata plus page_content).
splits[0]

Document(metadata={'producer': 'Skia/PDF m140', 'creator': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36', 'creationdate': '2025-09-23T15:50:49+00:00', 'source': 'Research.pdf', 'file_path': 'Research.pdf', 'total_pages': 3, 'format': 'PDF 1.4', 'title': 'Neural Network Pruning Research Proposal', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2025-09-23T15:50:49+00:00', 'trapped': '', 'modDate': "D:20250923155049+00'00'", 'creationDate': "D:20250923155049+00'00'", 'page': 0}, page_content='Neural Network Pruning Research Proposal\n1. Introduction & Context\nThe Challenge of Model Efficiency\nModern deep neural networks have achieved remarkable performance across various domains, from\ncomputer vision to natural language processing. However, their success comes at a significant\ncomputational cost. State-of-the-art models like GPT-4, BERT-Large, or ResNet-152 contain millions to')

#### Create Vector Store and Retriever

In [10]:
from langchain_chroma import Chroma

# Embed the chunks with the embedding model and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(
    embedding=embeddings_model,
    documents=splits,
)

In [11]:
# Create a retriever from the vectorstore.
# No arguments are passed, so the retriever uses the library's default search settings.
retriever = vectorstore.as_retriever()

#### Define Prompt Template

In [19]:
from langchain_core.prompts import ChatPromptTemplate

# System message for the plain RAG chain: the answering instructions, a blank
# line, then the `{context}` slot that is filled with the retrieved text.
system_prompt = "\n\n".join(
    [
        "You are a helpful AI assistant that helps people find information "
        "from a set of documents. Use the following pieces of context to "
        "answer the question at the end. If you don't know the answer, just "
        "say that you don't know, don't try to make up an answer.",
        "{context}",
    ]
)

# RetrievalQA supplies the user's question under the variable name 'question',
# so the human turn must use exactly that placeholder.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{question}")]
)


In [12]:
# Display the prompt object to check its input variables.
# NOTE(review): the saved output under this cell lists 'input' as a variable, while the
# cell that builds `prompt` uses '{question}' — the output looks stale; re-run to refresh.
prompt

ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are a helpful AI assistant that helps people find information from a set of documents. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])

#### Create Retrieval-Augmented Generation (RAG) chain

In [20]:
from langchain_classic.chains import RetrievalQA

# RetrievalQA chain of type "stuff", wired to the retriever, the chat model and
# the custom prompt defined earlier.
rag_chain = RetrievalQA.from_chain_type(
    chain_type_kwargs={"prompt": prompt},
    retriever=retriever,
    chain_type="stuff",
    llm=chat_model,
)


#### Invoke RAG Chain with example questions


In [24]:
# The chain reads the user's question from the 'query' key.
response = rag_chain.invoke({"query": "What is longest river in the world?"})

# Inspect which keys came back, then the generated answer itself.
print("response keys:", list(response))
print("answer:\n", response.get("result"))


response keys: ['query', 'result']
answer:
 I don’t know the answer from the provided documents—the materials you shared are about neural network pruning and don’t contain information about rivers. If you’d like, I can summarize the pruning proposal or help with related questions.


#### Add chat history (for keeping a continuous chat)

In [30]:
from langchain_classic.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# System instruction for the question-rewriting step: turn the latest input into
# a standalone question using the history, or pass it through unchanged.
contextualize_system_prompt = " ".join(
    [
        "Using the conversation history and the latest user input,",
        "reformulate the input into a standalone question if needed;",
        "otherwise return it as-is.",
    ]
)

# create_history_aware_retriever expects the user's text under the variable
# name 'input', so the human turn has to use that placeholder.
contextualize_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_system_prompt),
        MessagesPlaceholder("chat_history"),  # prior turns are spliced in here
        ("human", "{input}"),
    ]
)

# Combine the chat model, the base retriever and the contextualize prompt into
# a retriever that takes both 'input' and 'chat_history'.
history_aware_retriever = create_history_aware_retriever(
    llm=chat_model,
    retriever=retriever,
    prompt=contextualize_prompt,
)


#### Create history aware RAG chain

In [40]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System message for the history-aware answer step: the answering instructions,
# a blank line, then the `{context}` slot for the retrieved text.
history_system_prompt = "\n\n".join(
    [
        "You are a helpful AI assistant that helps people find information "
        "from a set of documents. Use the following pieces of context to "
        "answer the user input at the end. If you don't know the answer, "
        "just say that you don't know.",
        "{context}",
    ]
)

# Answering prompt: system instructions, then the prior turns, then the new user input.
history_qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", history_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Display the template to check its input variables.
history_qa_prompt


ChatPromptTemplate(input_variables=['chat_history', 'context', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.

In [41]:
from langchain_core.runnables import RunnableLambda
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Build a history-aware RAG runnable.
# Note: RetrievalQA.from_chain_type can't accept history_aware_retriever because it's a Runnable
# (it needs both 'input' and 'chat_history'), not a BaseRetriever.

def _history_aware_rag(inputs: dict) -> dict:
    """Answer one user turn using history-aware retrieval.

    Args:
        inputs: Mapping with the user's text under ``"input"`` and, optionally,
            the prior messages under ``"chat_history"`` (defaults to an empty
            list when absent).

    Returns:
        A dict with ``"answer"`` (the model's reply) and ``"source_documents"``
        (the retrieved documents whose text was placed in the prompt context).

    Raises:
        KeyError: If ``inputs`` has no ``"input"`` key.
    """
    # Cap on how many retrieved chunks are placed in the prompt.
    max_context_docs = 4

    user_input = inputs["input"]
    chat_history = inputs.get("chat_history", [])

    retrieved = history_aware_retriever.invoke(
        {"input": user_input, "chat_history": chat_history}
    )
    # Slice once so the prompt context and the reported sources always describe
    # the same documents, even if the retriever returns more than the cap.
    context_docs = list(retrieved[:max_context_docs])
    context = "\n\n".join(doc.page_content for doc in context_docs)

    messages = history_qa_prompt.format_messages(
        context=context,
        input=user_input,
        chat_history=chat_history,
    )
    answer_msg = chat_model.invoke(messages)
    # Use the reply's `.content` when present; otherwise fall back to its string form.
    answer = getattr(answer_msg, "content", str(answer_msg))

    return {"answer": answer, "source_documents": context_docs}

# Wrap the plain function as a Runnable so it can be passed to RunnableWithMessageHistory.
history_aware_rag_chain = RunnableLambda(_history_aware_rag)

# ---- Session history store ----
# Maps session id -> chat history object; lives only in this kernel's memory.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating an empty one on first use.

    Args:
        session_id: Key identifying the conversation.

    Returns:
        The history object stored under ``session_id``; repeated calls with the
        same id return the same object.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: register a fresh, empty history.
        history = store[session_id] = ChatMessageHistory()
        return history

# Attach per-session message history to the RAG runnable. The three key names
# match what the wrapped function reads ('input', 'chat_history') and returns ('answer').
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=history_aware_rag_chain,
    get_session_history=get_session_history,
    output_messages_key="answer",
    history_messages_key="chat_history",
    input_messages_key="input",
)


#### Manage Chat Session History

In [42]:
# Both calls carry the same session id, so they share one stored chat history.
session_id = "101"
session_config = {"configurable": {"session_id": session_id}}

resp1 = conversational_rag_chain.invoke(
    {"input": "Give me a summary of the document."},
    config=session_config,
)
print("Answer 1:\n", resp1["answer"])

print("\n--- Follow-up (same session) ---\n")
# Same config again: the follow-up is answered with the first exchange in the history.
resp2 = conversational_rag_chain.invoke(
    {"input": "What are the key contributions?"},
    config=session_config,
)
print("Answer 2:\n", resp2["answer"])


Answer 1:
 - The document outlines a research program on adaptive pruning of neural networks that dynamically adjusts pruning decisions during training.
- It proposes a Comprehensive Evaluation Framework to systematically compare pruning techniques, using baselines such as magnitude-based pruning, structured pruning, and lottery ticket methods; and evaluating with metrics like model size reduction, inference speed, accuracy retention, and training efficiency.
- It includes a Software Implementation component: an open-source tool/framework for adaptive pruning.
- It presents Empirical Insights on how different adaptation strategies affect model performance and generalization.
- Expected Contributions: a novel adaptive pruning algorithm and a software engineering implementation to support it.
- Research Scope and Boundaries: focus on developing adaptive pruning methods with an emphasis on software implementation; target computer vision architectures (ResNet, EfficientNet) and potentially