In [86]:
#pip install -U langchain-openai
#pip install tiktoken
#pip install langchain openai faiss-cpu PyPDF2
#%pip install -qU langchain_community pypdf
#pip install streamlit


In [87]:
from langchain_openai import ChatOpenAI
import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
import tempfile
import os
from langchain.prompts import PromptTemplate

In [88]:
import os

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load your PDF
# Folder that holds the two Handbook volumes. Set the HANDBOOK_PDF_DIR
# environment variable to point at another location; when it is unset the
# original machine-specific directory is used, so existing runs are unchanged.
raw = os.environ.get(
    "HANDBOOK_PDF_DIR",
    "C:/Users/edward_b/github/interviews-env/interviews/app/static",
)
pdf_paths = [f"{raw}/Handbook Volume 2.pdf", f"{raw}/Handbook Volume 1.pdf"]

# Collect whatever loader.load() returns for each PDF into one flat list,
# in the order of pdf_paths (Volume 2 first, then Volume 1).
all_docs = []
for pdf_path in pdf_paths:
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    all_docs.extend(documents)


In [89]:
# Break the loaded documents into overlapping pieces for embedding:
# chunk_size=800 with chunk_overlap=200 shared between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=800,
)
chunks = splitter.split_documents(all_docs)

In [90]:
import os
import getpass

# Ask for the OpenAI key interactively only when the environment does not
# already provide a non-empty OPENAI_API_KEY; the entered value is stored in
# this process's environment.
if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [91]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# Embed every chunk with the OpenAI embedding model named below and build the
# FAISS vector store `db` from the results.
embedding_model = OpenAIEmbeddings(
    model="text-embedding-ada-002",  # embedding model identifier
)
db = FAISS.from_documents(
    chunks,
    embedding_model,
)


In [107]:
# Bind ChatOpenAI to the maintained langchain_openai class (the same one the
# notebook's first import cell pulls in) instead of re-importing the deprecated
# langchain.chat_models shim, which would otherwise override it.
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA

# Retriever over the FAISS store built earlier; search_kwargs k=6 requests
# six chunks per query.
retriever = db.as_retriever(search_kwargs={"k": 6})


In [121]:
# Prompt for the QA chain. {context} and {question} are the two placeholders
# filled in when the template is formatted.
# The closing cue now asks for bullet points so it agrees with the formatting
# instruction in the body; it previously asked for "3-5 sentences", which
# contradicted the bullet-point instruction above it.
template = """
You are an expert answering questions using only the context below. 
Provide detailed responses in organised bullet points to the questions based on the context.
If the answer is not in the context, say "I don't know".

Context:
{context}

Question:
{question}

Answer in 3-5 bullet points:
"""

qa_prompt = PromptTemplate.from_template(template)

In [122]:
# Step 5: Set up RetrievalQA chain with source tracking
# Uses the "stuff" chain type with the custom prompt. With
# return_source_documents=True the result dict also carries the retrieved
# chunks under "source_documents", which later cells print.
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": qa_prompt},
    return_source_documents=True,
    llm=ChatOpenAI(
        model_name="gpt-4o-mini",
        temperature=0.1,
        max_tokens=500,
    ),
)

# NOTE: everything between the triple quotes below is a disabled example held
# in a bare string literal; none of it is executed. Because the string is the
# last expression in the cell, its repr is echoed as the cell's output.
# If this example is re-enabled, the `for` loop needs a real body: as written
# its body consists only of comments, which is a syntax error.
'''
# Step 6: Ask a question
query = "Discuss school feeding programs and how they are treated in the Handbook."
result = qa_chain.invoke({"query": query})

# Step 7: Print the result
#print("📌 Answer:\n", result["result"])
#print("\n📚 Source Documents:")
for i, doc in enumerate(result["source_documents"], 1):
    #print(f"\n--- Source {i} ---")
    #print(doc.page_content)
'''

'\n# Step 6: Ask a question\nquery = "Discuss school feeding programs and how they are treated in the Handbook."\nresult = qa_chain.invoke({"query": query})\n\n# Step 7: Print the result\n#print("📌 Answer:\n", result["result"])\n#print("\n📚 Source Documents:")\nfor i, doc in enumerate(result["source_documents"], 1):\n    #print(f"\n--- Source {i} ---")\n    #print(doc.page_content)\n'

In [123]:
# Run one question through the chain and print the answer followed by every
# retrieved source chunk.
query = "what do we know about the corruption of different types of programs?"
# Use .invoke() rather than calling the chain object directly; the direct-call
# form is deprecated in LangChain and returns the same dict.
result = qa_chain.invoke({"query": query})

print("\nAnswer:\n", result["result"])
print("\nSources:")
for i, doc in enumerate(result["source_documents"], 1):
    print(f"\nSource {i}:\n{doc.page_content}")  # full chunk text, not truncated


Answer:
 - Corruption can significantly impact the efficacy of various programs, particularly in the context of environmental regulation and antipoverty initiatives.
- In the environmental auditing example by Duﬂo et al. (2013), auditors under the status quo system were found to systematically underreport emissions due to conflicts of interest, while a restructured payment system led to more truthful reporting.
- Antipoverty programs can also suffer from corruption and inefficiencies if not implemented properly, as highlighted by the challenges in the Bono de Desarrollo Humano (BDH) program in Ecuador, where conditionality was not enforced, leading to different behavioral responses among beneficiaries.
- Additionally, research indicates that individuals aspiring for public sector jobs may exhibit higher levels of corruption compared to those in the private sector, suggesting a need for careful personnel policy design to mitigate corruption risks.

Sources:

Source 1:
they audit.Duﬂo e

In [81]:
# Echo the whole `result` dict from the most recent chain call for inspection.
# NOTE(review): the saved output under this cell shows a different query and an
# earlier execution count than the query cell above it, so it appears to come
# from an older run; re-run the notebook top to bottom to refresh it.
result

{'query': 'What are the benefits of conditional cash transfers?',
 'result': 'Conditional cash transfers (CCTs) have several benefits, including:\n1. **Improving Education**: CCT programs can incentivize families to send their children to school regularly by conditioning cash transfers on school attendance. This can lead to higher educational outcomes.\n2. **Enhancing Health**: CCTs can encourage families to invest in preventive health measures by linking cash transfers to health-related conditions. This can lead to improved health outcomes.\n3. **Reducing Poverty**: By providing cash transfers to families in need, CCT programs can help alleviate poverty and improve the overall well-being of households.\n4. **Behavioral Changes**: CCTs can promote positive behavioral changes, such as increased school attendance, better health practices, and overall improved decision-making within households.\n5. **Social Protection**: Conditional cash transfers can act as a form of social protection, e

In [None]:
# Interactive console loop: read a question, run it through qa_chain, print the
# answer and a 300-character preview of each retrieved source chunk. Typing
# "exit" or "quit" (any case, surrounding whitespace ignored) ends the loop.
while True:
    query = input("Ask something (or type 'exit'): ").strip()
    if query.lower() in ("exit", "quit"):
        break
    if not query:
        # Blank input: prompt again instead of spending an API call on an
        # empty question.
        continue

    # Plain progress message. The previous st.spinner(...) is a Streamlit UI
    # element and has nothing to render to in an input()/print() loop.
    print("thinking...")
    # .invoke() replaces the deprecated direct call on the chain object.
    result = qa_chain.invoke({"query": query})
    print("\nAnswer:\n", result["result"])
    print("\nSources:")
    for i, doc in enumerate(result["source_documents"], 1):
        print(f"\nSource {i}:\n{doc.page_content[:300]}...")  # Short preview