In [None]:
# Based on the LangChain RAG tutorial: https://python.langchain.com/v0.2/docs/tutorials/rag/

In [1]:
! pip install langchain





In [10]:
! pip install tenacity==8.5.0

Collecting tenacity==8.5.0
  Obtaining dependency information for tenacity==8.5.0 from https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl.metadata
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Using cached tenacity-8.5.0-py3-none-any.whl (28 kB)
Installing collected packages: tenacity
  Attempting uninstall: tenacity
    Found existing installation: tenacity 8.1.0
    Uninstalling tenacity-8.1.0:
      Successfully uninstalled tenacity-8.1.0
Successfully installed tenacity-8.5.0




In [11]:
! pip install --quiet --upgrade langchain langchain-community langchain-chroma



In [12]:
! pip install -qU langchain-openai



In [56]:
# LangSmith tracing configuration.
# Use your own API key. The key is never written into the notebook: an existing
# LANGCHAIN_API_KEY from the environment is kept as-is, and only when it is
# missing or empty is it prompted for (input is not echoed).
import getpass
import os

os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
if not os.environ.get('LANGCHAIN_API_KEY'):
    os.environ['LANGCHAIN_API_KEY'] = getpass.getpass('LangSmith API key: ')

In [55]:
# OpenAI credentials. Use your own API key. An existing OPENAI_API_KEY from the
# environment is kept; only when it is missing or empty is it prompted for
# (input is not echoed), so the key is never saved in the notebook.
# Free calls are limited: if funding from Goldhirsh does not arrive soon, we
# should probably build a local, free RAG application with Ollama instead.
import getpass
import os  # imported here as well so this cell does not depend on another cell

if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

In [27]:
from langchain_community.document_loaders import JSONLoader
import json
from pathlib import Path

# Source JSON file, resolved relative to the notebook's working directory.
file_path = './idea-2024-new.json'

# Parse the whole file into `data` (kept separately from the loader below).
raw_json_text = Path(file_path).read_text(encoding='utf-8')
data = json.loads(raw_json_text)

# Loader that selects the "summary" field of every top-level array element.
# NOTE(review): JSONLoader presumably needs the `jq` package, which none of the
# install cells in this notebook add explicitly; confirm it is available.
loader = JSONLoader(file_path=file_path, jq_schema=".[].summary", text_content=True)

In [28]:
# Run the JSONLoader and keep the resulting documents in `docs`.
docs = loader.load()

In [29]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking configuration: 1000-character chunks, 200 characters of overlap,
# and the start offset of every chunk recorded (add_start_index=True).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)

# Split every loaded document into chunks.
all_splits = text_splitter.split_documents(docs)

# Show how many chunks were produced.
len(all_splits)

874

In [31]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embed every chunk with OpenAI embeddings and index the vectors in Chroma.
# NOTE(review): each run of this cell presumably re-embeds all chunks through
# the OpenAI API; confirm cost before re-running.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding_model,
)

In [38]:
# Wrap the vector store as a retriever; no arguments are passed, so the
# library's default retrieval settings apply.
retriever = vectorstore.as_retriever()

In [44]:
from langchain import hub
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

#### RETRIEVAL and GENERATION ####

# Prompt: pulled from the LangChain hub under the id "rlm/rag-prompt".
prompt = hub.pull("rlm/rag-prompt")

# LLM: gpt-3.5-turbo with temperature fixed at 0.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

# Post-processing
def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents in their original order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# Chain inputs: "context" is the retriever output piped through format_docs,
# "question" is the raw input passed through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Chain: inputs -> prompt -> chat model -> plain-string output.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# Question
rag_chain.invoke("Which organization helps the homeless?")

'Vision Possible and Food on Foot are two organizations that help the homeless in Los Angeles. Vision Possible focuses on helping the homeless regain their lives and be productive citizens, while Food on Foot provides meals, supplies, job assistance, and housing support to those in need.'

In [50]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the QA assistant, one sentence per literal.
# "{context}" is the template placeholder left for the retrieved documents.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Two-message chat template: the system instructions, then the user's input.
# Note: this rebinds `prompt`, which an earlier cell assigned from hub.pull.
chat_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(chat_messages)


# Build the answer chain from the model and prompt, then put the retriever in
# front of it. Note: this rebinds `rag_chain` from the earlier cell.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# First query; `response` is inspected again in a later cell.
homeless_query = {"input": "Which organizations help the homeless?"}
response = rag_chain.invoke(homeless_query)
# Print the answer followed by one blank separator line.
print(response["answer"], end="\n\n")

# Second query.
irs_query = {"input": "What is food on foot's IRS standing?"}
response2 = rag_chain.invoke(irs_query)
print(response2["answer"])

Vision Possible and Food on Foot are two organizations that work to help the homeless in Los Angeles. Vision Possible focuses on conducting a vision to help the homeless regain their lives and be productive citizens, while Food on Foot works to alleviate homelessness by providing meals, supplies, and assistance in securing jobs and housing.

I don't know.


In [51]:
# Print each entry under the response's "context" key, with a blank line after each.
for document in response["context"]:
    print(document, end="\n\n")

page_content='Homelessness is not an individual issue. It is a community issue. Our organization, Vision Possible, aims to conduct a  vision to help the homeless regain their lives and be productive citizens like you and me.' metadata={'seq_num': 266, 'source': 'C:\\Users\\Vivien Chen\\Desktop\\BTT Goldhirsh1A\\idea-2024-new.json', 'start_index': 0}

page_content='This grant will go towards three separate projects/programs that have the same targeted audience – the homeless. The first project is to help reunify homeless people with their families/friends (if they so choose) through the creation of a homeless directory that would be available for the public to use, secondly, raise awareness about the daily struggles the homeless face in LA, and lastly, to promote homeless organizations with free ads.' metadata={'seq_num': 709, 'source': 'C:\\Users\\Vivien Chen\\Desktop\\BTT Goldhirsh1A\\idea-2024-new.json', 'start_index': 0}

page_content='Food on Foot works to alleviate homelessness in

In [52]:
# TODO: add message history to the RAG chain.
# Reference: https://python.langchain.com/docs/how_to/message_history/

In [53]:
# TODO: build a local RAG application (possibly with Ollama) so we can do more in-depth testing without making too many OpenAI API calls.
# Reference: https://python.langchain.com/v0.2/docs/tutorials/local_rag/