In [1]:
import configparser
import os

config = configparser.RawConfigParser()
config.read('keys.config')
os.environ["ACTIVELOOP_TOKEN"] = config.get('keys', 'active_loop_key')
os.environ["OPENAI_API_KEY"] = config.get('keys', 'open_ai_key')


In [2]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import DeepLake



In [5]:
loader = TextLoader("state_of_the_union.txt", encoding="utf-8")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
for i, text in enumerate(texts):
    text.metadata["source"] = f"{i}-pl"
embeddings = OpenAIEmbeddings()
docsearch = DeepLake.from_documents(texts, embeddings)

# # create Deep Lake dataset
# # TODO: use your organization id here. (by default, org id is your username)
# my_activeloop_org_id = "braduck" 
# my_activeloop_dataset_name = "espresso"
# dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"
# db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)

# # add documents to our Deep Lake dataset
# db.delete(delete_all=True)
# db.add_documents(docsearch)


Deep Lake Dataset in ./deeplake/ already exists, loading from the storage


Dataset(path='./deeplake/', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype      shape      dtype  compression
  -------    -------    -------    -------  ------- 
 embedding  embedding  (84, 1536)  float32   None   
    id        text      (84, 1)      str     None   
 metadata     json      (84, 1)      str     None   
   text       text      (84, 1)      str     None   




In [6]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.prompts import PromptTemplate
from langchain.chains import create_qa_with_sources_chain

In [7]:
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

In [8]:
qa_chain = create_qa_with_sources_chain(llm)

In [9]:
doc_prompt = PromptTemplate(
    template="Content: {page_content}\nSource: {source}",
    input_variables=["page_content", "source"],
)

In [10]:
final_qa_chain = StuffDocumentsChain(
    llm_chain=qa_chain,
    document_variable_name="context",
    document_prompt=doc_prompt,
)

In [11]:
retrieval_qa = RetrievalQA(
    retriever=docsearch.as_retriever(), combine_documents_chain=final_qa_chain
)

In [12]:
query = "What did the president say about russia"

In [13]:
retrieval_qa.run(query)

'{\n  "answer": "The President announced that the United States will join its allies in closing off American airspace to all Russian flights, further isolating Russia and adding economic pressure. The Russian economy has been severely impacted, with the Ruble losing 30% of its value and the stock market losing 40%. The President also mentioned providing support to Ukraine in their fight for freedom, including military, economic, and humanitarian assistance. The United States is giving more than $1 billion in direct assistance to Ukraine. The President clarified that U.S. forces are not engaged in conflict with Russian forces in Ukraine, but are deployed to defend NATO allies in case of further aggression from Russia.",\n  "sources": ["0-pl", "4-pl"]\n}'