In [2]:
import configparser
import os

config = configparser.RawConfigParser()
config.read('keys.config')
os.environ["ACTIVELOOP_TOKEN"] = config.get('keys', 'active_loop_key')
os.environ["OPENAI_API_KEY"] = config.get('keys', 'open_ai_key')


In [None]:
# %pip install --upgrade langchain==0.0.208 deeplake openai typing_extensions==4.5.0

In [3]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

# Before executing the following code, make sure to have your
# Activeloop key saved in the “ACTIVELOOP_TOKEN” environment variable.

# instantiate the LLM and embeddings models
llm = OpenAI(model="text-davinci-003", temperature=0)
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

f = open('energy_week2.txt')
row = f.readlines()

texts = []
metadatas = []
for r in row:
    if '•	' in r:
        texts.append(r.replace('•	', ''))
        metadatas.append({"query": ""})

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0, separators="\n")
docs = text_splitter.create_documents(texts, metadatas=metadatas)

# create Deep Lake dataset
# TODO: use your organization id here. (by default, org id is your username)
my_activeloop_org_id = "braduck" 
my_activeloop_dataset_name = "espresso"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)

# add documents to our Deep Lake dataset
db.delete(delete_all=True)
db.add_documents(docs)



Deep Lake Dataset in hub://braduck/espresso already exists, loading from the storage
Your Deep Lake dataset has been successfully created!


/

Dataset(path='hub://braduck/espresso', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype       shape       dtype  compression
  -------    -------     -------     -------  ------- 
 embedding  embedding  (1824, 1536)  float32   None   
    id        text      (1824, 1)      str     None   
 metadata     json      (1824, 1)      str     None   
   text       text      (1824, 1)      str     None   


 

['f0546a0c-2827-11ee-8546-be20711fe3ae',
 'f0546a84-2827-11ee-8546-be20711fe3ae',
 'f0546a98-2827-11ee-8546-be20711fe3ae',
 'f0546aac-2827-11ee-8546-be20711fe3ae',
 'f0546ab6-2827-11ee-8546-be20711fe3ae',
 'f0546ade-2827-11ee-8546-be20711fe3ae',
 'f0546ae8-2827-11ee-8546-be20711fe3ae',
 'f0546afc-2827-11ee-8546-be20711fe3ae',
 'f0546b06-2827-11ee-8546-be20711fe3ae',
 'f0546b1a-2827-11ee-8546-be20711fe3ae',
 'f0546b24-2827-11ee-8546-be20711fe3ae',
 'f0546b38-2827-11ee-8546-be20711fe3ae',
 'f0546b42-2827-11ee-8546-be20711fe3ae',
 'f0546b56-2827-11ee-8546-be20711fe3ae',
 'f0546b60-2827-11ee-8546-be20711fe3ae',
 'f0546b74-2827-11ee-8546-be20711fe3ae',
 'f0546b7e-2827-11ee-8546-be20711fe3ae',
 'f0546b92-2827-11ee-8546-be20711fe3ae',
 'f0546b9c-2827-11ee-8546-be20711fe3ae',
 'f0546bb0-2827-11ee-8546-be20711fe3ae',
 'f0546bba-2827-11ee-8546-be20711fe3ae',
 'f0546bc4-2827-11ee-8546-be20711fe3ae',
 'f0546bd8-2827-11ee-8546-be20711fe3ae',
 'f0546be2-2827-11ee-8546-be20711fe3ae',
 'f0546bf6-2827-

In [8]:
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":2})
retrieval_qa = RetrievalQA.from_chain_type(
	llm=llm,
	chain_type="stuff",
	retriever=retriever
)

In [13]:
query = "What are the 4 most relevants paragraphs related to Enery and Global Macro?"
matching_docs = db.similarity_search(query=query)
print(matching_docs)

[Document(page_content='9/ Conclusions 2: Up to a third of the total global demand for e-fuels and e-chemicals may be traded in the future, leading to more diversified energy markets thanks to wide availability of renewable electricity.', metadata={'query': ''}), Document(page_content='In these seven countries, the four most energy-intensive industries account for 62 percent to 71 percent of total industrial gas demand, and 43 percent to 66 percent of industrial electricity consumption (source: Bruegel based on Eurostat). This policy brief by @GSgaravatti', metadata={'query': ''}), Document(page_content='These are all CONSERVATIVE estimates. I cant stress enough how important that is. They translate to individual commodities as well such as and to some extent #naturalgas which I think are all pretty much in the same part of the cycle.', metadata={'query': ''}), Document(page_content='"the importance of energy in the PPI decline in recent months and how, together with the drop in..." vi

In [11]:
query = f"""
Bring the most relevant paragraphs related to Energy and Geopolitics, according to rules below.\
Rules:\
- Minimum of 3 and Max of 5 paragraphs
- Bring the most relevant paragrapsh, based on:
    - Words related to Energy (Crude Oil, Natural Gas, Electricity)
    - Lenght of the paragraph
"""

# result = retrieval_qa({"query": query})
# print(result)
retrieval_qa.run("What are the 4 most relevants paragraphs related to Enery and Geopolitics?")


' The first paragraph provides information about the four most energy-intensive industries in seven countries and their respective gas and electricity consumption. The second paragraph discusses the importance of energy in the PPI decline in recent months and how it is related to the drop in oil and natural gas prices.'