In [1]:
import configparser
import os

config = configparser.RawConfigParser()
config.read('keys.config')
os.environ["ACTIVELOOP_TOKEN"] = config.get('keys', 'active_loop_key')
os.environ["OPENAI_API_KEY"] = config.get('keys', 'open_ai_key')


In [19]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

# Before executing the following code, make sure to have your
# Activeloop key saved in the “ACTIVELOOP_TOKEN” environment variable.

# instantiate the LLM and embeddings models
llm = OpenAI(model="text-davinci-003", temperature=0)
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

f = open('energy_week2.txt')
row = f.readlines()

texts = []
metadatas = []
for r in row:
    if '•	' in r:
        texts.append(r.replace('•	', ''))
        metadatas.append({"query": ""})

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0, separators="\n")
docs = text_splitter.create_documents(texts, metadatas=metadatas)

# create Deep Lake dataset
# TODO: use your organization id here. (by default, org id is your username)
my_activeloop_org_id = "braduck" 
my_activeloop_dataset_name = "espresso"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)

# add documents to our Deep Lake dataset
db.delete(delete_all=True)
db.add_documents(docs)

Deep Lake Dataset in hub://braduck/espresso already exists, loading from the storage
Your Deep Lake dataset has been successfully created!


/

Dataset(path='hub://braduck/espresso', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype       shape       dtype  compression
  -------    -------     -------     -------  ------- 
 embedding  embedding  (1824, 1536)  float32   None   
    id        text      (1824, 1)      str     None   
 metadata     json      (1824, 1)      str     None   
   text       text      (1824, 1)      str     None   


 

['a9340bd2-267e-11ee-933a-be20711fe3ae',
 'a9340c4a-267e-11ee-933a-be20711fe3ae',
 'a9340c68-267e-11ee-933a-be20711fe3ae',
 'a9340c72-267e-11ee-933a-be20711fe3ae',
 'a9340c86-267e-11ee-933a-be20711fe3ae',
 'a9340c9a-267e-11ee-933a-be20711fe3ae',
 'a9340ca4-267e-11ee-933a-be20711fe3ae',
 'a9340cb8-267e-11ee-933a-be20711fe3ae',
 'a9340cc2-267e-11ee-933a-be20711fe3ae',
 'a9340cd6-267e-11ee-933a-be20711fe3ae',
 'a9340ce0-267e-11ee-933a-be20711fe3ae',
 'a9340cf4-267e-11ee-933a-be20711fe3ae',
 'a9340cfe-267e-11ee-933a-be20711fe3ae',
 'a9340d12-267e-11ee-933a-be20711fe3ae',
 'a9340d1c-267e-11ee-933a-be20711fe3ae',
 'a9340d30-267e-11ee-933a-be20711fe3ae',
 'a9340d44-267e-11ee-933a-be20711fe3ae',
 'a9340d4e-267e-11ee-933a-be20711fe3ae',
 'a9340d62-267e-11ee-933a-be20711fe3ae',
 'a9340d6c-267e-11ee-933a-be20711fe3ae',
 'a9340d80-267e-11ee-933a-be20711fe3ae',
 'a9340d8a-267e-11ee-933a-be20711fe3ae',
 'a9340d9e-267e-11ee-933a-be20711fe3ae',
 'a9340da8-267e-11ee-933a-be20711fe3ae',
 'a9340dbc-267e-

In [31]:
from langchain import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain import LLMChain
from langchain.chains import create_qa_with_sources_chain
from langchain.chat_models import ChatOpenAI

# Load Sample
f = open('sample_espresso.txt', 'r')
rows = f.readlines()

sample = []
for r  in rows:
    if "Category:" in r: # New Category
        sample.append({"query": r})
    else:
        if sample[-1].get("answer", None) is None:
            sample[-1]["answer"] = ""
        
        sample[-1]["answer"] += r

# create an example template
example_template = """
User: {query}
AI: {answer}
"""

# create a prompt example from above template
example_prompt = PromptTemplate(
    input_variables=["query", "answer"],
    template=example_template
)

# now break our previous prompt into a prefix and suffix
# the prefix is our instructions
prefix = """The following is a template for Energy-related News, grouped by category.\
    Use this template as a sample to pick Energy-related News from documents.
"""
# and the suffix our user input and output indicator
suffix = """
User: {query}
AI: """
suffix = "{context}"

chat = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.0, max_tokens=8192)

for r in sample:
    print("Category", r['query'])
    # now create the few-shot prompt template
    few_shot_prompt_template = FewShotPromptTemplate(
        examples=[{"query": r['query'], "answer": r['answer']}],
        example_prompt=example_prompt,
        prefix=prefix,
        suffix=suffix,
        input_variables=["context"],
        example_separator="\n\n"
    )
    qa_chain = create_qa_with_sources_chain(llm)
    final_qa_chain = LLMChain(
        llm=llm,
        prompt=few_shot_prompt_template
    )
    chain_type_kwargs = {"prompt": few_shot_prompt_template,'verbose': True}
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":5})
    retrieval_qa = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type_kwargs=chain_type_kwargs
    )
    query = f"""
    Bring the most relevant paragraphs according to few shot examples.\
    Rules:\
    - Minimum of 2 and Max of 5 paragraphs
    - Bring the most relevant paragrapsh, based on:
        - Words related to Energy (Crude Oil, Natural Gas, Electricity)
        - Words related to the Category {k}
        - Lenght of the paragraph
    """
    retrieval_qa.run(query)

    # chain = LLMChain(llm=chat, prompt=few_shot_prompt_template)
    # chain.run("Generate 3 new random headlines for the Category Global Macro")


Category Category: Geopolitics



[1m> Entering new  chain...[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a template for Energy-related News, grouped by category.    Use this template as a sample to pick Energy-related News from documents.



User: Category: Geopolitics

AI: OPEC meeting on June 4th.
International inspectors report Iran has re-installed monitoring equipment even as its highly-enriched uranium inventory surged 30% last quarter



Briefs on Natural Gas: Gold Strategy US Clean Energy:

Good collection of @WSJ energy briefs on solar's record-breaking 1st quarter, Freeport LNG blast's 1-year anniversary, electric cars almost reaching 1% of the US market and more $BOIL #natgas

"the importance of energy in the PPI decline in recent months and how, together with the drop in..." via @eToro $OIL $NATGAS

Only a few sectors can buy prompt gas (tomorrow/next month) in a flexible way as their processes allow ramping up/down produc