In [None]:
import os

from pprint import pprint
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv

load_dotenv()

os.environ["OPENAI_API_KEY"]  # you should see the api key if the .env file is loaded correctly

In [2]:
def print_documents_metadata(documents):
    for i, document in enumerate(documents):
        print(f"Document {i}")
        pprint(document.metadata)

In [6]:
persist_directory = "/Users/dardonacci/Documents/dardonacci/2-Code/metabrain/vector_stores/hummingbot/chroma/"  # Put the directory where you want to persist the vectorstore

embedding = OpenAIEmbeddings()

vectordb = Chroma(
    collection_name="hummingbot_scripts",
    persist_directory=persist_directory,
    embedding_function=embedding
)

In [7]:
# Comparing similarity search with max marginal relevance search
query = """I would like to create a Directional Strategy using Hummingbot using the MACDBB controller. Can you the script for me and suggest values for the indicators?
"""

# Similarity Search

In [8]:
similar_documents = vectordb.similarity_search(query=query, k=10)
print_documents_metadata(similar_documents)

Document 0
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/v2_directional-trading_macd_bb_v1.py'}
Document 1
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/v2_directional-trading_macd_bb_v1.py'}
Document 2
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/archived_scripts/examples_using_smart_components/directional_strategy_macd_bb.py'}
Document 3
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/archived_scripts/examples_using_smart_components/directional_strategy_macd_bb.py'}
Document 4
{'content_type': 'functions_classes',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/v2_directional-trading_macd_bb_v1.py'}
Document 5
{'content_type': 'functions_classe

# Max Marginal Relevance Search

In [9]:
mmr_documents = vectordb.max_marginal_relevance_search(query=query, k=5, fetch_k=3)
for i, document in enumerate(mmr_documents):
    print(f"Document {i}")
    pprint(document.metadata)

Document 0
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/v2_directional-trading_macd_bb_v1.py'}
Document 1
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/v2_directional-trading_macd_bb_v1.py'}
Document 2
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/archived_scripts/examples_using_smart_components/directional_strategy_macd_bb.py'}


In [10]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

In [11]:
metadata_field_info = [
    AttributeInfo(
        name="content_type",
        description="Can be `simplified_code` or `function_classes` for the same file name. Include the two of them for each source filename",
        type="string",
    ),
]

In [12]:
document_contents = "Hummingbot scripts"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectordb,
    document_contents=document_contents,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [13]:
relevant_docs = retriever.get_relevant_documents(query=query, k=2)

In [14]:
print_documents_metadata(relevant_docs)

Document 0
{'content_type': 'functions_classes',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/v2_directional-trading_macd_bb_v1.py'}
Document 1
{'content_type': 'functions_classes',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/v2_directional-trading_macd_bb_v1.py'}
Document 2
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/archived_scripts/examples_using_smart_components/directional_strategy_macd_bb.py'}
Document 3
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/archived_scripts/examples_using_smart_components/directional_strategy_macd_bb.py'}


# Compressor

In [15]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [16]:
# Wrap our vectorstore
llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(search_type="mmr"),
    verbose=True
)

In [17]:
compressed_docs = compression_retriever.get_relevant_documents(query=query, k=2)



In [18]:
print_documents_metadata(compressed_docs)

Document 0
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/v2_directional-trading_macd_bb_v1.py'}
Document 1
{'content_type': 'functions_classes',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/archived_scripts/community_scripts/microprice_calculator.py'}
Document 2
{'content_type': 'functions_classes',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/archived_scripts/examples_using_smart_components/macd_bb_directional_strategy.py'}
