In [1]:
from pprint import pprint
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv

load_dotenv()

def print_documents_metadata(documents):
    for i, document in enumerate(documents):
        print(f"Document {i}")
        pprint(document.metadata)

In [2]:
persist_directory = "/Users/dardonacci/Documents/work/ai-assistant/vector_stores"
embedding = OpenAIEmbeddings()

vectordb = Chroma(
    collection_name="hummingbot_documentation",
    persist_directory=persist_directory,
    embedding_function=embedding
)

In [4]:
vectordb._collection.count()

9779

In [5]:
# Comparing similarity search with max marginal relevance search
query = """Can you explain me how to configure a pure market making strategy?"""

# Similarity Search

In [6]:
similar_documents = vectordb.similarity_search(query=query, k=10)
print_documents_metadata(similar_documents)

Document 0
{'titles': '#refreshing-orders\n'
           '#strategy-tier\n'
           '#pure_market_making\n'
           '#strategy-info\n'
           '#executing-order-proposals\n'
           '#description\n'
           '#architecture\n'
           '#summary\n'
           '#example-order-flow\n'
           '#supported-exchange-types\n'
           '#i-more-resources\n'
           '#strategy-configs',
 'url': 'https://docs.hummingbot.org/strategies/pure-market-making/'}
Document 1
{'titles': '#refreshing-orders\n'
           '#strategy-tier\n'
           '#pure_market_making\n'
           '#strategy-info\n'
           '#executing-order-proposals\n'
           '#description\n'
           '#architecture\n'
           '#summary\n'
           '#example-order-flow\n'
           '#supported-exchange-types\n'
           '#i-more-resources\n'
           '#strategy-configs',
 'url': 'https://docs.hummingbot.org/strategies/pure-market-making/'}
Document 2
{'titles': '#refreshing-orders\n'
       

# Max Marginal Relevance Search

In [55]:
mmr_documents = vectordb.max_marginal_relevance_search(query=query, k=5, fetch_k=10)
for i, document in enumerate(mmr_documents):
    print(f"Document {i}")
    pprint(document.page_content)

Document 0
('Pure Market Making - Hummingbot/nSkip to content/nHummingbot/nPure Market '
 'Making/nInitializing '
 'search/nhummingbot/hummingbot/nHome/nDocs/nAcademy/nBotcamp/nReleases/nBounties/nGovernance/nAbout/nBlog/nHummingbot/nhummingbot/hummingbot/nHome/nDocs/nDocs/nGetting '
 'Started/nInstallation/nClient/nGateway/nDashboard/nStrategies/nStrategies/nV2 '
 'Strategies/nV1 Strategies/nV1 Strategies/nPure Market Making/nPure Market '
 'Making/nTable of contents/n📁 Strategy Info/n🏆 Strategy Tier/n📝 Summary/n🏦 '
 'Supported Exchange Types/n🛠️ Strategy configs/n📓 '
 'Description/nArchitecture/nRefreshing Orders/nExecuting Order '
 'Proposals/nExample Order Flow/nℹ️ More Resources/nCross-Exchange Market '
 'Making/nAMM Arbitrage/nAvellaneda Market Making/nCross-Exchange '
 'Mining/nHedge/nLiquidity Mining/nPerpetual Market Making/nSpot Perpetual '
 'Arbitrage/nTWAP/nUniswap v3 LP/nPMM Strategy '
 'Configs/nScripts/nConnectors/nGlossary/nTroubleshooting/nAcademy/nAcademy/nLatest/nAll

# SelfQueryRetriever

Using Runnables to construct the query

In [47]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.llms.openai import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever


document_content_description = "Documentation of Hummingbot"
metadata_field_info = [
    AttributeInfo(
        name="url",
        description="The url where the information is collected",
        type="string",
    ),
    AttributeInfo(
        name="titles",
        description="The titles that are present in the page, all of them start with the symbol #. This field is important to understand the major topics that the documentation page includes.",
        type="string",
    ),
]

In [48]:
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_content_description,
    metadata_field_info,
    enable_limit=True
)

In [49]:
print_documents_metadata(retriever.get_relevant_documents(query, k=6))

Document 0
{'titles': '#refreshing-orders\n'
           '#strategy-tier\n'
           '#pure_market_making\n'
           '#strategy-info\n'
           '#executing-order-proposals\n'
           '#description\n'
           '#architecture\n'
           '#summary\n'
           '#example-order-flow\n'
           '#supported-exchange-types\n'
           '#i-more-resources\n'
           '#strategy-configs',
 'url': 'https://docs.hummingbot.org/strategies/pure-market-making/'}
Document 1
{'titles': '#refreshing-orders\n'
           '#strategy-tier\n'
           '#pure_market_making\n'
           '#strategy-info\n'
           '#executing-order-proposals\n'
           '#description\n'
           '#architecture\n'
           '#summary\n'
           '#example-order-flow\n'
           '#supported-exchange-types\n'
           '#i-more-resources\n'
           '#strategy-configs',
 'url': 'https://docs.hummingbot.org/strategies/pure-market-making/'}
Document 2
{'titles': '#refreshing-orders\n'
       

In [12]:
document_contents = "Hummingbot scripts"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectordb,
    document_contents=document_contents,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [13]:
relevant_docs = retriever.get_relevant_documents(query=query, k=2)

In [14]:
print_documents_metadata(relevant_docs)

Document 0
{'content_type': 'functions_classes',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/v2_directional-trading_macd_bb_v1.py'}
Document 1
{'content_type': 'functions_classes',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/v2_directional-trading_macd_bb_v1.py'}
Document 2
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/archived_scripts/examples_using_smart_components/directional_strategy_macd_bb.py'}
Document 3
{'content_type': 'simplified_code',
 'language': 'python',
 'source': '/Users/dardonacci/Documents/work/hummingbot/scripts/archived_scripts/examples_using_smart_components/directional_strategy_macd_bb.py'}


# Compressor

In [50]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.llms.openai import OpenAI

In [51]:
# Wrap our vectorstore
llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(search_type="mmr"),
    verbose=True
)

In [52]:
compressed_docs = compression_retriever.get_relevant_documents(query=query, k=6, fetch_k=15)



In [53]:
compressed_docs[3].page_content

'"From the beginning, our mission has been to democratize high-frequency trading with open source software." "As we wrote in the original Hummingbot whitepaper, market making is an important function critical to organic, efficient markets that should be decentralized to prevent the concentration risk that exists in traditional finance." "Market making is the act of simultaneously creating buy and sell orders for an asset in a market." "Market makers play an important role in providing liquidity to financial markets, especially in the highly fragmented cryptocurrency industry."'

In [54]:
len(compressed_docs)

4