In [35]:
from pprint import pprint
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv

load_dotenv()

def print_documents_metadata(documents):
    for i, document in enumerate(documents):
        print(f"Document {i}")
        pprint(document.metadata)

In [36]:
# Replace with persist_directory location from 01_load_hummingbot_docs.ipnynb
persist_directory = "/Users/feng/Code/ai-assistant/vector_stores"

embedding = OpenAIEmbeddings()

vectordb = Chroma(
    collection_name="hummingbot_documentation",
    persist_directory=persist_directory,
    embedding_function=embedding
)

In [37]:
vectordb._collection.count()

11758

In [38]:
# Comparing similarity search with max marginal relevance search
query = """Can you explain me how to configure a pure market making strategy?"""

# Similarity Search

In [39]:
similar_documents = vectordb.similarity_search(query=query, k=10)
print_documents_metadata(similar_documents)

Document 0
{'titles': '#strategy-info\n'
           '#architecture\n'
           '#i-more-resources\n'
           '#refreshing-orders\n'
           '#strategy-tier\n'
           '#example-order-flow\n'
           '#executing-order-proposals\n'
           '#supported-exchange-types\n'
           '#pure_market_making\n'
           '#strategy-configs\n'
           '#summary\n'
           '#description',
 'url': 'https://hummingbot.org/strategies/pure-market-making/'}
Document 1
{'titles': '#strategy-info\n'
           '#architecture\n'
           '#i-more-resources\n'
           '#refreshing-orders\n'
           '#strategy-tier\n'
           '#example-order-flow\n'
           '#executing-order-proposals\n'
           '#supported-exchange-types\n'
           '#pure_market_making\n'
           '#strategy-configs\n'
           '#summary\n'
           '#description',
 'url': 'https://hummingbot.org/strategies/pure-market-making/'}
Document 2
{'titles': '#how-to-configure\n#list-of-configs',
 

# Max Marginal Relevance Search

In [40]:
mmr_documents = vectordb.max_marginal_relevance_search(query=query, k=5, fetch_k=10)
for i, document in enumerate(mmr_documents):
    print(f"Document {i}")
    pprint(document.page_content)

Document 0
('Pure Market Making - Hummingbot/nSkip to content/nHummingbot/nPure Market '
 'Making/nInitializing '
 'search/nhummingbot/hummingbot/nHome/nDocs/nAcademy/nBotcamp/nReleases/nBounties/nGovernance/nAbout/nBlog/nHummingbot/nhummingbot/hummingbot/nHome/nDocs/nDocs/nGetting '
 'Started/nInstallation/nClient/nGateway/nDashboard/nStrategies/nStrategies/nV2 '
 'Strategies/nV1 Strategies/nV1 Strategies/nPure Market Making/nPure Market '
 'Making/nTable of contents/n📁 Strategy Info/n🏆 Strategy Tier/n📝 Summary/n🏦 '
 'Supported Exchange Types/n🛠️ Strategy configs/n📓 '
 'Description/nArchitecture/nRefreshing Orders/nExecuting Order '
 'Proposals/nExample Order Flow/nℹ️ More Resources/nCross-Exchange Market '
 'Making/nAMM Arbitrage/nAvellaneda Market Making/nCross-Exchange '
 'Mining/nHedge/nLiquidity Mining/nPerpetual Market Making/nSpot Perpetual '
 'Arbitrage/nTWAP/nUniswap v3 LP/nPMM Strategy '
 'Configs/nScripts/nConnectors/nGlossary/nTroubleshooting/nAcademy/nAcademy/nLatest/nAll

# SelfQueryRetriever

Using Runnables to construct the query

In [41]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.llms.openai import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever


document_content_description = "Documentation of Hummingbot"
metadata_field_info = [
    AttributeInfo(
        name="url",
        description="The url where the information is collected",
        type="string",
    ),
    AttributeInfo(
        name="titles",
        description="The titles that are present in the page, all of them start with the symbol #. This field is important to understand the major topics that the documentation page includes.",
        type="string",
    ),
]

In [42]:
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_content_description,
    metadata_field_info,
    enable_limit=True
)

In [43]:
print_documents_metadata(retriever.get_relevant_documents(query, k=6))

Document 0
{'titles': '#strategy-info\n'
           '#architecture\n'
           '#i-more-resources\n'
           '#refreshing-orders\n'
           '#strategy-tier\n'
           '#example-order-flow\n'
           '#executing-order-proposals\n'
           '#supported-exchange-types\n'
           '#pure_market_making\n'
           '#strategy-configs\n'
           '#summary\n'
           '#description',
 'url': 'https://hummingbot.org/strategies/pure-market-making/'}
Document 1
{'titles': '#strategy-info\n'
           '#strategy-tier\n'
           '#exchanges-supported\n'
           '#strategy-configs\n'
           '#summary\n'
           '#description\n'
           '#cross-exchange-mining',
 'url': 'https://hummingbot.org/strategies/cross-exchange-mining/'}
Document 2
{'titles': '#who-runs-the-hummingbot-foundation\n'
           '#why-is-hummingbot-open-source\n'
           '#what-is-gateway\n'
           '#hummingbot-foundation\n'
           '#what-does-the-hummingbot-foundation-do\n'
 

In [44]:
document_contents = "Hummingbot scripts"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectordb,
    document_contents=document_contents,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [45]:
relevant_docs = retriever.get_relevant_documents(query=query, k=2)

In [46]:
print_documents_metadata(relevant_docs)

Document 0
{'titles': '#strategy-info\n'
           '#architecture\n'
           '#i-more-resources\n'
           '#refreshing-orders\n'
           '#strategy-tier\n'
           '#example-order-flow\n'
           '#executing-order-proposals\n'
           '#supported-exchange-types\n'
           '#pure_market_making\n'
           '#strategy-configs\n'
           '#summary\n'
           '#description',
 'url': 'https://hummingbot.org/strategies/pure-market-making/'}
Document 1
{'titles': '#strategy-info\n'
           '#strategy-tier\n'
           '#exchanges-supported\n'
           '#strategy-configs\n'
           '#summary\n'
           '#description\n'
           '#cross-exchange-mining',
 'url': 'https://hummingbot.org/strategies/cross-exchange-mining/'}
Document 2
{'titles': '#who-runs-the-hummingbot-foundation\n'
           '#why-is-hummingbot-open-source\n'
           '#what-is-gateway\n'
           '#hummingbot-foundation\n'
           '#what-does-the-hummingbot-foundation-do\n'
 

# Compressor

In [47]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.llms.openai import OpenAI

In [48]:
# Wrap our vectorstore
llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(search_type="mmr"),
    verbose=True
)

In [49]:
compressed_docs = compression_retriever.get_relevant_documents(query=query, k=6, fetch_k=15)



In [50]:
compressed_docs[3].page_content

'"For starters, in general interaction with them is less reliable. Unlike in case of centralized exchanges, for example obtaining an asset price from a DEX may occasionally fail. For this reason many operations on a DEX may have to be repeated until they\'re executed successfully./nAnother difference is dependence of transaction fees on currrent gas fees. Therefore taker transaction fees may vary and therefore also position profitability checks performed in the method/ncheck_if_still_profitable()/nmay return different results at different times for the same maker positions."'

In [51]:
len(compressed_docs)

4