In [1]:
import logging
import os
from typing import Iterator, Optional
import openai

from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index.callbacks.base import CallbackManager
from langchain.chat_models import ChatOpenAI
import chainlit as cl
from dotenv import load_dotenv

from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.indices.postprocessor import SimilarityPostprocessor
from llama_index.chat_engine.types import BaseChatEngine, ChatMode

from llama_index import (
    LLMPredictor,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
    set_global_service_context,
)
from llama_index import (
    VectorStoreIndex,
    get_response_synthesizer,
)

from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader

from llama_index.node_parser import SimpleNodeParser
from llama_index.node_parser.extractors import (
    MetadataExtractor,
    SummaryExtractor,
    QuestionsAnsweredExtractor,
    TitleExtractor,
    KeywordExtractor,
    EntityExtractor,
)
from llama_index.text_splitter import TokenTextSplitter
from llama_index import ServiceContext
from llama_index.llms import OpenAI
from llama_index.schema import MetadataMode
from llama_index import VectorStoreIndex
from llama_index.query_engine import SubQuestionQueryEngine
from llama_index.tools import QueryEngineTool, ToolMetadata




In [2]:
# Model name for the answering LLM: taken from the GPT_MODEL environment
# variable when it is set, otherwise the previous hard-coded default "gpt-4".
gpt_model = os.environ.get("GPT_MODEL", "gpt-4")
# Sampling temperature for the answering LLM (fixed in the notebook).
gpt_temperature = 0.7

In [3]:
# Markdown files (relative paths) that are loaded and indexed below.
document_list = [
    "../quant_scraper/docs/vbt_pro/cookbook.md",
    "../quant_scraper/docs/vbt_pro/documentation.md",
]

## With Metadata Extraction

In [18]:
# LLM handed to the metadata extractors at indexing time.
llm_indexer = OpenAI(model="gpt-3.5-turbo", temperature=0.1, max_tokens=512)

# Token-based splitter: "\n## " (the markdown level-2 heading marker) is the
# separator, chunks are capped at 1024 tokens and do not overlap.
text_splitter = TokenTextSplitter(chunk_size=1024, chunk_overlap=0, separator="\n## ")

# Only the questions-answered extractor is active. Alternatives that were tried
# and are currently switched off:
#   TitleExtractor(nodes=3, llm=llm_indexer)
#   KeywordExtractor(keywords=3, llm=llm_indexer)
#   EntityExtractor(prediction_threshold=0.5, llm=llm_indexer)
#   SummaryExtractor(summaries=["prev", "self"], llm=llm_indexer)
active_extractors = [QuestionsAnsweredExtractor(llm=llm_indexer, questions=5)]
metadata_extractor = MetadataExtractor(extractors=active_extractors)

In [19]:
# Read the source markdown files into Document objects.
documents = SimpleDirectoryReader(input_files=document_list).load_data()

# Node parser that chunks documents with the splitter configured above.
# The metadata extractor is deliberately not attached here; to enable it, pass
# metadata_extractor=metadata_extractor as an additional argument.
node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter)

In [20]:
# Chunk every loaded document into nodes, showing a progress bar while parsing.
index_nodes = node_parser.get_nodes_from_documents(
    documents=documents,
    show_progress=True,
)

Parsing documents into nodes: 100%|██████████| 55/55 [00:00<00:00, 404.19it/s]


In [22]:
# Sanity check: print the text of the first and the last parsed node.
for boundary_node in (index_nodes[0], index_nodes[-1]):
    print(boundary_node.text)

Cookbook

This is a repository for short and sweet examples and links for useful
VectorBT PRO recipes. Simplified, condensed, new-user friendly, in-line
examples have been inserted where possible to augment the tutorials, the
documentation, and the API. We encourage users to add to this documentation.

Imports required by the code examples

    
    
    import numpy as np
    import pandas as pd
    from numba import njit
    import vectorbtpro as vbt
Summary

Kudos for following me all the way down here! The classes that we just covered
build a strong foundation for data analysis with vectorbt; they implement
design patterns that are encountered in most other places across the codebase,
which makes them very easy to recognize and extend. In fact, the most hard-
core class
Portfolio
is very similar to our `CorrStats`.

You're now more than ready for using vectorbt, soldier
!🌟

[ Python
code](https://vectorbt.pro/pvt_6d299575/assets/jupytext/documentation/building-
blocks.py.txt)


In [None]:
# Print the extractor-generated questions stored on each node.
# The key is read with .get() because it is only present when the metadata
# extractor was attached to the node parser; a plain [] lookup raises KeyError
# for nodes parsed without it.
for node in index_nodes:
    print(node.metadata.get('questions_this_excerpt_can_answer', '<no extracted questions>'))


In [None]:
# Service context for the first index, configured with a GPT-4 LLM.
index_service_context = ServiceContext.from_defaults(llm=OpenAI(model="gpt-4"))

# Build the vector index over the parsed nodes (progress bar enabled).
index = VectorStoreIndex(
    nodes=index_nodes,
    service_context=index_service_context,
    show_progress=True,
)

# Persist the index's storage context to disk next to the notebook.
index.storage_context.persist("../index_notebook")

In [None]:
# Plain query engine over the index: fetch the 10 most similar nodes and pass
# them through a similarity post-processor with a 0.7 cutoff.
engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
)

# Expose that engine as the single tool available to the sub-question engine.
knowledge_base_tool = QueryEngineTool(
    query_engine=engine,
    metadata=ToolMetadata(
        name="quant_knowledge_base",
        description="technical documentation for vectorbt pro",
    ),
)

final_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=[knowledge_base_tool],
)

In [None]:
# Sample user question used to exercise the query engines. The leading and
# trailing newline characters are part of the string.
test_query = (
    "\n"
    "In my strategy I have 4 take profits and stop loss. How can I move stop loss to breakeven after the first take profit is hit?"
    "\n"
)

# result = await final_engine.aquery(test_query)

In [None]:
# result

In [None]:
# LLM that writes the final answer; model name and temperature come from the
# configuration cell near the top of the notebook.
final_llm = LLMPredictor(
    llm=ChatOpenAI(
        temperature=gpt_temperature,
        model_name=gpt_model,
        max_tokens=2048,
        streaming=True,
    ),
)

# Service context shared by the index, the synthesizer and the query engine below.
service_context = ServiceContext.from_defaults(
    llm_predictor=final_llm,
    chunk_size=1024,
)

# Second vector index over the same nodes, built with the service context above.
index2 = VectorStoreIndex(
    nodes=index_nodes,
    service_context=service_context,
    show_progress=True,
)

# Synthesizer that combines the retrieved nodes in "tree_summarize" mode.
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
    service_context=service_context,
)

# Retrieve from index2, the index built in this cell. Previously the retriever
# pointed at `index` from an earlier cell, which left index2 built but unused
# and made this cell depend on that earlier cell having been run.
retriever = VectorIndexRetriever(
    index=index2,
    similarity_top_k=10,
)

# Assemble the query engine.
# NOTE(review): a response_synthesizer is passed explicitly, so the
# streaming=True flag here may have no effect — confirm against
# RetrieverQueryEngine.from_args before relying on streamed output.
query_engine2 = RetrieverQueryEngine.from_args(
    streaming=True,
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    service_context=service_context,
    node_postprocessors=[
        SimilarityPostprocessor(similarity_cutoff=0.7)
    ],
)

In [None]:
# Run the sample question through query_engine2 asynchronously. The bare
# top-level `await` is valid in a notebook cell, not in a plain Python script.
result = await query_engine2.aquery(test_query)

In [None]:
# Show the `response` attribute of the query result as the cell output.
result.response

In [None]:
# Print the score of every source node returned with the result, followed by
# the extractor-generated questions stored on the underlying node.
# The metadata is read from `.node` and with .get(): the key is only present
# when the metadata extractor was attached to the node parser, so a plain []
# lookup raises KeyError otherwise.
for source_node in result.source_nodes:
    print(source_node.score)
    print(source_node.node.metadata.get('questions_this_excerpt_can_answer', '<no extracted questions>'))
