In [35]:
import os
from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader, download_loader, ComposableGraph, GPTListIndex, GPTFaissIndex, GPTTreeIndex
from dotenv import load_dotenv
import faiss

def load_faiss_index(index_name="faiss_index.json", path="./index/faiss_index.index"):
    #path = os.path.join(path, index_name)
    return GPTFaissIndex.load_from_disk(
        index_name, 
        faiss_index_save_path=path
    )
def create_faiss_index(documents):
    # dimensions of text-ada-embedding-002
    d = 1536 
    faiss_index = faiss.IndexFlatL2(d)
    index = GPTFaissIndex.from_documents(documents, faiss_index=faiss_index)
    return index

### Compare 3 index for example wiki

In [29]:
documents = SimpleDirectoryReader(input_files=['./data/backtest-create.rtf']).load_data()

In [36]:
indices = [
    GPTSimpleVectorIndex.from_documents(documents), 
    GPTTreeIndex.from_documents(documents),
    create_faiss_index(documents)
]

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 5412 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 5412 tokens


In [23]:
online_doc_index = load_faiss_index("./index/online_doc_index.json", "./index/online_doc_index.index")

In [37]:
from llama_index.playground import Playground

playground = Playground(indices=indices)

### Ask a sample question and compare the result

In [38]:
playground.compare("how to create a new backteest?")

[1mQuery:[0m
how to create a new backteest?

Trying 15 combinations...


[1mGPTSimpleVectorIndex[0m, mode = default


INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4007 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens


[36;1m[1;3m

To create a new backtest, hit the “New Backtest” button on your dashboard. You can then set a stop loss based on a percentage, fixed profit, closing order, or per-leg stop loss. Additionally, you can use Profit Actions to scale out of a percentage of each trade at certain profit targets, adjust your stop loss, or both.[0m

[1mGPTSimpleVectorIndex[0m, mode = embedding


INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4007 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens
INFO:llama_index.indices.tree.leaf_query:> Starting query: how to create a new backteest?


[36;1m[1;3m

To create a new backtest, hit the “New Backtest” button on your dashboard. You can then set a stop loss based on a percentage, fixed profit, closing order, or per-leg stop loss. Additionally, you can use Profit Actions to scale out of a percentage of each trade at certain profit targets, adjust your stop loss, or both.[0m

[1mGPTTreeIndex[0m, mode = default


INFO:llama_index.indices.tree.leaf_query:>[Level 0] Selected node: [1]/[1]
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 7702 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens
INFO:llama_index.indices.tree.summarize_query:> Starting query: how to create a new backteest?


[33;1m[1;3mTo create a new backtest, hit the “New Backtest” button on your dashboard. You can then set a stop loss based on a percentage, fixed profit, closing order, or per-leg stop loss. You can also use profit actions to scale out of a percentage of each trade at certain profit targets, adjust your stop loss, or both.[0m

[1mGPTTreeIndex[0m, mode = summarize


INFO:llama_index.indices.common_tree.base:> Building index from nodes: 1 chunks
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 6445 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens
INFO:llama_index.indices.tree.leaf_query:> Starting query: how to create a new backteest?


[33;1m[1;3m
To create a new backtest, click the "New Backtest" button at the top of the page. This will open a new backtest window where you can select the tickers you want to backtest (SPY, SPX, QQQ, and IWM) and the date range (1/1/13 to yesterday). Under the “Strategy” section, choose from the list of common strategies or create a custom strategy by clicking the “Add Leg” option. Select the strike selection type and option legs. Link the legs if necessary. Set entry conditions such as entry time, frequency, and specific dates. Set starting funds, margin allocation % per trade, max open trades, max contracts per trade, and use VIX. Set exit conditions such as profit target and stop loss. You can also save and share your backtest by selecting the down arrow next to the New Backtest button.[0m

[1mGPTTreeIndex[0m, mode = embedding


INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4007 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 5421 tokens
INFO:llama_index.indices.tree.retrieve_query:> Starting query: how to create a new backteest?


[33;1m[1;3mTo create a new backtest, hit the “New Backtest” button on your dashboard. You can then set a stop loss based on a percentage, fixed profit, closing order, or per-leg stop loss. Additionally, you can use Profit Actions to scale out of a percentage of each trade at certain profit targets, adjust your stop loss, or both.[0m

[1mGPTTreeIndex[0m, mode = retrieve


INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 3594 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens


[33;1m[1;3m
To create a new backtest, hit the “New Backtest” button on your dashboard. You can also save and share a backtest by selecting the down arrow next to the New Backtest button and toggling the option in the “save/edit” backtest slider.[0m

[1mGPTFaissIndex[0m, mode = default


INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4007 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens


[38;5;200m[1;3m

To create a new backtest, hit the “New Backtest” button on your dashboard. You can then set a stop loss based on a percentage, fixed profit, closing order, or per-leg stop loss. Additionally, you can use Profit Actions to scale out of a percentage of each trade at certain profit targets, adjust your stop loss, or both.[0m

[1mGPTFaissIndex[0m, mode = embedding


INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4006 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens


[38;5;200m[1;3m

To create a new backtest, hit the “New Backtest” button on your dashboard. You can then set a stop loss based on a percentage, fixed profit, closing order, or per-leg stop loss. You can also use profit actions to scale out of a percentage of each trade at certain profit targets, adjust your stop loss, or both.[0m


Ran 8 combinations in total.


Unnamed: 0,Index,Mode,Output,Duration,LLM Tokens,Embedding Tokens
0,GPTSimpleVectorIndex,default,"\n\nTo create a new backtest, hit the “New Bac...",3.619419,4007,9
1,GPTSimpleVectorIndex,embedding,"\n\nTo create a new backtest, hit the “New Bac...",4.734237,4007,9
2,GPTTreeIndex,default,"To create a new backtest, hit the “New Backtes...",8.726506,7702,0
3,GPTTreeIndex,summarize,"\nTo create a new backtest, click the ""New Bac...",10.940973,6445,0
4,GPTTreeIndex,embedding,"To create a new backtest, hit the “New Backtes...",4.398623,4007,5421
5,GPTTreeIndex,retrieve,"\nTo create a new backtest, hit the “New Backt...",2.750765,3594,0
6,GPTFaissIndex,default,"\n\nTo create a new backtest, hit the “New Bac...",3.775957,4007,9
7,GPTFaissIndex,embedding,"\n\nTo create a new backtest, hit the “New Bac...",3.80834,4006,9


### TreeIndex - retrieve takes min time, use less LLM token and have a good answer. Winner!

## Round 2 - how does TreeIndex perform with Knowledge graph index?

In [43]:
from llama_index.indices.knowledge_graph.base import GPTKnowledgeGraphIndex

KG_index = GPTKnowledgeGraphIndex.from_documents(
    documents, 
    max_triplets_per_chunk=2
)
indices = [
    GPTTreeIndex.from_documents(documents),
    KG_index
]

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 5692 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens


In [45]:
playground = Playground(indices=indices)
playground.compare("how to create a new backteest?")

INFO:llama_index.indices.tree.leaf_query:> Starting query: how to create a new backteest?


[1mQuery:[0m
how to create a new backteest?

Trying 10 combinations...


[1mGPTTreeIndex[0m, mode = default


INFO:llama_index.indices.tree.leaf_query:>[Level 0] Selected node: [1]/[1]
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 7703 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens
INFO:llama_index.indices.tree.summarize_query:> Starting query: how to create a new backteest?


[36;1m[1;3mTo create a new backtest, hit the “New Backtest” button on your dashboard. You can then set a stop loss based on a percentage, fixed profit, closing order, or per-leg stop loss. Additionally, you can use Profit Actions to scale out of a percentage of each trade at certain profit targets, adjust your stop loss, or both.[0m

[1mGPTTreeIndex[0m, mode = summarize


INFO:llama_index.indices.common_tree.base:> Building index from nodes: 1 chunks
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 6490 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens
INFO:llama_index.indices.tree.leaf_query:> Starting query: how to create a new backteest?


[36;1m[1;3m
To create a new backtest, click the "New Backtest" button at the top of the page. This will open a new backtest window where you can select the tickers you want to backtest (SPY, SPX, QQQ, and IWM) and the date range (1/1/13 to yesterday). Under the “Strategy” section, choose from the list of common strategies or create a custom strategy by clicking the “Add Leg” option. Select the strike selection type and option legs. Link the legs if necessary. Set entry conditions such as entry time, frequency, and specific dates. Set starting funds, margin allocation % per trade, max open trades, max contracts per trade, and use VIX. Set exit conditions such as profit target and stop loss. You can also save and share your backtest by selecting the down arrow next to the New Backtest button. This will open a slider where you can save and share your backtest.[0m

[1mGPTTreeIndex[0m, mode = embedding


INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4007 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 5421 tokens
INFO:llama_index.indices.tree.retrieve_query:> Starting query: how to create a new backteest?


[36;1m[1;3mTo create a new backtest, hit the “New Backtest” button on your dashboard. You can then set a stop loss based on a percentage, fixed profit, closing order, or per-leg stop loss. Additionally, you can use Profit Actions to scale out of a percentage of each trade at certain profit targets, adjust your stop loss, or both.[0m

[1mGPTTreeIndex[0m, mode = retrieve


INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 3594 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens
INFO:llama_index.indices.knowledge_graph.query:> Starting query: how to create a new backteest?


[36;1m[1;3m
To create a new backtest, hit the “New Backtest” button on your dashboard. You can also save and share a backtest by selecting the down arrow next to the New Backtest button and toggling the option in the “save/edit” backtest slider.[0m

[1mGPTKnowledgeGraphIndex[0m, mode = default


INFO:llama_index.indices.knowledge_graph.query:> Query keywords: ['backtest', 'new', 'create']
ERROR:llama_index.indices.knowledge_graph.query:Index was not constructed with embeddings, skipping embedding usage...
INFO:llama_index.indices.knowledge_graph.query:> Extracted relationships: The following are knowledge triplets in the form of (subset, predicate, object):
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 245 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens


[33;1m[1;3m
To create a new backtest, you will need to gather data, create a model, and then test the model against the data. Depending on the type of backtest, you may need to use different tools and techniques. For example, if you are creating a backtest for a trading strategy, you may need to use a trading simulator or a backtesting platform. Once you have gathered the data and created the model, you will need to run the backtest and analyze the results.[0m


Ran 5 combinations in total.


Unnamed: 0,Index,Mode,Output,Duration,LLM Tokens,Embedding Tokens
0,GPTTreeIndex,default,"To create a new backtest, hit the “New Backtes...",12.03306,7703,0
1,GPTTreeIndex,summarize,"\nTo create a new backtest, click the ""New Bac...",15.307041,6490,0
2,GPTTreeIndex,embedding,"To create a new backtest, hit the “New Backtes...",5.020908,4007,5421
3,GPTTreeIndex,retrieve,"\nTo create a new backtest, hit the “New Backt...",2.942588,3594,0
4,GPTKnowledgeGraphIndex,default,"\nTo create a new backtest, you will need to g...",3.35187,245,0


### Conclusion: Although KnowledgeGraph uses less LLMToken(->$), the answer is kind of vague and long. I think TreeIndex-retrieve still wins.

## Same excercise but for question/answer

In [46]:
JSONReader = download_loader("JSONReader")
json_loader = JSONReader()

discord_help_questions = json_loader.load_data('./data/help-questions.json')

KG_index = GPTKnowledgeGraphIndex.from_documents(
    discord_help_questions, 
    max_triplets_per_chunk=2
)
qa_indices_ = [
    GPTSimpleVectorIndex.from_documents(discord_help_questions), 
    GPTTreeIndex.from_documents(discord_help_questions),
    KG_index
]

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 3617 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 3479 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens


In [47]:
playground.compare("What tickers can I test?")

INFO:llama_index.indices.tree.leaf_query:> Starting query: What tickers can I test?


[1mQuery:[0m
What tickers can I test?

Trying 10 combinations...


[1mGPTTreeIndex[0m, mode = default


INFO:llama_index.indices.tree.leaf_query:>[Level 0] Selected node: [1]/[1]
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 7516 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens
INFO:llama_index.indices.tree.summarize_query:> Starting query: What tickers can I test?


[36;1m[1;3mYou can test SPY, SPX, QQQ, and IWM. You can also use Profit Actions to set profit targets, adjust your stop loss, or both. You can also use the per-leg stop loss option to close the trade on stop loss based on an individual selected leg.[0m

[1mGPTTreeIndex[0m, mode = summarize


INFO:llama_index.indices.common_tree.base:> Building index from nodes: 1 chunks
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 5915 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens
INFO:llama_index.indices.tree.leaf_query:> Starting query: What tickers can I test?


[36;1m[1;3m
You can test SPY, SPX, QQQ, and IWM, as well as any other tickers that are available in the Option Omega platform.[0m

[1mGPTTreeIndex[0m, mode = embedding


INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 3972 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 7 tokens
INFO:llama_index.indices.tree.retrieve_query:> Starting query: What tickers can I test?


[36;1m[1;3mYou can test SPY, SPX, QQQ, and IWM. You can also use Profit Actions to set profit targets, adjust your stop loss, or both. You can also use the per-leg stop loss option to close the trade on stop loss based on an individual selected leg.[0m

[1mGPTTreeIndex[0m, mode = retrieve


INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 3549 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens
INFO:llama_index.indices.knowledge_graph.query:> Starting query: What tickers can I test?


[36;1m[1;3m
You can test SPY, SPX, QQQ, and IWM.[0m

[1mGPTKnowledgeGraphIndex[0m, mode = default


INFO:llama_index.indices.knowledge_graph.query:> Query keywords: ['Test', 'Tickers']
ERROR:llama_index.indices.knowledge_graph.query:Index was not constructed with embeddings, skipping embedding usage...
INFO:llama_index.indices.knowledge_graph.query:> Extracted relationships: The following are knowledge triplets in the form of (subset, predicate, object):
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 155 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens


[33;1m[1;3m
Unfortunately, without more information it is not possible to answer this question.[0m


Ran 5 combinations in total.


Unnamed: 0,Index,Mode,Output,Duration,LLM Tokens,Embedding Tokens
0,GPTTreeIndex,default,"You can test SPY, SPX, QQQ, and IWM. You can a...",4.94085,7516,0
1,GPTTreeIndex,summarize,"\nYou can test SPY, SPX, QQQ, and IWM, as well...",3.75945,5915,0
2,GPTTreeIndex,embedding,"You can test SPY, SPX, QQQ, and IWM. You can a...",3.540539,3972,7
3,GPTTreeIndex,retrieve,"\nYou can test SPY, SPX, QQQ, and IWM.",1.534845,3549,0
4,GPTKnowledgeGraphIndex,default,"\nUnfortunately, without more information it i...",1.038413,155,0


### I would recommend TreeIndex - retrieve for both document source based on the response's simplicity. 

## Evaluation/Guardrail

In [48]:
tree_index = GPTTreeIndex.from_documents(discord_help_questions)

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens


In [50]:
response = tree_index.query("What tickers can I test?")

INFO:llama_index.indices.tree.leaf_query:> Starting query: What tickers can I test?
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 3531 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens


In [58]:
from gpt_index.evaluation import ResponseEvaluator
from gpt_index import LLMPredictor, ServiceContext
from langchain.llms import OpenAI

In [66]:
# gpt-3 (davinci)
llm_predictor_gpt3 = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-003"))
service_context_gpt3 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt3)

evaluator = ResponseEvaluator(service_context=service_context_gpt3)

In [67]:
evaluator.evaluate(response)

INFO:gpt_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:gpt_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens
INFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 4042 tokens
INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens


'\nNO'

In [68]:
response = tree_index.query("What tickers can I test?", mode="retrieve")

INFO:llama_index.indices.tree.retrieve_query:> Starting query: What tickers can I test?
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 3531 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens


In [70]:
evaluator.evaluate(response)

INFO:gpt_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:gpt_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens
INFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 4044 tokens
INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens


'\nYES'

### Overall Suggestion: Pick 2 indexes, let them both do the query, and run this evaluation, and only send to Discord if reponse is YES.