In [1]:
# This is due to the fact that we use asyncio.loop_until_complete in
# the GithubRepositoryReader. Since the Jupyter kernel itself runs on
# an event loop, we need to add some help with nesting
!pip install nest_asyncio
import nest_asyncio

nest_asyncio.apply()



In [None]:
%env OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
from gpt_index import (
    GPTSimpleVectorIndex,
    GPTQdrantIndex,
    GPTTreeIndex,
    GPTFaissIndex,
    GPTWeaviateIndex,
    GPTListIndex,
    GPTSimpleKeywordTableIndex,
    GPTKeywordTableIndex,
    GPTPineconeIndex,
    GPTRAKEKeywordTableIndex,
    GPTSQLStructStoreIndex,
    GithubRepositoryReader,
)
from IPython.display import Markdown, display

In [None]:
%env GITHUB_TOKEN=github_pat_xxxxxxxxxxxxxxxxxxxxxx_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
import os
github_token = os.environ.get("GITHUB_TOKEN")
owner = "jerryjliu"
repo = "gpt_index"
branch = "main"
reader = GithubRepositoryReader(
    github_token=github_token,
    owner=owner,
    repo=repo,
    use_parser=True,
    verbose=True,
    ignore_directories=["examples", "docs", ".vscode"],
    ignore_file_extensions=[
        ".png",
        ".jpg",
        ".jpeg",
        ".gif",
        ".svg",
        ".ico",
        ".json",
        ".csv",
    ],
    concurrent_requests=5,
)

In [8]:
documents = reader.load_data(branch=branch)
print(f"Loaded {len(documents)} documents")

current path: 
processing tree 6a4a7f614c36eb928f20f2e6373566f029116977
found blob .flake8
recursing into .github
	current path: .github
	processing tree d5fb28aaff20564dc61cc003d66ba60d38cbfe7f
	recursing into workflows
		current path: .github/workflows
		processing tree a7711de32a8df15bcef97cb1b66af9e85a8ffd4c
		found blob build_package.yml
		found blob lint.yml
		found blob unit_test.yml
found blob .gitignore
found blob .readthedocs.yaml
recursing into .vscode
ignoring tree .vscode due to directory
found blob CITATION.cff
found blob CONTRIBUTING.md
found blob LICENSE
found blob MANIFEST.in
found blob Makefile
found blob README.md
found blob data_requirements.txt
recursing into docs
ignoring tree docs due to directory
recursing into examples
ignoring tree examples due to directory
recursing into experimental
	current path: experimental
	processing tree 832a942385716cdbda057a030748f4d878ea7e38
	found blob README.md
	recursing into classifier
		current path: experimental/classifier
		p

In [16]:
from gpt_index import Document
from gpt_index import QueryMode
from gpt_index import SummaryPrompt
from typing import List
verbose = False
def seperate_documents_into_folders(documents: List[Document]):
    folders = {}
    for doc in documents:
        file_path = doc.extra_info["file_path"]
        fp = "/".join(file_path.split("/")[:-1])
        folders.setdefault(fp, [])
        folders[fp].append(doc)
    return folders

if verbose:
    for folder, docs in seperate_documents_into_folders(documents).items():
        print(f"{folder}: {len(docs)}")

CODE_FILE_SUMMARY_PROMPT_TMPL = (
    "Write a summary of the following code file. Try to explain the purpose, "
    "functionality, and key elements of the code. "
    "Try to include as many details as possible, but also keep it concise.\n"
    "\n"
    "\n"
    "{context_str}\n"
    "\n"
    "\n"
    'SUMMARY:"""\n'
)

CODE_FILE_SUMMARY_PROMPT = SummaryPrompt(CODE_FILE_SUMMARY_PROMPT_TMPL)

indexes_by_folder = {}
for folder, docs in seperate_documents_into_folders(documents).items():
    print(f"Indexing {folder} with {len(docs)} documents")
    
    
    index = GPTTreeIndex(
        documents=docs,
        num_children=12,
        summary_template=CODE_FILE_SUMMARY_PROMPT,
    )
    summary = index.query(
        "What are the summaries of these documents?", mode=QueryMode.SUMMARIZE
    )
    index.set_text(str(summary))
    indexes_by_folder[folder] = index
    print(f"Indexed {folder} with {len(docs)} documents")
    

    

    

INFO:root:> Building index from nodes: 1 chunks


Indexing  with 13 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 4482 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 5898 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed  with 13 documents
Indexing .github/workflows with 3 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 1507 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 1912 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed .github/workflows with 3 documents
Indexing experimental with 1 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 192 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 423 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 3 chunks


Indexed experimental with 1 documents
Indexing experimental/classifier with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 11157 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 3 chunks
INFO:root:> [query] Total LLM token usage: 13265 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 2 chunks


Indexed experimental/classifier with 2 documents
Indexing gpt_index with 7 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 7957 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 2 chunks
INFO:root:> [query] Total LLM token usage: 9666 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed gpt_index with 7 documents
Indexing gpt_index/composability with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 3945 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 4728 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed gpt_index/composability with 2 documents
Indexing gpt_index/data_structs with 4 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 6443 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 7524 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed gpt_index/data_structs with 4 documents
Indexing gpt_index/embeddings with 5 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 5501 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 6691 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 4 chunks


Indexed gpt_index/embeddings with 5 documents
Indexing gpt_index/indices with 6 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 15779 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 4 chunks
INFO:root:> [query] Total LLM token usage: 17830 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/indices with 6 documents
Indexing gpt_index/indices/common with 1 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 163 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 396 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/indices/common with 1 documents
Indexing gpt_index/indices/common/struct_store with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 2158 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 2374 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/indices/common/struct_store with 2 documents
Indexing gpt_index/indices/common/tree with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 2513 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 2830 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed gpt_index/indices/common/tree with 2 documents
Indexing gpt_index/indices/keyword_table with 6 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 5952 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 7318 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/indices/keyword_table with 6 documents
Indexing gpt_index/indices/list with 3 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 2398 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 2803 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 2 chunks


Indexed gpt_index/indices/list with 3 documents
Indexing gpt_index/indices/query with 5 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 10153 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 2 chunks
INFO:root:> [query] Total LLM token usage: 11172 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/indices/query with 5 documents
Indexing gpt_index/indices/query/keyword_table with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 3308 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 3703 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/indices/query/keyword_table with 2 documents
Indexing gpt_index/indices/query/list with 3 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 2360 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 2786 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/indices/query/list with 3 documents
Indexing gpt_index/indices/query/struct_store with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 2250 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 2719 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 2 chunks


Indexed gpt_index/indices/query/struct_store with 2 documents
Indexing gpt_index/indices/query/tree with 5 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 8747 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 2 chunks
INFO:root:> [query] Total LLM token usage: 9728 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 2 chunks


Indexed gpt_index/indices/query/tree with 5 documents
Indexing gpt_index/indices/query/vector_store with 7 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 9193 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 2 chunks
INFO:root:> [query] Total LLM token usage: 10719 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed gpt_index/indices/query/vector_store with 7 documents
Indexing gpt_index/indices/response with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 5001 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 5717 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed gpt_index/indices/response with 2 documents
Indexing gpt_index/indices/struct_store with 3 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 7055 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 2 chunks
INFO:root:> [query] Total LLM token usage: 8010 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 2 chunks


Indexed gpt_index/indices/struct_store with 3 documents
Indexing gpt_index/indices/tree with 4 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 7655 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 2 chunks
INFO:root:> [query] Total LLM token usage: 8467 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 4 chunks


Indexed gpt_index/indices/tree with 4 documents
Indexing gpt_index/indices/vector_store with 7 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 17900 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 5 chunks
INFO:root:> [query] Total LLM token usage: 20168 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 3 chunks


Indexed gpt_index/indices/vector_store with 7 documents
Indexing gpt_index/langchain_helpers with 5 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 11335 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 3 chunks
INFO:root:> [query] Total LLM token usage: 12896 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 2 chunks


Indexed gpt_index/langchain_helpers with 5 documents
Indexing gpt_index/prompts with 5 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 9241 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 2 chunks
INFO:root:> [query] Total LLM token usage: 10892 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 7 chunks


Indexed gpt_index/prompts with 5 documents
Indexing gpt_index/readers with 18 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 27274 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 7 chunks
INFO:root:> [query] Total LLM token usage: 30827 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 4 chunks


Indexed gpt_index/readers with 18 documents
Indexing gpt_index/readers/file with 11 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 16175 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 4 chunks
INFO:root:> [query] Total LLM token usage: 18765 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens


Indexed gpt_index/readers/file with 11 documents
Indexing gpt_index/readers/github_readers with 4 documents


INFO:root:> Building index from nodes: 5 chunks
INFO:root:> [build_index_from_documents] Total LLM token usage: 19746 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 5 chunks
INFO:root:> [query] Total LLM token usage: 22117 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/readers/github_readers with 4 documents
Indexing gpt_index/readers/google_readers with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 3120 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 3790 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/readers/google_readers with 2 documents
Indexing gpt_index/readers/make_com with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 997 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 1394 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/readers/make_com with 2 documents
Indexing gpt_index/readers/schema with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 600 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 846 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 2 chunks


Indexed gpt_index/readers/schema with 2 documents
Indexing gpt_index/readers/weaviate with 4 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 8091 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 2 chunks
INFO:root:> [query] Total LLM token usage: 9393 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/readers/weaviate with 4 documents
Indexing gpt_index/response with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 1146 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 1477 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed gpt_index/response with 2 documents
Indexing gpt_index/token_counter with 5 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 4413 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 5089 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed gpt_index/token_counter with 5 documents
Indexing tests with 3 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 2231 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 2514 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 2 chunks


Indexed tests with 3 documents
Indexing tests/indices with 5 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 7946 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 2 chunks
INFO:root:> [query] Total LLM token usage: 8941 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed tests/indices with 5 documents
Indexing tests/indices/embedding with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 2425 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 2598 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed tests/indices/embedding with 2 documents
Indexing tests/indices/keyword_table with 3 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 3773 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 5092 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed tests/indices/keyword_table with 3 documents
Indexing tests/indices/list with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 6765 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 7466 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 2 chunks


Indexed tests/indices/list with 2 documents
Indexing tests/indices/query with 3 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 7454 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 2 chunks
INFO:root:> [query] Total LLM token usage: 8082 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed tests/indices/query with 3 documents
Indexing tests/indices/struct_store with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 3883 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 4632 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed tests/indices/struct_store with 2 documents
Indexing tests/indices/tree with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 4941 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 5452 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 2 chunks


Indexed tests/indices/tree with 2 documents
Indexing tests/indices/vector_store with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 7978 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 2 chunks
INFO:root:> [query] Total LLM token usage: 8622 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed tests/indices/vector_store with 2 documents
Indexing tests/langchain_helpers with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 1013 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 1276 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed tests/langchain_helpers with 2 documents
Indexing tests/mock_utils with 6 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 3926 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 4774 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed tests/mock_utils with 6 documents
Indexing tests/prompts with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 1268 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 1561 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 1 chunks


Indexed tests/prompts with 2 documents
Indexing tests/readers with 3 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 5602 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 1 chunks
INFO:root:> [query] Total LLM token usage: 6762 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens
INFO:root:> Building index from nodes: 0 chunks


Indexed tests/readers with 3 documents
Indexing tests/token_predictor with 2 documents


INFO:root:> [build_index_from_documents] Total LLM token usage: 862 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens
INFO:root:> Starting query: What are the summaries of these documents?
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 1134 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens


Indexed tests/token_predictor with 2 documents


In [21]:
# save the indexes to disk
folder: str
for folder, index in indexes_by_folder.items():
    index.save_to_disk(f"indexes/{folder.replace('/', '-')}.json")

In [22]:
new_indexes_by_folder = {}
# read the indexes from disk
for index_filename in os.listdir("indexes"):
    print(f"Loading index {index_filename}")
    index = GPTTreeIndex.load_from_disk(f"indexes/{index_filename}")
    new_indexes_by_folder[index_filename.replace(".json", "").replace('-', '/')] = index

Loading index gpt_index-indices-vector_store.json
Loading index gpt_index-indices-query-tree.json
Loading index gpt_index-readers-file.json
Loading index .github-workflows.json
Loading index gpt_index-readers.json
Loading index gpt_index-indices-query-list.json
Loading index gpt_index-readers-weaviate.json
Loading index gpt_index-indices-common-struct_store.json
Loading index gpt_index-readers-schema.json
Loading index tests-indices.json
Loading index tests-indices-keyword_table.json
Loading index gpt_index-readers-make_com.json
Loading index tests-indices-vector_store.json
Loading index gpt_index-response.json
Loading index gpt_index-indices-query-keyword_table.json
Loading index tests-readers.json
Loading index gpt_index-indices-common-tree.json
Loading index tests-prompts.json
Loading index experimental.json
Loading index gpt_index-indices.json
Loading index tests-indices-tree.json
Loading index gpt_index.json
Loading index gpt_index-langchain_helpers.json
Loading index gpt_index-in

In [25]:
list_index = GPTListIndex([*indexes_by_folder.values()])

from gpt_index.composability import ComposableGraph

graph = ComposableGraph.build_from_index(list_index)

# [Optional] save to disk
graph.save_to_disk("indexes/composable_graph.json")

# [Optional] load from disk
graph = ComposableGraph.load_from_disk("indexes/composable_graph.json")

INFO:root:> [build_index_from_documents] Total LLM token usage: 0 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens


In [37]:
from gpt_index import QueryConfig
from gpt_index import IndexStructType

# query = "Explain the purpose of the class `GPTTreeIndex` in the file gpt_index/indices/tree/base.py."
query = "Find every class that extends the class `BaseGPTIndex`. What are the summaries of these classes?"

query_configs = [
    QueryConfig(
        index_struct_type=IndexStructType.TREE,
        query_mode=QueryMode.SUMMARIZE,
        # query_kwargs={
        #     "child_branch_factor": 2
        # }
    ),
    QueryConfig(
        index_struct_type=IndexStructType.LIST,
        query_mode=QueryMode.EMBEDDING,
    ),
]

response = graph.query(
    query, query_configs=query_configs
)

display(Markdown(f"**Query:** {query}"))
display(Markdown(f"**Response:** {response.response}\n"))

INFO:root:> Starting query: Find every class that extends the class `BaseGPTIndex`. What are the summaries of these classes?
INFO:root:> Building index from nodes: 5 chunks
Traceback (most recent call last):
  File "/home/ahmetk/Projects/gpt_index/gpt_index/utils.py", line 157, in retry_on_exceptions_with_backoff
    return lambda_fn()
           ^^^^^^^^^^^
  File "/home/ahmetk/Projects/gpt_index/gpt_index/langchain_helpers/chain_wrapper.py", line 100, in <lambda>
    lambda: llm_chain.predict(**full_prompt_args),
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ahmetk/.pyenv/versions/3.11.0/envs/gpt_index-github-reader/lib/python3.11/site-packages/langchain/chains/llm.py", line 104, in predict
    return self(kwargs)[self.output_key]
           ^^^^^^^^^^^^
  File "/home/ahmetk/.pyenv/versions/3.11.0/envs/gpt_index-github-reader/lib/python3.11/site-packages/langchain/chains/base.py", line 155, in __call__
    raise e
  File "/home/ahmetk/.pyenv/versions/3.11.0/envs/gpt

KeyboardInterrupt: 