# Playground

In [1]:
from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv())

True

In [2]:
# Hide INFO logs regarding token usage, etc
import logging
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

## Setup

### Generate some example Documents

In [3]:
from llama_index import download_loader
from llama_index import (
    GPTVectorStoreIndex as VectorStoreIndex,
    GPTTreeIndex as TreeIndex
)

WikipediaReader = download_loader("WikipediaReader")

loader = WikipediaReader()
documents = loader.load_data(pages=['Berlin'])

### Create a list of any sort of indices (custom LLMs, custom embeddings, etc)

In [4]:
indices = [VectorStoreIndex.from_documents(documents), TreeIndex.from_documents(documents)]

## Using the Playground


### Initialize with indices

In [5]:
from llama_index.playground import Playground

playground = Playground(indices=indices)

In [6]:
playground.compare("What is the population of Berlin?")

[1mQuery:[0m
What is the population of Berlin?

[1mGPTVectorStoreIndex[0m, retriever mode = default
[36;1m[1;3m
The population of Berlin is 3.7 million.[0m

[1mGPTTreeIndex[0m, retriever mode = select_leaf
[33;1m[1;3m
It is not possible to answer this question with the given context information.[0m

[1mGPTTreeIndex[0m, retriever mode = select_leaf_embedding
[33;1m[1;3m
The population of Berlin is approximately 3.7 million inhabitants.[0m

[1mGPTTreeIndex[0m, retriever mode = all_leaf
[33;1m[1;3m

The population of Berlin is estimated to be around 3.7 million inhabitants, making it the most populous city in Germany. It is home to a diverse population, with people from over 190 countries living in the city. The largest religious denomination recorded in 2018 was the Protestant regional church body—the Evangelical Church of Berlin-Brandenburg-Silesian Upper Lusatia (EKBO)—a united church. EKBO is a member of the Evangelical Church in Germany (EKD) and Union Evangelisc

Unnamed: 0,Index,Retriever Mode,Output,Duration,LLM Tokens,Embedding Tokens
0,GPTVectorStoreIndex,default,\nThe population of Berlin is 3.7 million.,2.776464,1799,0
1,GPTTreeIndex,select_leaf,\nIt is not possible to answer this question w...,16.467436,896,0
2,GPTTreeIndex,select_leaf_embedding,\nThe population of Berlin is approximately 3....,10.731784,913,0
3,GPTTreeIndex,all_leaf,\n\nThe population of Berlin is estimated to b...,733.014671,38632,0
4,GPTTreeIndex,root,\nThe population of Berlin is 3.7 million with...,3.582931,696,0


### Initialize with Documents

In [None]:
# Uses documents in a preset list of indices
playground = Playground.from_docs(documents=documents)

### Save the indices for future use

In [2]:
tree_index_persist_dir = '/media/sunhuawei/data/files/gpt_tree_index/wikipedia/berlin'
vector_index_persist_dir = '/media/sunhuawei/data/files/gpt_vector_store_index/wikipedia/berlin'

In [11]:
indices[0].storage_context.persist(persist_dir=vector_index_persist_dir)
indices[1].storage_context.persist(persist_dir=tree_index_persist_dir)

### Reload from disk

In [12]:
from llama_index import StorageContext, load_index_from_storage

# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir=tree_index_persist_dir)

# load index
tree_index = load_index_from_storage(storage_context)

### Query the index

In [44]:
query_engine = tree_index.as_query_engine(child_branch_factor=2)
response = query_engine.query('What is the population of Berlin?')
print(response)
print(response.get_formatted_sources())


The population of Berlin is 3.7 million.
> Source (Doc id: d13ab134-ce57-4db4-8339-c6695874e917): Berlin ( bur-LIN, German: [bɛʁˈliːn] (listen)) is the capital and largest city of Germany by both...

> Source (Doc id: 630cf07e-5c52-4ee3-ac56-601b37992237): western part, underlining the US support for West Berlin. Berlin was completely divided. Although...


In [16]:
from llama_index import ResponseSynthesizer
from llama_index.indices.postprocessor import KeywordNodePostprocessor, PrevNextNodePostprocessor, SimilarityPostprocessor
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.retrievers import VectorIndexRetriever

In [17]:
storage_cxt = StorageContext.from_defaults(persist_dir=vector_index_persist_dir)
vector_index = load_index_from_storage(storage_cxt)
query_engine = vector_index.as_query_engine()
response = query_engine.query('What is the population of Berlin?')
print(response)


The population of Berlin is 3.7 million.


In [3]:
from llama_index import SimpleDirectoryReader
filename_fn = lambda filename: {'file_name': filename}

# automatically sets the extra_info of each document according to filename_fn
documents = SimpleDirectoryReader(vector_index_persist_dir, file_metadata=filename_fn, filename_as_id=True).load_data()

In [4]:
documents[0].extra_info

{'file_name': '/media/sunhuawei/data/files/gpt_vector_store_index/wikipedia/berlin/docstore.json'}

In [5]:
documents[0].extra_info_str

'file_name: /media/sunhuawei/data/files/gpt_vector_store_index/wikipedia/berlin/docstore.json'

In [6]:
documents[0].doc_id

'/media/sunhuawei/data/files/gpt_vector_store_index/wikipedia/berlin/docstore.json'