In [None]:
from datasets import load_dataset
import openai
import time
import tqdm
import os

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Take the OpenAI credential from the environment instead of hardcoding it in
# the notebook; this raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

In [3]:
# Load the "stanfordnlp/coqa" dataset through `datasets.load_dataset`; the
# "train" split and its "story" column are used further down.
coqa_dataset = load_dataset("stanfordnlp/coqa")

In [4]:
from llama_index.core import StorageContext, Document, Settings, KnowledgeGraphIndex, VectorStoreIndex, get_response_synthesizer
from llama_index.core.graph_stores import SimpleGraphStore
from llama_index.llms.openai import OpenAI
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker

In [5]:
# LLM handle passed to both query engines below; temperature=0 is requested so
# repeated runs of the same query are as repeatable as the model allows.
llm = OpenAI(temperature=0, model="gpt-3.5-turbo-instruct")
# Register the model and chunk size on llama_index's global Settings object.
Settings.llm = llm
Settings.chunk_size = 512  # NOTE(review): presumably measured in tokens — confirm

In [6]:
# Number of CoQA stories to index; kept small so building the engines is quick.
N_DOCUMENTS = 10

# Slice the split *before* wrapping rows in Document objects. Slicing a
# datasets.Dataset returns a dict of column -> list for just those rows, so
# only the stories that are actually used get materialised (the previous
# version converted the whole train split to pandas and built a Document for
# every story before discarding all but the first ten).
documents = [
    Document(text=story)
    for story in coqa_dataset["train"][:N_DOCUMENTS]["story"]
]

In [7]:
from query_engine import RAGStringQueryEngine, RAGCachedStringQueryEngine

In [8]:
# Baseline (uncached) engine from the local `query_engine` module, built over
# the sample documents with the shared LLM.
# NOTE(review): the progress bars printed here suggest nodes are parsed and
# embedded at construction time — confirm in query_engine.py.
qe = RAGStringQueryEngine(documents, llm)

Parsing nodes: 100%|██████████| 10/10 [00:00<00:00, 1768.03it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 15.33it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 17.45it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 21.70it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 16.18it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 19.73it/s]
Generating embeddings: 100%|██████████| 6/6 [00:00<00:00, 24.68it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 24.94it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 25.10it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 22.73it/s]
Generating embeddings: 100%|██████████| 4/4 [00:00<00:00, 21.01it/s]
Processing nodes: 100%|██████████| 10/10 [00:11<00:00,  1.15s/it]
Parsing nodes: 100%|██████████| 10/10 [00:00<00:00, 5105.04it/s]
Generating embeddings: 100%|██████████| 10/10 [00:00<00:00, 30.67it/s]


In [9]:
# Number of timed queries issued against each engine in the benchmark loops.
N_TRIALS = 10

In [10]:
# Benchmark the uncached engine: issue the same query N_TRIALS times and record
# the latency of each call.
times = []
for i in tqdm.tqdm(range(N_TRIALS)):
    # perf_counter() is monotonic and high-resolution, which is what interval
    # timing needs; time.time() follows the wall clock and can jump (NTP or
    # manual adjustments) while a trial is in flight.
    start = time.perf_counter()
    qe.custom_query("Where does the pope live?")
    times.append(time.perf_counter() - start)
# Average over the samples actually recorded rather than the nominal constant.
print(f"Average time: {sum(times)/len(times)}")


  1%|          | 7/1000 [00:13<31:27,  1.90s/it]


KeyboardInterrupt: 

In [10]:
# Cached variant of the engine, built from the same documents and LLM so the
# two benchmarks differ only in the engine class.
# NOTE(review): what is cached (responses, retrievals, embeddings) is defined
# in query_engine.py and is not visible from this notebook.
cqe = RAGCachedStringQueryEngine(documents, llm)

Parsing nodes: 100%|██████████| 10/10 [00:00<00:00, 4048.17it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 23.03it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 20.78it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 22.01it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 23.76it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 22.59it/s]
Generating embeddings: 100%|██████████| 6/6 [00:00<00:00, 29.30it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00,  7.53it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00,  6.01it/s]
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 16.88it/s]
Generating embeddings: 100%|██████████| 4/4 [00:00<00:00, 16.60it/s]
Processing nodes: 100%|██████████| 10/10 [00:13<00:00,  1.32s/it]
Parsing nodes: 100%|██████████| 10/10 [00:00<00:00, 4617.75it/s]
Generating embeddings: 100%|██████████| 10/10 [00:00<00:00, 28.03it/s]


In [11]:
# Benchmark the cached engine with the same query and trial count as the
# uncached run so the two averages are directly comparable.
# NOTE(review): if the cache is keyed on the query string, presumably only the
# first iteration pays the full cost, so this mean mixes one cold call with
# warm ones — confirm against query_engine.py.
times = []
for i in tqdm.tqdm(range(N_TRIALS)):
    # perf_counter() is monotonic and high-resolution, which is what interval
    # timing needs; time.time() follows the wall clock and can jump (NTP or
    # manual adjustments) while a trial is in flight.
    start = time.perf_counter()
    cqe.custom_query("Where does the pope live?")
    times.append(time.perf_counter() - start)
# Average over the samples actually recorded rather than the nominal constant.
print(f"Average time: {sum(times)/len(times)}")

The pope lives in Vatican City.


2.064606189727783