In [None]:
# install deps -- % makes sure they install in the current virtual env
%pip install transformers
%pip install torch torchvision torchaudio
%pip install llama-index

### Create index


In [None]:
# to save money we're using the local embed model instead of the OpenAI default text-embedding-ada-002
# this will use a HuggingFace embedding model instead

from llama_index import (
    ServiceContext,
    set_global_service_context,
)

# Build a service context that uses the "local" embedding model, then register it
# through set_global_service_context so it does not have to be passed around explicitly.
service_context = ServiceContext.from_defaults(embed_model="local")
set_global_service_context(service_context)

In [None]:
import json
import os.path

from llama_index import (
    StorageContext,
    VectorStoreIndex,
    download_loader,
    load_index_from_storage,
)

# Directory the index is persisted to and reloaded from.
PERSIST_DIR = "./storage"
# When True the index is rebuilt even if PERSIST_DIR already exists.
FORCE_REINDEX = True


def create_and_store_embeddings(urls_path="season_one_episodes.json"):
    """Build a vector index from the pages listed in a JSON file and persist it.

    Args:
        urls_path: Path to a JSON file whose parsed content is handed to the
            web reader as ``urls``. Defaults to ``"season_one_episodes.json"``,
            the file name this notebook has always used.

    Returns:
        The freshly built ``VectorStoreIndex``. As a side effect its storage
        context is persisted under ``PERSIST_DIR``.
    """
    # load wikipedia urls; the context manager guarantees the file handle is
    # closed even when JSON parsing raises.
    with open(urls_path) as urls_file:
        urls = json.load(urls_file)

    BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
    loader = BeautifulSoupWebReader()
    documents = loader.load_data(urls=urls)
    index = VectorStoreIndex.from_documents(documents)
    # store it for later
    index.storage_context.persist(persist_dir=PERSIST_DIR)
    return index


def load_existing_index():
    """Load and return the index previously persisted under ``PERSIST_DIR``."""
    return load_index_from_storage(
        StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    )


# Reuse the persisted index only when a rebuild is not forced and the storage
# directory is already there; otherwise build the index and persist it.
if not FORCE_REINDEX and os.path.exists(PERSIST_DIR):
    index = load_existing_index()
else:
    index = create_and_store_embeddings()

### Create query engine


In [None]:
from llama_index.prompts import PromptTemplate

# Example question. NOTE(review): this variable is not passed to the query
# engine anywhere in this cell -- the query below uses a different, literal question.
base_prompt = "What happened in episode 3 of season 1 of Better Call Saul? Summarize the episode in bullet points."

# Prompt text with two placeholders, {context_str} and {query_str}; presumably
# the query engine fills them with the retrieved context and the question -- confirm
# against the llama_index prompt documentation.
text_qa_template_str = """
  Context information is provided below:
  ________________________________________
  {context_str}
  ________________________________________
  Using only the context information, answer the question: {query_str}
  If the context isn't helpful, say that you don't know the answer.
  """

# Wrap the raw string so it can be handed to the query engine.
text_qa_template = PromptTemplate(text_qa_template_str)

# Query engine over the index built above, using the custom QA template.
query_engine = index.as_query_engine(text_qa_template=text_qa_template)

# Run a single question through the engine and show the answer.
response = query_engine.query("Who is Kim Wexler?")
print(response)