In [None]:
import logging
import os
import sys
from pathlib import Path

import yaml

import llama_index
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index import StorageContext, load_index_from_storage
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms import OpenAI #, OpenAIEmbedding

# Send every log record (DEBUG and above) to stdout so it shows up in the cell output.
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
_LOGGER = logging.getLogger(__name__)

# Input locations, given relative to the notebook's working directory.
NOTES_DIR = Path("../texts")
SECRETS = Path("../secrets.yaml")

# Endpoint and model selection; API_URL is handed to both clients below as `api_base`.
API_URL = None
MODEL_ID = "gpt-3.5-turbo"

# Alternative configuration: local mistral 7-b model.
# Uncomment both lines to target it instead.
# API_URL = 'https://llama-openblas.k8s.mrv.thebends.org/v1'
# MODEL_ID = 'gpt-3.5-turbo'

# The API key is read from the YAML secrets file rather than being written in the notebook.
secrets = yaml.safe_load(SECRETS.read_text())
OPENAI_API_KEY = secrets["openai_key"]

# Build the LLM and embedding clients and bundle them into one service context.
_LOGGER.info("Initializing client library")
llm = OpenAI(
    model=MODEL_ID,
    temperature=0.1,
    api_key=OPENAI_API_KEY,
    api_base=API_URL,
)
embed_model = OpenAIEmbedding(api_base=API_URL, api_key=OPENAI_API_KEY)
service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)

# Read the files found under NOTES_DIR.
_LOGGER.info("Loading documents")
documents = SimpleDirectoryReader(str(NOTES_DIR)).load_data()

# Build the vector index over the loaded documents with the clients configured above.
_LOGGER.info("Indexing documents")
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
)

In [None]:
# Ask the index a question and print the answer beneath a separator line.
query_engine = index.as_query_engine()
response = query_engine.query(
    "What items on my todo list should i consider today?"
)
print("---", response, sep="\n")

In [None]:
# Save the index's storage to disk; no directory is given, so the library default is used.
index_storage = index.storage_context
index_storage.persist()

In [None]:
# Example of persisting data: build the index once, then reuse the saved copy.
#
# Both branches use the notebook's own NOTES_DIR and service_context (defined in
# the setup cell) so that the rebuilt index and the reloaded index describe the
# same documents and talk to the same LLM / embedding endpoint.

PERSIST_DIR = "./storage"
if not os.path.exists(PERSIST_DIR):
    # Nothing saved yet: load the documents and create the index
    documents = SimpleDirectoryReader(str(NOTES_DIR)).load_data()
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    # store it for later
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # A saved index exists: load it instead of re-embedding every document
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context, service_context=service_context)