In [1]:
import os
from dotenv import load_dotenv

import os.path
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# Let python-dotenv populate the process environment (by default from a .env
# file, if one is found), then look up the OpenAI key. The lookup yields None
# when the variable is not set.
load_dotenv()

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [2]:
# Web page reader from the llama_index web-readers package.
# NOTE(review): presumably extracts page text via BeautifulSoup — confirm against the package docs.
from llama_index.readers.web import BeautifulSoupWebReader

# Reader instance built with default arguments; a later cell calls
# loader.load_data(urls) on it to fetch the documents to index.
loader = BeautifulSoupWebReader()

In [20]:
# Build-or-load: reuse the index persisted under PERSIST_DIR when that path
# exists; otherwise create it from the listed URLs and persist it.
PERSIST_DIR = "./storage-woxstai"
urls = ["https://woxst.ai"]
if os.path.exists(PERSIST_DIR):
    # Something is already stored at PERSIST_DIR: restore the index from it.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # Nothing stored yet: load the documents for the URLs and index them ...
    documents = loader.load_data(urls)
    index = VectorStoreIndex.from_documents(documents)
    # ... then write the index to disk so the next run takes the branch above.
    index.storage_context.persist(persist_dir=PERSIST_DIR)

In [21]:
# Create a query engine over the index and ask a first, high-level question.
# The question text goes to the engine verbatim, so it is kept free of typos
# (it previously read "WWho are the websites about?").
query_engine = index.as_query_engine()
response = query_engine.query("What is the website about?")
print(response)

The website is about AI technology and services.


In [22]:
# Second, more open-ended question against the same index. The engine is
# re-created here so this cell does not rely on the previous cell's engine.
question = "Explain what the website is about"
query_engine = index.as_query_engine()
response = query_engine.query(question)
print(response)

The website is likely a domain for a company or service related to artificial intelligence, as indicated by the ".ai" domain extension.
