In [23]:
import os
from dotenv import load_dotenv

import os.path
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# Load variables from a .env file (when one is found) into the process
# environment before anything reads configuration from it.
load_dotenv()

# OpenAI credential taken from the environment; None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [24]:
from llama_index.readers.web import BeautifulSoupWebReader

# Web-page reader instance; the index-building cell calls loader.load_data(urls) on it.
loader = BeautifulSoupWebReader()

In [25]:
# Build-or-load the vector index.
# A previously persisted index under PERSIST_DIR is reused as-is; otherwise the
# pages in `urls` are fetched, indexed, and the result is persisted for later runs.
# NOTE(review): an existing PERSIST_DIR is never refreshed when `urls` changes —
# delete the directory to force a rebuild.
PERSIST_DIR = "./storage-woxstai"
urls = ["https://www.ratsit.se/20000920-Alexander_Gunnar_Leander_Woxstrom_Malmo/gJbgl_nvI8cLdFQJv3wY8oUCfdUwua_h9AQ80Wgnx2c"]
if os.path.exists(PERSIST_DIR):
    # Cached path: rehydrate the index from the persisted storage directory.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # Cold path: download the documents, build the index, then save it to disk.
    documents = loader.load_data(urls)
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

In [31]:
# Non-streaming query over the index using the "tree_summarize" response mode.
# The prompt previously started with a stray duplicated letter ("WWho"); the
# text is sent verbatim to the LLM, so the typo is corrected here.
query_engine = index.as_query_engine(response_mode="tree_summarize")
response = query_engine.query("Who are the websites about?")
print(response)

The websites are about individuals and companies, providing information such as personal details, addresses, phone numbers, income statistics, marital status, and company engagements. The websites also offer services like salary checks, credit reports, and the option to purchase additional information about individuals through subscriptions.


In [33]:
# Streaming query: with streaming=True the engine returns a streaming response
# whose text arrives incrementally.
query_engine = index.as_query_engine(response_mode="tree_summarize", streaming=True)
response = query_engine.query("Who is the website about? What information is available about the person?")
# print(response) would first drain the whole generator and only then print,
# which defeats the point of streaming; emit the tokens as they arrive instead.
# NOTE: the answer can contain personal data scraped from the page — clear this
# cell's output before sharing or committing the notebook.
response.print_response_stream()

The website is about Alexander Gunnar Leander Woxström. The available information about him includes his age (24 years old), date of birth (September 20, 2000), legal gender (male), address (Stora Varvsgatan 43 lgh 1901, Malmö), phone number (076-0298423), and his involvement as the responsible person (VD) in the company Woxst AB. Additionally, details about his salary and credit status can be accessed through the website.
