In [2]:
import os
from dotenv import load_dotenv

import os.path
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# Populate the process environment from a local .env file (python-dotenv),
# so the API key does not have to be hard-coded in the notebook.
load_dotenv()

# Will be None when the variable is not set in the environment.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [3]:
from llama_index.readers.web import BeautifulSoupWebReader

# Web reader instance; its load_data(urls) is called in the indexing cell
# below to turn the listed pages into documents.
loader = BeautifulSoupWebReader()

In [4]:
# Where the vector index is cached on disk between notebook runs.
PERSIST_DIR = "./storage-woxstai"
# Pages to scrape and index when no cached index is present.
urls = ["https://www.car.gr/classifieds/cars/view/41649924-volkswagen-kaefer?fromfeed=1"]

if os.path.exists(PERSIST_DIR):
    # Cache hit: rebuild the index object from the persisted storage.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # Cache miss: fetch the pages, build the index, then persist it so the
    # next run can take the branch above instead of re-fetching.
    documents = loader.load_data(urls)
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

In [5]:
# Ask a fixed question about the indexed listing; "tree_summarize" is the
# response mode passed to the query engine.
input_question = "How many horses does the car have?"
query_engine = index.as_query_engine(response_mode="tree_summarize")
response = query_engine.query(input_question)
print(response)

The car has 34 horsepower.


In [6]:
# Interactive cell: prompts for a question at run time, so it blocks under
# "Run All" until something is typed.
query_engine = index.as_query_engine(response_mode="tree_summarize", streaming=True)
response = query_engine.query(input("Enter a question: "))

# The engine was created with streaming=True, so the answer has to be read
# from the response stream; previously the stream was never consumed and the
# answer to the question was never displayed.
if hasattr(response, "print_response_stream"):
    response.print_response_stream()
    print()  # the streamed tokens are written without a trailing newline
else:
    # Fallback in case a plain (non-streaming) response object is returned.
    print(response)

# Show which indexed chunks the answer was drawn from.
print(response.source_nodes)

In [None]:
# Ask for the listing's price using the same "tree_summarize" response mode.
input_question = "What is the price of the car?"
query_engine = index.as_query_engine(response_mode="tree_summarize")
response = query_engine.query(input_question)
print(response)

The price of the car is 24.850 €.


In [None]:
# Ask what the listing is offering, again with the "tree_summarize" mode.
input_question = "What is being sold?"
query_engine = index.as_query_engine(response_mode="tree_summarize")
response = query_engine.query(input_question)
print(response)

A fully restored classic car, a Volkswagen Kaefer, is being sold.
