In [None]:
from llama_index.core import Document
from llama_index.readers.apify import ApifyActor
from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage
import os
import openai
# Set the OpenAI API key here, e.g. openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
from llama_index.embeddings.openai import OpenAIEmbedding

In [None]:
# Embedding model handed to both VectorStoreIndex.from_documents and
# load_index_from_storage later in this file, so the build path and the
# reload path use the same model.
embedding_model = OpenAIEmbedding(model="text-embedding-ada-002")

In [None]:
# Directory where the vector index is persisted between runs.
PERSIST_DIR = "./storage_index"

if not os.path.exists(PERSIST_DIR):
    # No persisted index yet: crawl the page through Apify and build one.
    # `reader` was previously never defined, so this branch failed with a
    # NameError. The Apify token is read from the environment instead of
    # being hard-coded in the file.
    apify_token = os.environ.get("APIFY_API_TOKEN")
    if not apify_token:
        raise RuntimeError(
            "APIFY_API_TOKEN is not set; it is required to crawl the site "
            "when no persisted index exists."
        )
    reader = ApifyActor(apify_token)

    documents = reader.load_data(
        actor_id="apify/website-content-crawler",
        run_input={
            "startUrls": [
                {"url": "https://bulletin.vcu.edu/undergraduate/engineering/computer-science/computer-science-bs-concentration-cybersecurity/"}
            ]
        },
        # Map each crawled dataset item to a Document. Items lacking a
        # "text" key would otherwise hand None to Document as its text.
        dataset_mapping_function=lambda item: Document(
            text=item.get("text") or "",
            metadata={
                "url": item.get("url"),
            },
        ),
    )
    print("Total documents scraped: ", len(documents))

    # Create the index from documents using OpenAI embeddings
    index = VectorStoreIndex.from_documents(documents, embed_model=embedding_model)

    # Persist the index so later runs take the else-branch and skip the crawl
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # Load the existing index from PERSIST_DIR with the same embedding model
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context, embed_model=embedding_model)

# Query engine over the index that was built or loaded earlier in the file.
query_engine = index.as_query_engine()

# Interactive prompt: read questions until the user types 'exit'
# (case-insensitive) or closes/interrupts the input stream.
while True:
    try:
        question = input("Ask a question (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C at the prompt ends the session without a traceback.
        print()
        break

    if question.lower() == "exit":
        break
    if not question:
        # Blank line: nothing to ask, so don't send an empty query.
        continue

    # Query the index
    response = query_engine.query(question)
    print("Answer:", response.response)