.venv/bin/python -m pip install -r requirements.txt

In [None]:
# Steps 1-3: load the source files, split them into chunks, and set up the
# embedding model used later when the index is built.
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# 1. Load HTML File/s -- everything found under the local "html" directory.
reader = SimpleDirectoryReader(input_dir="html")
documents = reader.load_data()
print(f"Loaded {len(documents)} documents.")

# 2. Chunk with SentenceSplitter
# Rule of thumb from the original author: 1 token is roughly 4 characters.
# NOTE(review): chunk_size / chunk_overlap are presumably measured in tokens,
# not characters -- confirm against the SentenceSplitter documentation.
splitter = SentenceSplitter(
    chunk_size=2048,
    chunk_overlap=200,
)
nodes = splitter.get_nodes_from_documents(documents)
print(f"Generated {len(nodes)} chunks.")

# 3. Embed Chunks with HuggingFace
# Only the model is created here; the chunks themselves are embedded when the
# index is constructed with this model.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)


Loaded 1 documents.
Generated 144 chunks.


In [24]:
# 4. Create Index
# Builds a FAISS-backed vector index over `nodes` using `embed_model`, then
# writes the result to ./faiss_index.
import faiss
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.vector_stores.faiss import FaissVectorStore

# Flat (exhaustive) L2 index over 384-dimensional vectors.
# NOTE(review): 384 must equal the output size of `embed_model`
# (all-MiniLM-L6-v2 is documented as 384-d) -- update it if the model changes.
faiss_index = faiss.IndexFlatL2(384)
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context,
    embed_model=embed_model,
)

# Persist index to disk
index.storage_context.persist(persist_dir="./faiss_index")
print("Index persisted to ./faiss_index")

Index persisted to ./faiss_index


In [25]:
# 5. Define LLM
# Reads the API credentials from the environment and creates the client used
# by the question-answering code.

import os

from dotenv import load_dotenv
from openai import OpenAI

# Let python-dotenv populate the environment (e.g. from a local .env file).
load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")
base_url = os.getenv("KISSKI_URL")

# Fail fast with a message that names the variables actually read above.
# (The message previously said KISSKI_API_KEY, which this code never reads.)
if not api_key or not base_url:
    raise ValueError("Missing OPENAI_API_KEY or KISSKI_URL in environment.")

client = OpenAI(
    api_key=api_key,
    base_url=base_url,
)

def ask_openai_llm(prompt: str) -> str:
    """Send *prompt* as a single user message and return the reply text.

    Args:
        prompt: Full text of the user message sent to the chat model.

    Returns:
        The content of the first completion choice, or an empty string when
        the response carries no text content, so callers always get a ``str``.
    """
    response = client.chat.completions.create(
        model="meta-llama-3.1-8b-instruct",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    # The SDK types `message.content` as optional; normalise a missing value
    # to "" so string operations on the result cannot fail on None.
    return response.choices[0].message.content or ""


In [26]:
# 6. Ask a Question
# Interactive loop: read a question, retrieve matching chunks from `index`,
# send them with the question to the LLM, and render the answer as Markdown.
import textwrap
from IPython.display import Markdown, display

# The retriever does not depend on the question, so build it once.
retriever = index.as_retriever()

while True:
    query = input("🔍 Enter your question (or type 'q'): ").strip()
    if query.lower() == 'q':
        print("Session ended.")
        break
    if not query:
        # Nothing to search for; prompt again instead of querying with "".
        continue

    # Deliberately not called `nodes`: that name holds the chunk list the
    # index is built from, and rebinding it here would break a re-run of the
    # index-building cell.
    retrieved_nodes = retriever.retrieve(query)
    context = "\n---\n".join(n.get_content() for n in retrieved_nodes)
    full_prompt = f"""You are a helpful assistant. Use the following context to answer the question.

Context:
{context}

Question:
{query}"""

    answer = ask_openai_llm(full_prompt)
    print("\nQ:")
    display(Markdown(textwrap.dedent(query)))
    print("\nA:")
    display(Markdown(textwrap.dedent(answer)))
    print("___\n")


Q:


what is an island?


A:


An island is a piece of land that is surrounded by water on all sides. Islands can be small or large, and they can be found in various parts of the world, including oceans, seas, and lakes. In the context of the provided text, islands are specifically mentioned as being small and vulnerable to climate change impacts, such as sea-level rise (SLR) and changes in weather patterns.

___

Session ended.
