In [None]:
## parameters
# Question to answer from the indexed documents.
query = "Can i fly a drone in the city center?"

# Number of results requested from each retriever. Two retrievers (vector and
# keyword) are combined later, so up to twice this many documents can come back.
n_docs = 3

# Ollama model used for generation; it has to be pulled from ollama beforehand.
model = "gemma3:4b"

In [2]:
from langchain.text_splitter import TokenTextSplitter
import os

# Token-based splitter: 500-token windows with a 100-token overlap, counted
# with the cl100k_base encoding.
splitter = TokenTextSplitter(
    encoding_name="cl100k_base",
    chunk_size=500,
    chunk_overlap=100
)

# Directory holding the source .txt documents (relative to the notebook).
data_dir = "../data"

# Gather the chunks of every .txt file into one flat corpus.
# The listing is sorted because os.listdir() returns entries in arbitrary
# order; sorting keeps the chunk order, and anything built from it,
# reproducible from run to run.
all_chunks = []
for file in sorted(os.listdir(data_dir)):
    if file.endswith(".txt"):
        with open(os.path.join(data_dir, file), "r", encoding="utf-8") as f:
            content = f.read()
        # `chunks` is overwritten on every iteration and only ever holds the
        # pieces of the current file; `all_chunks` is the complete corpus.
        chunks = splitter.split_text(content)
        all_chunks.extend(chunks)

In [10]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model used to vectorize each chunk for similarity search.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Index the full corpus. `chunks` only holds the pieces of the last file
# processed by the loading loop, so indexing it would silently leave every
# other document out of the vector store.
faiss_store = FAISS.from_texts(all_chunks, embedding_model)


In [4]:
from langchain.retrievers import BM25Retriever
from nltk.tokenize import word_tokenize
import nltk

# nltk.download("punkt")  # First time only

# Build the keyword index over the full corpus. `chunks` only holds the pieces
# of the last file processed by the loading loop, so using it here would leave
# every other document out of keyword search.
bm25_retriever = BM25Retriever.from_texts(all_chunks)
bm25_retriever.k = n_docs  # Top-k results to return


In [5]:
from langchain.retrievers import EnsembleRetriever

# Vector-side retriever, limited to the same top-k as the configuration.
vector_retriever = faiss_store.as_retriever(search_kwargs={"k": n_docs})

# Hybrid retrieval: keyword (BM25) and vector (FAISS) results are combined
# by the ensemble using the weights below.
hybrid = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.5, 0.5],  # Equal weighting; tune to favor one retriever over the other
)

In [6]:
# Start from an empty list so re-running this cell never accumulates results.
supporting_docs = []

# Query the hybrid retriever and keep only the text of each returned document.
results = hybrid.invoke(query)
supporting_docs.extend(hit.page_content for hit in results)


In [7]:
import ollama

def get_response_from_model(prompt, model="gemma3:1b"):
    """Send a prompt to an Ollama model and return the generated text.

    Args:
        prompt: Prompt string forwarded to ``ollama.generate``.
        model: Name of the Ollama model to query.

    Returns:
        The value stored under the ``"response"`` key of the generate result.
    """
    generation = ollama.generate(prompt=prompt, model=model)
    return generation["response"]


In [8]:
def generate_prompt(query, supporting_docs):
    """Build the prompt sent to the model.

    The prompt consists of a fixed instruction header, the supporting
    documents numbered from 1, and finally the question followed by an
    ``Answer:`` cue.

    Args:
        query: The question to answer.
        supporting_docs: Iterable of document texts to include as context.

    Returns:
        The assembled prompt as a single string.
    """
    sections = [
        "You are a helpful AI legal assistant.\n",
        "You will answer the question based only on legal interpretation of the provided documents.\n\n",
        "Supporting Documents:\n",
    ]
    for number, text in enumerate(supporting_docs, start=1):
        sections.append(f"Document {number}:\n{text}\n\n")
    sections.append(f"Question: {query}\nAnswer:")
    return "".join(sections)

In [9]:
# Assemble the prompt from the retrieved documents, then query the configured model.
prompt = generate_prompt(query, supporting_docs)
response = get_response_from_model(prompt, model=model)

# Show the model's answer.
print(f"Response: {response}")

Response: Based on the provided documents, determining whether you can fly a drone in a city center requires careful consideration of several factors and regulations. Here’s a breakdown of the relevant stipulations:

**Key Restrictions & Requirements:**

*   **Category of Operation is Crucial:** The documents outline three categories of UAS operations: ‘open’, ‘specific’, and ‘remote’ (or ‘special’). The rules governing each category differ significantly.
*   **‘Open’ Category – Generally Prohibited in City Centers:** The ‘open’ category (as defined in UAS.OPEN.020) is generally *not* suitable for city center operations. It’s specifically restricted to areas where you can reasonably expect no uninvolved persons to be endangered. City centers inherently have high pedestrian traffic, making this category unsuitable.
*   **‘Specific’ Category – Potentially Possible with Conditions:** The ‘specific’ category (as defined in UAS.SPEC.020) *could* be permissible, but it’s heavily reliant on m