In [1]:
from pathlib import Path
import yaml
# Read the notebook settings from config.yml, resolved against the current
# working directory. The encoding is given explicitly so the parsed values do
# not depend on the platform's default text encoding.
with open("config.yml", 'r', encoding='utf-8') as ymlfile:
    cfg = yaml.safe_load(ymlfile)

# Settings used by this notebook; a missing key raises KeyError right here.
path_fname = cfg['path_fname']
fname = cfg['fname']
output_dir = cfg['output_dir']

In [2]:
# Load chunks from json file
import json

# Join the path with pathlib instead of a hard-coded "\\" separator so the
# same cell works on Windows, Linux and macOS.
chunks_path = Path("Documents") / "SORA_chunks_cleaned_manual.json"
with chunks_path.open('r', encoding='utf-8') as f:
    chunks = json.load(f)
print(f"Number of chunks: {len(chunks)}")

Number of chunks: 120


In [3]:
from PreProcessing.embeddingToolsFAISSv2 import EmbeddingToolFAISS
# Create the embedding tool rooted at the configured output directory and load
# the index it persisted there (the recorded run reports a FAISS index plus a
# docs.json metadata file being loaded).
embedder = EmbeddingToolFAISS( output_dir=Path(output_dir), index_backend="faiss")
# NOTE(review): the value returned by load_index() is kept in `embeddings` but
# is not read by any cell visible here — confirm whether it is still needed.
embeddings = embedder.load_index()


  from .autonotebook import tqdm as notebook_tqdm


✅ Loaded FAISS index with 120 vectors from PreProcessing\ProcessedFiles\index\faiss.index
   • Loaded metadata for 120 documents from PreProcessing\ProcessedFiles\index\docs.json


In [4]:
from RAG.ragv2 import RAG
# Build the retrieval layer from the loaded embedding tool and the chunk list.
# NOTE(review): the meaning of default_mode="hybrid" and reranker="colbert" is
# defined inside RAG.ragv2, which is not visible here — verify against that module.
rag_system = RAG(embedding_tool=embedder, chunks=chunks, default_mode="hybrid", reranker="colbert")

In [5]:
from LLM.LLM_openAI_Chatbot import LLMChatbot 

# Chatbot wired to the retrieval system above; later cells call llm.answer(...).
llm = LLMChatbot(rag_system=rag_system)

In [6]:
# Sample SORA questions for exercising retrieval and answering by hand.
# Each entry is one string; adjacent literals are joined by the parser, so the
# wrapping below only shortens the source lines.
sora_specific_questions = [
    (
        "How do I calculate the intrinsic ground risk class when flying over "
        "sparsely populated areas with transient gatherings?"
    ),
    (
        "Which evidence is required to justify a reduction of air risk through "
        "strategic airspace mitigation measures?"
    ),
    (
        "How should I document the robustness level for an OSO that requires "
        "medium assurance under SAIL III?"
    ),
    (
        "What operational volume definition is acceptable when applying "
        "containment measures for BVLOS operations?"
    ),
    (
        "How do emergency response procedures influence the compliance "
        "assessment of OSO number 10?"
    ),
]

In [7]:
# Disabled debugging aid (kept commented out): runs a raw retrieval for the
# first SORA question and prints the title and chunk index of every hit.
# hits = rag_system.search(
#     sora_specific_questions[0],
#     top_k=50,
#     ce_keep_k=10,
# )

# print(f"Number of hits: {len(hits)}")
# for hit in hits:
#     print(f"Title: {hit['chunk_title']}, Chunk ID: {hit['chunk_index']}")

In [8]:
from LLM.LLMQueryCallQwen import QueryGenerator

# Expand one user question into several retrieval queries, then answer each of
# them through the RAG-backed chatbot.
# NOTE(review): max_queries=3 presumably caps the number of generated queries —
# confirm against QueryGenerator.
llm_query = QueryGenerator(max_queries=3)
queries_output = llm_query.generate_queries('My drone has a parachute system for emergency landings. Which method should I use to calculate the ground risk buffer?')

# Keep every answer: `resp` alone is overwritten on each iteration, which would
# leave only the last one available for the combination step below.
responses = []
for query in queries_output["queries"]:
    # Single quotes inside the replacement fields: reusing the enclosing double
    # quote is a SyntaxError on Python versions before 3.12.
    print(f"Processing query: {query['query']} with reasoning effort: {query['reasoning_level']}")
    resp = llm.answer(
        query["query"],
        chat_history=[],
        top_k=50,
        ce_keep_k=10,
        stream=True,
        print_prompt=False,
        reasoning_effort=query["reasoning_level"],
    )
    responses.append(resp)

# TODO: Combine everything into a single response with an LLM layer

Loading checkpoint shards: 100%|██████████| 4/4 [00:14<00:00,  3.59s/it]


Processing query: What factors should I consider when calculating the ground risk buffer for my drone's parachute system? with reasoning effort: medium
Sources:
 • [88] Unknown file source: Operational Mitigations for PDRA UAS Operations, page (?)
 • [116] Unknown file source: PDRA Characterisation and Conditions – Operational, Ground, Air Risk, and Observer Mitigations, page (?)

Answer: 

**Facts (directly from the provided context)**  

| Source | Key points |
|--------|------------|
| [88] Operational Mitigations | • Ground risk buffer must be established outside the operational volume. <br>• Default criterion: 1:1 rule – buffer ≥ planned operating height. <br>• Smaller buffer may be applied for rotary‑wing UAS using a ballistic methodology acceptable to the competent authority. <br>• Competent authority may require refinement based on SORA Step 9, considering adjacent air and ground risks. <br>• Operational volume + ground risk buffer must be fully contained within a sparsely popu