In [1]:
import os
import re
import json
from tqdm import tqdm
import time
import warnings
warnings.filterwarnings('ignore')

import pdfplumber
from PyPDF2 import PdfReader
from langdetect import detect
from transformers import MarianMTModel, MarianTokenizer
import torch
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import ollama

print("All libraries imported.")

BASE = r"E:\PROJECTS\climate_legal_readiness_project"
DATA_DIR = os.path.join(BASE, "data")
RAW_PDFS_DIR = os.path.join(DATA_DIR, "legal_docs")

PROC_DIR = os.path.join(DATA_DIR, "processed")
TEXT_DIR = os.path.join(PROC_DIR, "texts")
TRANSLATED_DIR = os.path.join(PROC_DIR, "texts_translated")
CHUNK_DIR = os.path.join(PROC_DIR, "chunks")
VECTOR_DIR = os.path.join(PROC_DIR, "vectorstore")

for path in [PROC_DIR, TEXT_DIR, TRANSLATED_DIR, CHUNK_DIR, VECTOR_DIR]:
    os.makedirs(path, exist_ok=True)

print(f"All directories ensured to exist.")
print(f"PDFs will be read from: {RAW_PDFS_DIR}")
print(f"Final English text will be in: {TRANSLATED_DIR}")
print(f"Chunks will be saved to: {CHUNK_DIR}")
print(f"Vector DB will be in: {VECTOR_DIR}")

All libraries imported.
All directories ensured to exist.
PDFs will be read from: E:\PROJECTS\climate_legal_readiness_project\data\legal_docs
Final English text will be in: E:\PROJECTS\climate_legal_readiness_project\data\processed\texts_translated
Chunks will be saved to: E:\PROJECTS\climate_legal_readiness_project\data\processed\chunks
Vector DB will be in: E:\PROJECTS\climate_legal_readiness_project\data\processed\vectorstore


In [None]:
def basic_clean(text):
    text = text.replace("\x00", "")
    text = re.sub(r"[ \t]+", " ", text) 
    text = re.sub(r"\n\s*\n\s*\n+", "\n\n", text)
    return text.strip()

pdf_files = [f for f in os.listdir(RAW_PDFS_DIR) if f.lower().endswith(".pdf")]
print(f"Found {len(pdf_files)} PDF files to extract.")
report = []

for pdf in tqdm(pdf_files, desc="Extracting text from PDFs"): 
    in_path = os.path.join(RAW_PDFS_DIR, pdf)
    out_path = os.path.join(TEXT_DIR, os.path.splitext(pdf)[0] + ".txt")
    
    if os.path.exists(out_path) and os.path.getsize(out_path) > 200:
        report.append({"file": pdf, "status": "exists"})
        continue

    pages = []
    try:
        with pdfplumber.open(in_path) as doc:
            for p in doc.pages:
                txt = p.extract_text() or ""
                pages.append(txt)
    except Exception as e:
        print(f"pdfplumber failed for {pdf}: {e}. Trying PyPDF2.")
        pages = []

    if sum(len(p) for p in pages) < 50:
        try:
            r = PdfReader(in_path)
            pages = [p.extract_text() or "" for p in r.pages]
        except Exception as e:
            print(f"PyPDF2 also failed for {pdf}: {e}")
            pages = []

    text = basic_clean("\n\n".join(pages))
    
    if len(text) < 50:
        status = "empty_or_scanned_pdf"
    else:
        with open(out_path, "w", encoding="utf-8") as f:
            f.write(text)
        try:
            lang = detect(text[:1000])
        except:
            lang = "unknown"
        status = f"ok ({lang})"
        
    report.append({"file": pdf, "status": status})

print("\nPDF extraction complete.")

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

model_cache = {}
def get_translator(model_name):
    if model_name not in model_cache:
        print(f"Loading model {model_name}... (This may take a while)")
        model_cache[model_name] = {
            "tokenizer": MarianTokenizer.from_pretrained(model_name),
            "model": MarianMTModel.from_pretrained(model_name).to(device)
        }
    return model_cache[model_name]

def translate_text(text, model_name, chunk_size=1000):
    translator = get_translator(model_name)
    tokenizer = translator["tokenizer"]
    model = translator["model"]
    
    chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
    translated_parts = []
    
    for ch in tqdm(chunks, desc=f"Translating {model_name}"):
        try:
            inputs = tokenizer([ch], return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
            translated_ids = model.generate(**inputs, max_length=512)
            translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)
            translated_parts.append(translated_text)
        except Exception as e:
            print(f"Error translating chunk: {e}")
            translated_parts.append("[Translation Error]") 
            
    return "\n\n".join(translated_parts)

all_text_files = [f for f in os.listdir(TEXT_DIR) if f.endswith(".txt") and not f.startswith("_")]

for fname in tqdm(all_text_files, desc="Processing all text files"): 
    in_path = os.path.join(TEXT_DIR, fname)
    out_path = os.path.join(TRANSLATED_DIR, fname)
    
    if os.path.exists(out_path):
        print(f"Skipping {fname}, translated file already exists.")
        continue
        
    with open(in_path, "r", encoding="utf-8", errors="ignore") as f:
        text = f.read()
        
    if len(text) < 50:
        print(f"Skipping short file: {fname}")
        continue

    final_text = ""
    if fname.endswith("_es.txt"):
        print(f"Translating Spanish file: {fname}")
        final_text = translate_text(text, "Helsinki-NLP/opus-mt-es-en")
    elif fname.endswith("_bn.txt"): 
        print(f"Translating Bengali file: {fname}")
        final_text = translate_text(text, "Helsinki-NLP/opus-mt-bn-en")
    else:
        print(f"Copying English file: {fname}")
        final_text = text 
    
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(final_text)

print("\nTranslation and processing complete. Final English files are in 'texts_translated'.")

In [3]:
def chunk_text(text, chunk_size=1200, overlap=200):
    if not isinstance(text, str): return []
    text = re.sub(r"\n{3,}", "\n\n", text.strip())
    paragraphs = text.split("\n\n")
    chunks, current_chunk = [], ""
    
    for paragraph in paragraphs:
        paragraph = paragraph.strip()
        if not paragraph: continue
        if len(current_chunk) == 0 or len(current_chunk) + len(paragraph) + 2 <= chunk_size:
            current_chunk += "\n\n" + paragraph if current_chunk else paragraph
        elif len(paragraph) > chunk_size:
             if current_chunk: chunks.append(current_chunk)
             start = 0
             while start < len(paragraph):
                 end, split_point = start + chunk_size, start + chunk_size
                 if end < len(paragraph):
                      space_pos = paragraph.rfind(' ', start + chunk_size - overlap // 2, end)
                      if space_pos != -1: split_point = space_pos
                 chunks.append(paragraph[start:split_point].strip())
                 start = max(split_point - overlap, start + 1) if overlap > 0 else split_point
             current_chunk = ""
        else:
            if current_chunk: chunks.append(current_chunk)
            overlap_text = ""
            if overlap > 0 and chunks:
                overlap_candidate = chunks[-1][-(overlap):]
                if len(overlap_candidate) + len(paragraph) + 2 < chunk_size + 50 :
                    overlap_text = overlap_candidate + "\n\n"
            current_chunk = (overlap_text + paragraph)[:chunk_size]
    if current_chunk: chunks.append(current_chunk)
    return [c.strip() for c in chunks if len(c.strip()) > 50]

manifest = []
text_files_to_chunk = [f for f in os.listdir(TRANSLATED_DIR) if f.endswith(".txt") and not f.startswith("_")]
print(f"Found {len(text_files_to_chunk)} English .txt files to chunk.")

for fname in tqdm(text_files_to_chunk, desc="Chunking documents"): 
    full_path, base_name = os.path.join(TRANSLATED_DIR, fname), os.path.splitext(fname)[0]
    
    parts, country_name = base_name.split('_'), "Unknown"
    if len(parts) > 0:
        if parts[0].upper() in ["NAP", "LAW", "ACT", "BILL"]:
             if len(parts) > 1: country_name = parts[1]
        else: country_name = parts[0]
        country_name = country_name.replace('bn', '').replace('es', '').strip('_ ')
        if country_name.lower() == "southafrica": country_name = "South Africa"
        elif country_name: country_name = country_name.capitalize()
    doc_type = "NAP" if "NAP" in base_name.upper() else "LAW"
    if "Act" in base_name or "Ley" in base_name or "Bill" in base_name: doc_type = "LAW"
    
    try:
        with open(full_path, "r", encoding="utf-8", errors="ignore") as f: text_content = f.read()
        if len(text_content) < 50: continue
        list_of_chunks = chunk_text(text_content, chunk_size=1200, overlap=200)
        if not list_of_chunks: continue
        
        out_jsonl_path = os.path.join(CHUNK_DIR, base_name + ".jsonl")
        with open(out_jsonl_path, "w", encoding="utf-8") as f_out:
            for i, chunk_text_content in enumerate(list_of_chunks):
                record = {
                    "id": f"{base_name}::chunk_{i}", "text": chunk_text_content, "source_file": fname,
                    "country": country_name, "doc_type": doc_type
                }
                f_out.write(json.dumps(record, ensure_ascii=False) + "\n")
                
        manifest.append({"file": fname, "chunks": len(list_of_chunks), "country": country_name, "doc_type": doc_type, "status": "ok"})
        print(f"Saved {len(list_of_chunks)} chunks for {fname} (Country: {country_name})")
    except Exception as e: print(f"\n--- Error processing file: {fname} ---: {e}")

manifest_path = os.path.join(PROC_DIR, "chunk_manifest.json")
with open(manifest_path, "w", encoding="utf-8") as f_manifest:
    json.dump(manifest, f_manifest, ensure_ascii=False, indent=2)
print(f"\nChunking complete. Manifest saved to: {manifest_path}")

Found 12 English .txt files to chunk.


Chunking documents:   0%|          | 0/12 [00:00<?, ?it/s]

Saved 26 chunks for Bangladesh_ClimateChangeTrustAct_2010_bn.txt (Country: Bangladh)
Saved 182 chunks for Chile_LeyMarcoCambioClimatico_2022_es.txt (Country: Chile)
Saved 161 chunks for Fiji_ClimateChangeAct_2021.txt (Country: Fiji)
Saved 46 chunks for Kenya_ClimateChangeAct_2016.txt (Country: Kenya)


Chunking documents: 100%|██████████| 12/12 [00:00<00:00, 28.13it/s]

Saved 445 chunks for NAP_Bangladesh.txt (Country: Bangladh)
Saved 536 chunks for NAP_Chile.txt (Country: Chile)
Saved 241 chunks for NAP_Fiji.txt (Country: Fiji)
Saved 107 chunks for NAP_Kenya.txt (Country: Kenya)
Saved 217 chunks for NAP_Pakistan.txt (Country: Pakistan)
Saved 169 chunks for NAP_SouthAfrica.txt (Country: South Africa)
Saved 22 chunks for Pakistan_ClimateChangeAct_2017.txt (Country: Pakistan)
Saved 55 chunks for SouthAfrica_ClimateChangeBill_2022.txt (Country: South Africa)






Chunking complete. Manifest saved to: E:\PROJECTS\climate_legal_readiness_project\data\processed\chunk_manifest.json


In [4]:
print("Loading embedding model (all-MiniLM-L6-v2)...")
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print("Model loaded.")
print(f"Initializing vector database at: {VECTOR_DIR}")
client = chromadb.PersistentClient(path=VECTOR_DIR, settings=Settings(allow_reset=True))

print("Resetting collection 'climate_laws_nap'...")
try:
    client.delete_collection(name="climate_laws_nap")
    time.sleep(1) 
except Exception as e:
    print(f"Note: Could not delete collection (may not exist): {e}")

collection = client.create_collection(name="climate_laws_nap", metadata={"hnsw:space": "cosine"})
print("Collection created.")

jsonl_files = [f for f in os.listdir(CHUNK_DIR) if f.endswith(".jsonl")]
total_indexed = 0

for jsonl_file in tqdm(jsonl_files, desc="Indexing documents"): 
    file_path = os.path.join(CHUNK_DIR, jsonl_file)
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            records = [json.loads(line) for line in f if line.strip()]
        if not records: continue

        texts = [r.get("text", "") for r in records]
        ids = [r.get("id") for r in records]
        metadatas = [{"source_file": r.get("source_file"), "country": r.get("country"), "doc_type": r.get("doc_type")} for r in records]
        
        valid_indices = [i for i, t in enumerate(texts) if t]
        if len(valid_indices) < len(texts):
            texts = [texts[i] for i in valid_indices]
            ids = [ids[i] for i in valid_indices]
            metadatas = [metadatas[i] for i in valid_indices]

        if not texts:
             print(f"Skipping {jsonl_file}, no valid text found.")
             continue

        print(f"\nIndexing {len(texts)} chunks for {records[0].get('country')}, {records[0].get('doc_type')}...")
        
        embeddings = embedder.encode(texts, batch_size=32, show_progress_bar=False)
        embeddings_list = embeddings.tolist()

        collection.add(documents=texts, embeddings=embeddings_list, metadatas=metadatas, ids=ids)
        total_indexed += len(records)
        print(f"Successfully added {len(records)} chunks.")
    except Exception as e:
        print(f"\n--- Error processing file: {jsonl_file} ---: {e}")
        import traceback
        traceback.print_exc()

print(f"\nRe-indexing complete. Total chunks indexed: {total_indexed}")

Loading embedding model (all-MiniLM-L6-v2)...
Model loaded.
Initializing vector database at: E:\PROJECTS\climate_legal_readiness_project\data\processed\vectorstore
Resetting collection 'climate_laws_nap'...
Collection created.


Indexing documents:   0%|          | 0/12 [00:00<?, ?it/s]


Indexing 26 chunks for Bangladh, LAW...


Indexing documents:   8%|▊         | 1/12 [00:02<00:22,  2.00s/it]

Successfully added 26 chunks.

Indexing 182 chunks for Chile, LAW...


Indexing documents:  17%|█▋        | 2/12 [00:08<00:49,  4.90s/it]

Successfully added 182 chunks.

Indexing 161 chunks for Fiji, LAW...


Indexing documents:  25%|██▌       | 3/12 [00:15<00:49,  5.47s/it]

Successfully added 161 chunks.

Indexing 46 chunks for Kenya, LAW...


Indexing documents:  33%|███▎      | 4/12 [00:16<00:32,  4.03s/it]

Successfully added 46 chunks.

Indexing 445 chunks for Bangladh, NAP...


Indexing documents:  42%|████▏     | 5/12 [00:35<01:03,  9.12s/it]

Successfully added 445 chunks.

Indexing 536 chunks for Chile, NAP...


Indexing documents:  50%|█████     | 6/12 [00:59<01:25, 14.32s/it]

Successfully added 536 chunks.

Indexing 241 chunks for Fiji, NAP...


Indexing documents:  58%|█████▊    | 7/12 [01:10<01:06, 13.32s/it]

Successfully added 241 chunks.

Indexing 107 chunks for Kenya, NAP...


Indexing documents:  67%|██████▋   | 8/12 [01:16<00:43, 10.78s/it]

Successfully added 107 chunks.

Indexing 217 chunks for Pakistan, NAP...


Indexing documents:  75%|███████▌  | 9/12 [01:26<00:32, 10.76s/it]

Successfully added 217 chunks.

Indexing 169 chunks for South Africa, NAP...


Indexing documents:  83%|████████▎ | 10/12 [01:35<00:20, 10.17s/it]

Successfully added 169 chunks.

Indexing 22 chunks for Pakistan, LAW...


Indexing documents:  92%|█████████▏| 11/12 [01:36<00:07,  7.39s/it]

Successfully added 22 chunks.

Indexing 55 chunks for South Africa, LAW...


Indexing documents: 100%|██████████| 12/12 [01:39<00:00,  8.33s/it]

Successfully added 55 chunks.

Re-indexing complete. Total chunks indexed: 2207





In [8]:
try:
    print("Attempting to connect to Ollama server...")
    models_data_raw = ollama.list()
    print("Success! Connected to Ollama.")
    if models_data_raw and models_data_raw.get('models'):
        available_models = [m.get('model', 'Attribute Error') for m in models_data_raw['models']]
        print(f"\nAvailable models: {available_models}")
        
        if any('llama3' in m for m in available_models):
            print(" Found 'llama3' model.")
        else:
            print(" 'llama3' model not found! Please run 'ollama pull llama3' in your terminal.")
    else:
        print(f"\nWarning: Unexpected data structure from ollama.list(). Raw data: {models_data_raw}")

except Exception as e:
    print(f"\n Error connecting to Ollama: {e}")
    print("Please ensure the Ollama application is running on your computer.")

Attempting to connect to Ollama server...
Success! Connected to Ollama.

Available models: ['phi3:mini', 'phi3:latest', 'llama3:latest']
 Found 'llama3' model.


In [18]:
import ollama
import chromadb
from sentence_transformers import SentenceTransformer
import os
import json
import time # Added for potential delays

# --- Re-establish connections just in case kernel restarted ---
# print("Loading embedding model (all-MiniLM-L6-v2)...")
# embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# print("Connecting to vector database...")
# client = chromadb.PersistentClient(path=VECTOR_DIR)
# collection = client.get_collection(name="climate_laws_nap")
# print("Setup complete for RAG function.")
# # -----------------------------------------------------------

def get_local_rag_answer(query: str, country_filter: str = None):
    """
    Answers a query using Ollama and a local ChromaDB vectorstore.
    - If country_filter is provided, it filters for that specific country.
    - If country_filter is None, it searches all documents for general patterns.
    """

    query_embedding = embedder.encode([query])[0].tolist() 
    if country_filter:
        where_clause = {"country": country_filter}
        print(f"Retrieving specific context for: {country_filter}...")
    else:
        where_clause = None 
        print("Retrieving generalized context from all 6 countries...")

    try:
        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=5,
            where=where_clause, 
            include=["documents", "metadatas", "distances"]
        )
    except Exception as e:
        print(f"Error querying ChromaDB: {e}")
        return "Error: Could not retrieve documents from the vector database."

    if not results or not results.get("documents") or not results["documents"][0]:
        print("Warning: No documents found matching the query criteria.")
        if country_filter:
            print("Trying generalized search instead...")
            return get_local_rag_answer(query, country_filter=None)
        else:
             return "Sorry, I couldn't find any relevant information for that query, even by analogy."
    docs = results["documents"][0]
    metas = results["metadatas"][0]
    dists = results["distances"][0]


    context_blocks = []
    for i, (doc, meta, dist) in enumerate(zip(docs, metas, dists), start=1):
         if meta:
              source_info = f"[Source {i}: {meta.get('country', 'N/A')} ({meta.get('doc_type', 'N/A')}) | File: {meta.get('source_file', 'N/A')}]"
              context_blocks.append(f"{source_info}\n{doc}")
         else:
              context_blocks.append(f"[Source {i}: Metadata missing]\n{doc}")


    context = "\n\n---\n\n".join(context_blocks)

    system_prompt = """
    You are an expert global climate policy analyst. You will be given a user's question
    and a set of text chunks from the climate laws and National Adaptation Plans (NAPs)
    of 6 representative countries: Bangladesh (Asia), Chile (South America), Fiji (Oceania), 
    Kenya (Africa), Pakistan (Asia), South Africa (Africa).

    Your task is to answer the user's question using ONLY the provided text chunks (context).

    **Reasoning Process:**

    1.  **Identify Target:** Determine if the question is about a specific country *within* the context (e.g., Kenya) or a country *outside* the context (e.g., India).
    2.  **Specific Question:** If the question is about a country *within* the context (e.g., Kenya), primarily use sources from that specific country found in the context.
    3.  **Analogical Question:** If the question is about a country *outside* the context (e.g., India):
        * **Prioritize Region:** First, look for examples in the context from countries in the *same continent/region* as the target country (e.g., for India, prioritize context from Bangladesh and Pakistan).
        * **Synthesize Patterns:** Combine the relevant regional examples (if any) with common patterns found across *all* provided sources to create a generalized, analogous answer. Do not just focus on one country's example.
        * **State Analogy:** Clearly state that the answer is an analogy based on available examples.
    4.  **Citation:** You MUST cite facts in your answer using the [Source #] tags exactly as provided in the context (e.g., [Source 1], [Source 3]). Cite *after each piece of information*.
    5.  **Conciseness & Factuality:** Be factual, concise, and use bullet points.
    6.  **No Information:** If the context does not contain relevant information to answer the question (even by analogy), state clearly that the information is not available in the provided documents. Do not invent information.
    """

    user_prompt = f"Question: {query}\n\nContext:\n{context}\n\nAnswer:"

    print("Sending context to Ollama (llama3)...")
    try:
        response = ollama.chat(
            model="phi3:mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ]
        )
        return response['message']['content']

    except Exception as e:
        print(f"--- OLLAMA ERROR ---")
        print(f"Error: {e}")
        print("Please ensure the Ollama application is running and you have pulled the 'llama3' model.")
        print("Run 'ollama pull llama3' in your terminal if needed.")
        time.sleep(2) 
        return None

In [None]:
query_india = "What are adaptation priorities for a country like India, based on regional examples?"

answer_india = get_local_rag_answer(query_india)

if answer_india:
    print("\n--- GENERALIZED ANSWER (for India) ---")
    print(answer_india)

In [21]:
import chromadb
from sentence_transformers import SentenceTransformer
import os

test_query = "What are the priority sectors for climate adaptation in Kenya?"
test_query_embedding = embedder.encode([test_query])[0].tolist()

print(f"--- Testing retrieval for: '{test_query}' WITH Kenya filter ---")
try:
    results_kenya = collection.query(
        query_embeddings=[test_query_embedding],
        n_results=3,
        where={"country": "Kenya"},
        include=["documents", "metadatas", "distances"]
    )

    if results_kenya and results_kenya.get("documents") and results_kenya["documents"][0]:
        print("Found results specifically for Kenya:")
        for i, (doc, meta, dist) in enumerate(zip(results_kenya["documents"][0], results_kenya["metadatas"][0], results_kenya["distances"][0])):
            print(f"\nResult {i+1} (Distance: {dist:.4f})")
            print(f"Source: {meta.get('source_file', 'N/A')}")
            print(f"Text: {doc[:300]}...") 
            print("-" * 20)
    else:
        print(" No results found specifically for Kenya with this query.")
        print("Raw results:", results_kenya)

except Exception as e:
    print(f" Error during direct retrieval test: {e}")

--- Testing retrieval for: 'What are the priority sectors for climate adaptation in Kenya?' WITH Kenya filter ---
Found results specifically for Kenya:

Result 1 (Distance: 0.2487)
Source: NAP_Kenya.txt
Text: Figure 11: Kenya’s Adaptation theory of change
ADAPTATION INDICATORS
National Sector County
• Human development • Number of sectors • Number of counties that have
index planning, budgeting integrated climate change
and implementing adaptation in their CIDPs
• Percentage of climate
climate change
rel...
--------------------

Result 2 (Distance: 0.2576)
Source: NAP_Kenya.txt
Text: Preface
Charles Sunkuli, Principal Secretary - State Department of Environment
This National Adaptation Plan (NAP 2015-2030) is a critical response
to the climate change challenge facing our country. The NAP is
Kenya’s first plan on adaptation, and demonstrates our commitment
to operationalise the N...
--------------------

Result 3 (Distance: 0.2641)
Source: NAP_Kenya.txt
Text: Endnotes
1 15th Session of 

In [None]:
query_kenya = "What are the specific climate change adaptation sectors mentioned in Kenya's NAP?"

answer_kenya = get_local_rag_answer(query_kenya, country_filter="Kenya")

if answer_kenya:
    print("\n--- SPECIFIC ANSWER (for Kenya) ---")
    print(answer_kenya)

In [23]:
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

query = "What are Londond's main climate adaptation priority sectors?"

q_emb = embedder.encode([query])[0]

results = collection.query(
    query_embeddings=[q_emb],
    n_results=5,
    include=["documents","metadatas","distances"],

)

for doc, meta, dist in zip(results["documents"][0], results["metadatas"][0], results["distances"][0]):
    print(f"[{meta['country']} | {meta['doc_type']} | {meta['source_file']}]  score={dist:.4f}")
    print(doc[:400].replace("\n", " "))
    print("-"*90)

[Pakistan | NAP | NAP_Pakistan.txt]  score=0.4164
vel of coordination between the MoCC&EC and the provincial governments was essential, because it serves as the cornerstone of effective climate governance and implementation of the NAP countrywide. 33  34  4. Adaptation Priorities NATIONAL ADAPTATION PLANP A K I S T A N 2 0 2 3 35
------------------------------------------------------------------------------------------
[Chile | LAW | Chile_LeyMarcoCambioClimatico_2022_es.txt]  score=0.4169
lopment of which shall be the responsibility of the Ministry of Agriculture; h) Fisheries and aquaculture, the development of which shall be the responsibility of the Ministry of Economy, Development, and Tourism; i) Cities, the development of which shall be the responsibility of the Ministry of Housing and Urban Development; j) Tourism, the development of which shall be the responsibility of the 
------------------------------------------------------------------------------------------
[Chile | NAP 