In [None]:
import os
from dotenv import load_dotenv
from pathlib import Path

# Load .env from the directory above this notebook/script.
# `__file__` is not defined inside a Jupyter kernel, so fall back to the current
# working directory (normally the notebook's own directory) when it is missing.
try:
    _base_dir = Path(__file__).resolve().parent
except NameError:
    _base_dir = Path.cwd()
env_path = _base_dir.parent / ".env"
load_dotenv(dotenv_path=env_path)

# Access variables (each is None when the variable is absent from the environment)
OPENAI_API_KEY        = os.getenv("OPENAI_API_KEY")
PINECONE_INDEX_NAME   = os.getenv("PINECONE_INDEX_NAME")
PINECONE_HOST         = os.getenv("PINECONE_HOST")
PINECONE_API_KEY      = os.getenv("PINECONE_API_KEY")
GEMINI_API_KEY        = os.getenv("GEMINI_API_KEY")
K_RETRIEVE            = int(os.getenv("K_RETRIEVE", "5"))  # default to 5

# os.environ only accepts strings: assigning None raises TypeError, so the key is
# re-exported only when it was actually found.
if OPENAI_API_KEY is not None:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [None]:

# Relative path of the JSON corpus file; passed to load_texts_from_json later in this cell.
JSON_PATH = "incisos-chunks.json"


import json
from tqdm import tqdm
from typing import List

import pinecone
import google.generativeai as genai
from sentence_transformers import SentenceTransformer

# ════════════════ INIT MODELS ════════════════
# Sentence-embedding model; this notebook uses it for both corpus passages and queries.
model = SentenceTransformer("dariolopez/bge-m3-es-legal-tmp-6")  # 1024-D
# Hand the Gemini API key (read from the environment) to the google.generativeai client.
genai.configure(api_key=GEMINI_API_KEY)

# ════════════════ LOAD CORPUS ════════════════
def load_texts_from_json(json_path: str) -> List[str]:
    """Read a JSON file and return the stripped "text" value of each entry.

    Args:
        json_path: Path of a UTF-8 JSON file whose top level is iterable
            (entries are looked up by the "text" key).

    Returns:
        The whitespace-stripped texts, in file order. Entries that have no
        "text" key are skipped.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        entries = json.load(handle)

    texts: List[str] = []
    for item in entries:
        if "text" not in item:
            continue  # entry carries no text: nothing to index
        texts.append(item["text"].strip())
    return texts

# Load the corpus once when the cell runs; ARTICULOS holds the stripped "text" values.
ARTICULOS = load_texts_from_json(JSON_PATH)
print(f"üìö  Loaded {len(ARTICULOS):,} art√≠culos")


# ════════════════ EMBEDDING FUNCTION ════════════════
def embed_texts(texts: List[str]) -> List[List[float]]:
    """Embed passages with the module-level sentence-transformer `model`.

    Every text is prefixed with "passage: " before encoding, mirroring the
    "query: " prefix applied on the retrieval side.

    NOTE(review): this section was originally labelled "E5", but `model` is
    loaded from a bge-m3 checkpoint. Confirm the prefixes are intended for it.

    Args:
        texts: Passages to embed.

    Returns:
        One vector per input text, as plain Python lists of floats so the
        result matches the annotation and is directly JSON-serialisable.
    """
    formatted = [f"passage: {text}" for text in texts]
    vectors = model.encode(formatted, show_progress_bar=True)
    # Convert whatever array type the encoder produced into built-in floats.
    return [[float(value) for value in row] for row in vectors]

print("üîß  Generating embeddings ‚Ä¶")
# Embed the whole corpus in a single call.
EMBEDS = embed_texts(ARTICULOS)
# Sanity check on the first vector only (1024 matches the "1024-D" note on the model).
# NOTE(review): EMBEDS[0] presumably fails on an empty corpus — confirm that is acceptable.
assert len(EMBEDS[0]) == 1024, "‚ùå Embedding dim mismatch!"


# ════════════════ PINECONE SETUP ════════════════
# Client and index handle are built from the PINECONE_* values read from the environment.
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(name=PINECONE_INDEX_NAME, host=PINECONE_HOST)

def upsert_vectors(texts: List[str], vecs: List[List[float]], batch: int = 100):
    """Send texts and their vectors to the module-level Pinecone `index` in batches.

    Each record gets the positional id "id-<position>", the vector at that
    position as its values, and the source text stored under metadata["text"].

    Args:
        texts: Source passages; their count determines how many records are sent.
        vecs: Vectors aligned with `texts` by position.
        batch: Maximum number of records per upsert call.
    """
    total = len(texts)
    for start in tqdm(range(0, total, batch), desc="‚¨ÜÔ∏è  Upserting"):
        stop = min(start + batch, total)  # the last batch may be shorter
        payload = []
        for pos in range(start, stop):
            record = {
                "id": f"id-{pos}",
                "values": vecs[pos],
                "metadata": {"text": texts[pos]},
            }
            payload.append(record)
        index.upsert(vectors=payload)

print("üì§  Uploading to Pinecone ‚Ä¶")
# Upload every corpus text together with its embedding (default batch size).
upsert_vectors(ARTICULOS, EMBEDS)
    

  from .autonotebook import tqdm as notebook_tqdm


üìö  Loaded 199 art√≠culos
üîß  Generating embeddings ‚Ä¶


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7/7 [00:07<00:00,  1.14s/it]


üì§  Uploading to Pinecone ‚Ä¶


‚¨ÜÔ∏è  Upserting: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:03<00:00,  1.51s/it]


In [2]:

# ════════════════ RETRIEVE FUNCTION ════════════════
def retrieve(query: str, k: int = K_RETRIEVE) -> List[str]:
    """Return the stored texts of the `k` best index matches for `query`.

    The query is prefixed with "query: ", encoded with the module-level
    `model`, and sent to the Pinecone `index` with metadata included.

    Args:
        query: Natural-language question.
        k: Number of matches to request (defaults to K_RETRIEVE).

    Returns:
        The metadata["text"] value of each match, in the order returned.
    """
    prefixed_query = f"query: {query}"
    embedding = model.encode(prefixed_query).tolist()
    response = index.query(vector=embedding, top_k=k, include_metadata=True)

    passages = []
    for match in response.matches:
        passages.append(match.metadata["text"])
    return passages


# ════════════════ GEMINI RAG ════════════════
# Generation model used by rag_answer (the "gemini-2.0-flash" variant).
gemini = genai.GenerativeModel(model_name="gemini-2.0-flash")

def rag_answer(question: str) -> str:
    """Answer `question` with Gemini, grounded on retrieved passages.

    The passages returned by `retrieve` are joined with blank lines and placed
    in a Contexto / Pregunta / Respuesta prompt.

    Args:
        question: The user question.

    Returns:
        The `.text` of the Gemini response.
    """
    passages = retrieve(question)
    context = "\n\n".join(passages)
    prompt = f"Contexto:\n{context}\n\nPregunta: {question}\nRespuesta:"
    response = gemini.generate_content(prompt)
    return response.text

In [3]:
# ════════════════ TEST IT ════════════════
# Smoke test: send one question through retrieval + generation and print the answer.
q = "¬øCu√°les son las atribuciones del presidente de la Argentina?"
print("\nüîé Pregunta:", q)
print("\nüß† Respuesta (Gemini):\n", rag_answer(q))


üîé Pregunta: ¬øCu√°les son las atribuciones del presidente de la Argentina?

üß† Respuesta (Gemini):
 Bas√°ndonos en el texto proporcionado, las atribuciones del Presidente de la Naci√≥n Argentina son:

*   **Jefe Supremo de la Naci√≥n, Jefe del Gobierno y responsable pol√≠tico de la administraci√≥n general del pa√≠s.**
*   **Ejercer la administraci√≥n general del pa√≠s.**
*   **Efectuar los nombramientos de los empleados de la administraci√≥n, excepto los que correspondan al presidente.**
*   **Expedir los actos y reglamentos que sean necesarios para ejercer las facultades que le atribuye el art√≠culo y aquellas que le delegue el presidente de la Naci√≥n, con el refrendo del ministro secretario del ramo al cual el acto o reglamento se refiera.**
*   **Ejercer las funciones y atribuciones que le delegue el presidente de la Naci√≥n y, en acuerdo de gabinete resolver sobre las materias que le indique el Poder Ejecutivo, o por su propia decisi√≥n, en aquellas que por su importancia es

In [None]:
import json
import time
from ragas.evaluation import evaluate
from datasets import Dataset
from tqdm import tqdm
import numpy as np



def load_reference_answers(path):
    """Load reference Q/A pairs from a JSON file into a question -> answer dict.

    The file is read as UTF-8 and every item must provide "question" and
    "answer" keys. If a question appears more than once, the last answer wins.
    """
    with open(path, "r", encoding="utf-8") as handle:
        items = json.load(handle)

    answers_by_question = {}
    for item in items:
        answers_by_question[item["question"]] = item["answer"]
    return answers_by_question



# ════════════════════════════════════════════════
# Build the dataset for RAGAS
# ════════════════════════════════════════════════
def prepare_ragas_dataset(reference_answers: dict, tag: str = "",
                          pause_seconds: float = 4) -> Dataset:
    """Run the RAG pipeline over every reference question and collect the records.

    For each question the retrieved contexts and the generated answer are
    stored next to the reference answer.

    NOTE(review): `rag_answer` performs its own retrieval, so each question is
    retrieved twice (once here for "contexts", once inside `rag_answer`).

    Args:
        reference_answers: Mapping of question -> reference answer.
        tag: Label appended to the progress-bar description.
        pause_seconds: Delay after each generated answer (default 4, as before).
            Presumably there to respect API rate limits. Values <= 0 disable it.

    Returns:
        A `Dataset` with the columns "question", "contexts", "answer" and
        "reference".
    """
    records = []
    for question, reference in tqdm(reference_answers.items(), desc=f"üß™ Procesando {tag}"):
        retrieved = retrieve(question)
        answer = rag_answer(question)
        if pause_seconds > 0:
            time.sleep(pause_seconds)
        records.append({
            "question": question,
            "contexts": retrieved,
            "answer": answer,
            "reference": reference
        })
    return Dataset.from_list(records)


# Reference Q/A files are read from the parent of the working directory.
reference_easy = load_reference_answers("../easy_questions.json")
reference_hard = load_reference_answers("../hard_questions.json")

# Each call runs retrieval and generation for every question in the set.
dataset_easy = prepare_ragas_dataset(reference_easy, tag="f√°ciles")
dataset_hard = prepare_ragas_dataset(reference_hard, tag="dif√≠ciles")

# Persist the records as JSON; a later cell reloads these files with load_dataset.
dataset_easy.to_json("ragas_easy.json", orient="records", lines=False)
dataset_hard.to_json("ragas_hard.json", orient="records", lines=False)




üß™ Procesando f√°ciles: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [04:24<00:00,  5.29s/it]
üß™ Procesando dif√≠ciles: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 65/65 [06:59<00:00,  6.46s/it]
Creating json from Arrow format: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 145.22ba/s]
Creating json from Arrow format: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 480.78ba/s]


478500

In [None]:
from ragas.embeddings.base import LangchainEmbeddingsWrapper
from sentence_transformers import SentenceTransformer

class CustomE5Embedding(LangchainEmbeddingsWrapper):
    """Embedding adapter backed by a local SentenceTransformer model.

    Queries are prefixed with "query: " and documents with "passage: " before
    encoding, matching the prefixes used when the index was built.
    """

    def __init__(self, model_name="dariolopez/bge-m3-es-legal-tmp-6"):
        # NOTE(review): the parent initializer is not called, so any state the
        # base wrapper normally sets up is absent — confirm this is intended.
        self.model = SentenceTransformer(model_name)

    def _encode(self, texts, prefix):
        """Encode `texts` with `prefix` prepended; return plain lists of floats."""
        prefixed = [f"{prefix}{text}" for text in texts]
        vectors = self.model.encode(prefixed, convert_to_tensor=False)
        return [[float(value) for value in row] for row in vectors]

    def embed_query(self, texts):
        """Embed a query.

        A single string (the usual `embed_query` call shape) yields one vector.
        Iterating over it, as the previous code did, would embed each character
        separately. A list of strings is still accepted and yields one vector
        per item.
        """
        if isinstance(texts, str):
            return self._encode([texts], "query: ")[0]
        return self._encode(texts, "query: ")

    def embed_documents(self, texts):
        """Embed a list of documents; returns one vector per document."""
        return self._encode(texts, "passage: ")


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from ragas.metrics import (
    LLMContextPrecisionWithoutReference,
    LLMContextPrecisionWithReference,
    NonLLMContextPrecisionWithReference,


    ResponseRelevancy,
    LLMContextRecall,
    Faithfulness
)
from ragas.run_config import RunConfig
from ragas.embeddings.base import embedding_factory
from datasets import load_dataset
import os
from datasets import Dataset
from ragas.evaluation import evaluate


# SECURITY: the OpenAI key must come from the environment (for example the .env
# file loaded at the top of this notebook), never from a literal in the source.
# The key that used to be hard-coded on this line is exposed and must be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or add it to the .env file "
        "before running the evaluation."
    )

# Load datasets (the JSON files written by the dataset-preparation cell)
ragas_easy:Dataset = load_dataset("json", data_files="ragas_easy.json", split="train")
ragas_hard:Dataset = load_dataset("json", data_files="ragas_hard.json", split="train")

# Run config (library defaults)
run_config = RunConfig()
# NOTE(review): custom_embeddings is created here but never passed to evaluate() below.
custom_embeddings = CustomE5Embedding() 

# The same three metrics are applied to both question sets.
metrics = [
    LLMContextPrecisionWithReference(),
    LLMContextRecall(),
    Faithfulness()
]

# Evaluate the easy question set.
print("\nüìä Evaluando preguntas F√ÅCILES:")
result_easy = evaluate(
    ragas_easy,
    metrics=metrics,
    run_config=run_config,
    batch_size=1
)
print("‚úÖ Resultados EASY:", result_easy)

# Evaluate the hard question set with identical settings.
print("\nüìä Evaluando preguntas DIF√çCILES:")
result_hard = evaluate(
    ragas_hard,
    metrics=metrics,
    run_config=run_config,
    batch_size=1
)
print("‚úÖ Resultados HARD:", result_hard)




üìä Evaluando preguntas F√ÅCILES:


Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 150/150 [25:39<00:00, 10.26s/it]


‚úÖ Resultados EASY: {'llm_context_precision_with_reference': 0.6391, 'context_recall': 0.7400, 'faithfulness': 0.9058}

üìä Evaluando preguntas DIF√çCILES:


Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 195/195 [47:53<00:00, 14.74s/it]


‚úÖ Resultados HARD: {'llm_context_precision_with_reference': 0.8095, 'context_recall': 0.8759, 'faithfulness': 0.8608}


In [None]:
import csv
import numpy as np
from pathlib import Path

# CSV path (shared results file in the parent directory; rows are appended)
csv_path = Path("../results.csv")
# Write the header only when this run creates the file.
write_header = not csv_path.exists()

# Label stored in the "experiment" column; change it for each experiment.
EXPERIMENT_NAME = "experiment_5"

metric_names = [
    "llm_context_precision_with_reference",
    "context_recall",
    "faithfulness"
]

# nanmean ignores NaN per-sample scores, so a single NaN does not turn the
# whole metric into NaN as a plain mean would.
# NOTE(review): NaNs presumably come from failed metric calls — confirm.
easy_scores = [float(np.nanmean(result_easy[m])) for m in metric_names]
hard_scores = [float(np.nanmean(result_hard[m])) for m in metric_names]

with open(csv_path, mode='a', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    if write_header:
        writer.writerow(["experiment", "dataset"] + metric_names)

    writer.writerow([EXPERIMENT_NAME, "easy"] + easy_scores)
    writer.writerow([EXPERIMENT_NAME, "hard"] + hard_scores)
