In [1]:
#%pip install langchain

from langchain_chroma import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders.parsers import RapidOCRBlobParser
from langchain.docstore.document import Document
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas import SingleTurnSample
from ragas.metrics import LLMContextPrecisionWithoutReference
from ragas import SingleTurnSample
from ragas.metrics import BleuScore
from ragas.metrics import AspectCritic
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness
from langchain_community.document_loaders import PyPDFLoader

USER_AGENT environment variable not set, consider setting it to identify your requests.
  from .autonotebook import tqdm as notebook_tqdm


# CREACION DE RAG PARA PDF

https://docs.ragas.io/en/stable/getstarted/rag_eval/#analyze-results

In [11]:
class RAG:
    RAG_TEMPLATE = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.

<context>
{context}
</context>

Answer the following question:

{question}
"""

    def __init__(self, model="llama3", base_url="https://ollama.gsi.upm.es/"):
        # Inicializa el modelo de lenguaje y las embeddings
        self.model = ChatOllama(model=model, base_url=base_url)
        self.embeddings = OllamaEmbeddings(model="nomic-embed-text")
        self.vectorstore = None
        self.docs = None
        self.prompt_template = ChatPromptTemplate.from_template(self.RAG_TEMPLATE)

    def load_documents(self, file_path, mode="page", images_inner_format="markdown-img"):
        """
        Carga documentos desde un archivo PDF, los divide en fragmentos (chunks) 
        y crea un vectorstore basado en las embeddings.
        """
        # Carga del PDF utilizando PyPDFLoader
        loader = PyPDFLoader(file_path, mode=mode, images_inner_format=images_inner_format)
        self.docs = loader.load()
        # Divide el contenido en chunks de 500 caracteres sin solapamiento
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
        all_splits = text_splitter.split_documents(self.docs)
        # Crea el vectorstore a partir de los fragmentos y las embeddings
        self.vectorstore = Chroma.from_documents(documents=all_splits, embedding=self.embeddings)

    def get_most_relevant_docs(self, query):
        """
        Recupera los documentos más relevantes para la consulta utilizando el vectorstore.
        """
        if not self.vectorstore:
            raise ValueError("No se han cargado documentos. Ejecuta load_documents primero.")
        retriever = self.vectorstore.as_retriever()
        return retriever.get_relevant_documents(query)

    def generate_answer(self, query, relevant_docs):
        """
        Genera una respuesta para la consulta, usando los documentos relevantes.
        """
        # Une el contenido de los documentos en un bloque de texto
        context = "\n\n".join(doc.page_content for doc in relevant_docs)
        # Rellena la plantilla del prompt con el contexto y la pregunta
        prompt = self.prompt_template.format(context=context, question=query)
        messages = [
            ("system", "You are an assistant that answers questions based solely on the provided context."),
            ("human", prompt)
        ]
        ai_msg = self.model.invoke(messages)
        return ai_msg.content


In [12]:
rag = RAG()

In [13]:
# Load documents
rag.load_documents(file_path = "./jc3prueba.pdf")

In [14]:
questions =["¿Cuál es el propósito principal del JC3IEDM?",
            "¿Cuáles son las dos categorías principales de 'objeto' que abarca el diseño del modelo?",
            "¿Qué papel juega la entidad 'REPORTING-DATA' en el JC3IEDM?"]
ground_truth = ["El JC3IEDM tiene como propósito principal definir la información que se va a intercambiar entre sistemas de mando y control automatizados (C2IS) para lograr la interoperabilidad. También puede servir como base coherente para otros mecanismos de intercambio de información.",
                "El diseño del modelo abarca dos categorías de objetos: aquellos que pueden ser identificados individualmente (OBJECT-ITEM) y aquellos que representan propiedades agrupadas o de clase (OBJECT-TYPE). La vinculación de un ítem a un tipo es obligatoria en el modelo",
                "'REPORTING-DATA' especifica la fuente, calidad y momento de los datos reportados. Permite comparar diferentes informes y mantener un registro histórico de la información"
    
]

In [15]:
dataset = []

for query,reference in zip(questions,ground_truth):

    relevant_docs = rag.get_most_relevant_docs(query)
    response = rag.generate_answer(query, relevant_docs)
    dataset.append(
        {
            "user_input":query,
            "retrieved_contexts":relevant_docs,
            "response":response,
            "reference":reference
        }
    )

  return retriever.get_relevant_documents(query)


In [16]:
dataset

[{'user_input': '¿Cuál es el propósito principal del JC3IEDM?',
  'retrieved_contexts': [Document(id='921c4df6-241b-4d6d-b45c-69f2fb4781ec', metadata={'author': 'MIP DMWG', 'creationdate': '2007-02-13T11:46:49+01:00', 'creator': 'Acrobat PDFMaker 7.0.7 for Word', 'moddate': '2024-12-11T11:35:47+01:00', 'page': 3, 'page_label': '4', 'producer': 'Acrobat Distiller 7.0.5 (Windows)', 'source': './jc3prueba.pdf', 'subject': 'Overview of JC3IEDM Specification', 'title': 'JC3IEDM', 'total_pages': 82}, page_content='Figure 37.  High-Level View of JC3IEDM ................................................................................... 68'),
   Document(id='ee5f5d93-8acb-4c4f-aaf7-466e90d873b3', metadata={'author': 'MIP DMWG', 'creationdate': '2007-02-13T11:46:49+01:00', 'creator': 'Acrobat PDFMaker 7.0.7 for Word', 'moddate': '2024-12-11T11:35:47+01:00', 'page': 3, 'page_label': '4', 'producer': 'Acrobat Distiller 7.0.5 (Windows)', 'source': './jc3prueba.pdf', 'subject': 'Overview of JC3IEDM

In [17]:
from ragas import EvaluationDataset

# Convierte los documentos a strings usando el atributo page_content
for sample in dataset:
    sample['retrieved_contexts'] = [doc.page_content for doc in sample['retrieved_contexts']]

evaluation_dataset = EvaluationDataset.from_list(dataset)


In [18]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper

evaluator_llm = LangchainLLMWrapper(ChatOllama(model="llama3", base_url= "https://ollama.gsi.upm.es/" ))
evaluator_embeddings = LangchainEmbeddingsWrapper(OllamaEmbeddings(model="nomic-embed-text"))

from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness

result = evaluate(dataset=evaluation_dataset,metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness()],llm=evaluator_llm)
result

Evaluating: 100%|████████████████████████████████████████████████████████████████████████| 9/9 [00:51<00:00,  5.68s/it]


{'context_recall': 0.8889, 'faithfulness': 1.0000, 'factual_correctness(mode=f1)': 0.3767}

In [19]:
import os
os.environ["RAGAS_APP_TOKEN"] = "apt.4054-53fd2731274f-4395-87c1-2cd16721-ebca3"

In [20]:
result.upload()

Evaluation results uploaded! View at https://app.ragas.io/dashboard/alignment/evaluation/fa02a105-833d-48e6-a00a-b8bbbcd9c755


'https://app.ragas.io/dashboard/alignment/evaluation/fa02a105-833d-48e6-a00a-b8bbbcd9c755'

# GENERACION DE UN TESTSET 

https://docs.ragas.io/en/stable/getstarted/rag_testset_generation/#generate-testset

In [21]:
generator_llm = LangchainLLMWrapper(ChatOllama(model="llama3", base_url="https://ollama.gsi.upm.es/"))
generator_embeddings = LangchainEmbeddingsWrapper(OllamaEmbeddings(model="nomic-embed-text"))


In [22]:
from ragas.testset.graph import KnowledgeGraph

kg = KnowledgeGraph()

In [23]:
# 1. Cargar documentos usando la clase RAG
rag = RAG()
rag.load_documents(file_path="./jc3prueba.pdf")
docs = rag.docs  # Lista de documentos cargados

# 2. Crear un KnowledgeGraph vacío e incorporar los documentos como nodos
from ragas.testset.graph import KnowledgeGraph, Node, NodeType

kg = KnowledgeGraph()
print("KnowledgeGraph inicial:", kg)  # Debería mostrar: KnowledgeGraph(nodes: 0, relationships: 0)

for doc in docs:
    kg.nodes.append(
        Node(
            type=NodeType.DOCUMENT,
            properties={
                "page_content": doc.page_content,
                "document_metadata": doc.metadata
            }
        )
    )
print("KnowledgeGraph tras agregar documentos:", kg)  # Ahora debería mostrar varios nodos (por ejemplo, 10)

# 3. Definir un prompt personalizado para extraer headlines
from langchain.prompts import ChatPromptTemplate

headlines_extractor_prompt = ChatPromptTemplate.from_template(
    """
You are a headlines extractor for document nodes. Extract a concise headline from the given page_content.
Return your answer as a JSON object with the key "headlines". 
For example, if the content is:
"Joint Consultation Command & Control Information Exchange Data Model (JC3IEDM) facilitates interoperability..."
Then your output should be:
{"headlines": "Interoperability in JC3IEDM"}
If no clear headline is found, return {"headlines": ""}.
"""
)

# 4. Obtener y aplicar las transformaciones al KnowledgeGraph
from ragas.testset.transforms import default_transforms, apply_transforms

# Usamos el mismo LLM y modelo de embeddings que para la generación del testset
transformer_llm = generator_llm
embedding_model = generator_embeddings

# Obtener las transformaciones por defecto
trans = default_transforms(documents=docs, llm=transformer_llm, embedding_model=embedding_model)

# Inyectar el prompt personalizado para la transformación de headlines (si existe)
for transform in trans:
    if hasattr(transform, "name") and transform.name == "headlines_extractor":
        transform.prompt = headlines_extractor_prompt

apply_transforms(kg, trans)

# 5. Guardar y recargar el KnowledgeGraph (opcional pero útil para verificar el enriquecimiento)
kg.save("knowledge_graph.json")


KnowledgeGraph inicial: KnowledgeGraph(nodes: 0, relationships: 0)
KnowledgeGraph tras agregar documentos: KnowledgeGraph(nodes: 82, relationships: 0)


unable to apply transformation: 'headlines' property not found in this node                     | 0/82 [00:00<?, ?it/s]
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' 

In [24]:
loaded_kg = KnowledgeGraph.load("knowledge_graph.json")
print("KnowledgeGraph cargado:", loaded_kg)  # Debería mostrar el número actualizado de nodos y relaciones

# 6. Crear el TestsetGenerator usando el KnowledgeGraph enriquecido
from ragas.testset import TestsetGenerator
from ragas.testset.synthesizers import default_query_distribution

generator = TestsetGenerator(
    llm=generator_llm,
    embedding_model=embedding_model,
    knowledge_graph=loaded_kg
)

# 7. Definir la distribución de queries a generar (usando la distribución por defecto)
from ragas.testset.synthesizers import default_query_distribution

query_distribution = default_query_distribution(generator_llm)
print("Distribución original de queries:", query_distribution)

# Filtrar la distribución para eliminar el sintetizador multi_hop_specific_query_synthesizer
filtered_query_distribution = [
    (synth, prob) for synth, prob in query_distribution 
    if synth.name != "multi_hop_specific_query_synthesizer"
]

print("Distribución filtrada de queries:", filtered_query_distribution)



KnowledgeGraph cargado: KnowledgeGraph(nodes: 139, relationships: 3726)
Distribución original de queries: [(SingleHopSpecificQuerySynthesizer(name='single_hop_specifc_query_synthesizer', llm=LangchainLLMWrapper(langchain_llm=ChatOllama(...)), generate_query_reference_prompt=QueryAnswerGenerationPrompt(instruction=Generate a single-hop query and answer based on the specified conditions (persona, term, style, length) and the provided context. Ensure the answer is entirely faithful to the context, using only the information directly from the provided context.### Instructions:
1. **Generate a Query**: Based on the context, persona, term, style, and length, create a question that aligns with the persona's perspective and incorporates the term.
2. **Generate an Answer**: Using only the content from the provided context, construct a detailed answer to the query. Do not add any information not included in or inferable from the context.
, examples=[(QueryCondition(persona=Persona(name='Software

In [25]:
# Generar el testset usando la distribución filtrada
testset = generator.generate(testset_size=10, query_distribution=filtered_query_distribution)



Generating personas: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  1.77it/s]
Generating Scenarios: 100%|█████████████████████████████████████████████████████████████| 2/2 [09:34<00:00, 287.11s/it]
Generating Samples: 100%|████████████████████████████████████████████████████████████████| 8/8 [00:17<00:00,  2.18s/it]


In [26]:
# Convertir a DataFrame para visualizar resultados
df = testset.to_pandas()
print(df)

                                          user_input  \
0         what does ACTION mean in JC3IEDM overview?   
1   What is the role of ACTION in the JC3IEDM model?   
2  What is the capability referred to in military...   
3  What does CAPABILITY refer to in the context o...   
4  What are some examples of independent entities...   
5  What are the dynamics involved in identifying ...   
6  What are some independent entities that can be...   
7  What dynamics govern the relationships between...   

                                  reference_contexts  \
0  [JC3IEDM OVERVIEW – UK – DMWG 16 February 2007...   
1  [JC3IEDM OVERVIEW – UK – DMWG 16 February 2007...   
2  [Dynamics (How, what, when something is to be ...   
3  [Dynamics (How, what, when something is to be ...   
4  [<1-hop>\n\nJC3IEDM OVERVIEW – UK – DMWG 16 Fe...   
5  [<1-hop>\n\nJC3IEDM OVERVIEW – UK – DMWG 16 Fe...   
6  [<1-hop>\n\nJC3IEDM OVERVIEW – UK – DMWG 16 Fe...   
7  [<1-hop>\n\nJC3IEDM OVERVIEW – UK – DMWG 16 

In [27]:
df

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,what does ACTION mean in JC3IEDM overview?,[JC3IEDM OVERVIEW – UK – DMWG 16 February 2007...,"ACTION refers to an activity, or the occurrenc...",single_hop_specifc_query_synthesizer
1,What is the role of ACTION in the JC3IEDM model?,[JC3IEDM OVERVIEW – UK – DMWG 16 February 2007...,"ACTION is an activity, or the occurrence of an...",single_hop_specifc_query_synthesizer
2,What is the capability referred to in military...,"[Dynamics (How, what, when something is to be ...",The capability refers to the potential ability...,single_hop_specifc_query_synthesizer
3,What does CAPABILITY refer to in the context o...,"[Dynamics (How, what, when something is to be ...",CAPABILITY refers to the potential ability to ...,single_hop_specifc_query_synthesizer
4,What are some examples of independent entities...,[<1-hop>\n\nJC3IEDM OVERVIEW – UK – DMWG 16 Fe...,"Independent entities, as defined in Table 9, i...",multi_hop_abstract_query_synthesizer
5,What are the dynamics involved in identifying ...,[<1-hop>\n\nJC3IEDM OVERVIEW – UK – DMWG 16 Fe...,The dynamics involved in identifying independe...,multi_hop_abstract_query_synthesizer
6,What are some independent entities that can be...,[<1-hop>\n\nJC3IEDM OVERVIEW – UK – DMWG 16 Fe...,Independent entities that can be used as candi...,multi_hop_abstract_query_synthesizer
7,What dynamics govern the relationships between...,[<1-hop>\n\nJC3IEDM OVERVIEW – UK – DMWG 16 Fe...,The dynamics that govern the relationships bet...,multi_hop_abstract_query_synthesizer


In [28]:
os.environ["RAGAS_APP_TOKEN"] = "apt.4054-53fd2731274f-4395-87c1-2cd16721-ebca3"
testset.upload()

Testset uploaded! View at https://app.ragas.io/dashboard/alignment/testset/1bd690a2-e994-4c43-86a3-e182eca45bd7


'https://app.ragas.io/dashboard/alignment/testset/1bd690a2-e994-4c43-86a3-e182eca45bd7'

En la salida aparecen 4 columnas:
- user input que hace referencia a la pregunta introducida por el usuario
- reference_context son el conjunto de fragmentos o extractos de texto que se han recuperado del documento y que sustentan o justifica la respuesta
- reference es la respuesta "ideal" o esperada. El ground truth 
- synthesizer_name. Es el nombre del sintetizador que se uso para crear la muestra

### Evaluación del dataset

In [29]:
dataset = Dataset.from_dict(df)
dataset

Dataset({
    features: ['user_input', 'reference_contexts', 'reference', 'synthesizer_name'],
    num_rows: 8
})

In [30]:
import pandas as pd
from datasets import Dataset

# Supongamos que 'df' es el DataFrame obtenido del testset, por ejemplo:
# df = testset.to_pandas()

result_list = []

for index, row in df.iterrows():
    user_input = row["user_input"]
    reference = row["reference"]
    
    # Recuperar los documentos relevantes a partir de la consulta del usuario
    relevant_docs = rag.get_most_relevant_docs(user_input)
    # Generar la respuesta usando los documentos relevantes
    response = rag.generate_answer(user_input, relevant_docs)
    
    # Extraer el contenido de los documentos relevantes
    retrieved_contexts = [doc.page_content for doc in relevant_docs]
    
    sample = {
        "user_input": user_input,
        "retrieved_contexts": retrieved_contexts,
        "response": response,
        "reference": reference
    }
    
    result_list.append(sample)

# Convertir la lista de diccionarios en un Dataset de Hugging Face
dataset = Dataset.from_dict({
    "user_input": [sample["user_input"] for sample in result_list],
    "retrieved_contexts": [sample["retrieved_contexts"] for sample in result_list],
    "response": [sample["response"] for sample in result_list],
    "reference": [sample["reference"] for sample in result_list]
})

print(dataset)

Dataset({
    features: ['user_input', 'retrieved_contexts', 'response', 'reference'],
    num_rows: 8
})


In [31]:
# Función que genera la referencia utilizando el LLM generador.
# Aquí se puede reutilizar la misma función que usas para 'generate_answer' o definir otra si la lógica debe ser distinta.
def generate_reference(query, relevant_docs):
    # Por ejemplo, se podría llamar al mismo método:
    return rag.generate_answer(query, relevant_docs)

dataset = []

for query in questions:
    # Recuperar documentos relevantes para la consulta
    relevant_docs = rag.get_most_relevant_docs(query)
    # Generar la respuesta (lo que el sistema haría en modo producción)
    response = rag.generate_answer(query, relevant_docs)
    # Generar la referencia usando el LLM generador (esta será la "ground truth" para la evaluación)
    reference = generate_reference(query, relevant_docs)
    
    sample = {
        "user_input": query,
        "retrieved_contexts": [doc.page_content for doc in relevant_docs],
        "response": response,
        "reference": reference
    }
    dataset.append(sample)

# Si prefieres trabajar con un EvaluationDataset de ragas:
from ragas import EvaluationDataset

evaluation_dataset = EvaluationDataset.from_list(dataset)


In [32]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness

# Configurar el evaluador LLM y embeddings si es necesario
evaluator_llm = LangchainLLMWrapper(ChatOllama(model="llama3", base_url="https://ollama.gsi.upm.es/"))
# evaluator_embeddings si se requiere, de forma similar

result = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness()],
    llm=evaluator_llm
)
print(result)


Evaluating: 100%|████████████████████████████████████████████████████████████████████████| 9/9 [00:46<00:00,  5.22s/it]


{'context_recall': 1.0000, 'faithfulness': 1.0000, 'factual_correctness(mode=f1)': 0.8233}
