In [1]:
from langchain import hub
from langchain.schema import StrOutputParser, Document
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.retrievers import ParentDocumentRetriever
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_ollama import OllamaLLM
from langchain.embeddings import HuggingFaceBgeEmbeddings
from concurrent.futures import ThreadPoolExecutor
from langchain.document_loaders.base import BaseLoader
from langchain.docstore.document import Document
from typing import List, Optional
from functools import lru_cache
from langchain.storage import InMemoryStore
import time
import re
import os
import numpy as np

In [2]:
phi= "phi3:mini"
llama = "llama3.2:latest"
url_llm = "http://localhost:11434"

In [3]:
phi_llm = OllamaLLM(model=phi, base_url=url_llm, temperature=0)
llama_llm = OllamaLLM(model=llama, base_url=url_llm, temperature=0)

In [23]:
template = """
You are an assistant for question-answering tasks and an expert in research projects funded by the European Union under the Horizon 2020 programme.
Use the following context from Horizon 2020 projects to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.

Question: {question}

Context: {context}

Answer:
"""

prompt_evaluation = """
    You are a rigorous evaluator. You will compare a system-generated answer to the known correct answer and evaluate its accuracy.

    INSTRUCTIONS:
    1. Analyze the question, the correct answer, and the generated answer.
    2. Evaluate EXCLUSIVELY the match between the generated answer and the correct answer.
    3. Assigns a score from 1 to 5 based on this scale:

    1 = Completely incorrect (no match in any respect)
    2 = Mostly incorrect (minimal match)
    3 = Partially correct (some elements match)
    4 = Mostly correct (only minor errors)
    5 = Completely correct (exact match or equivalent).

    DATA:
    Question: {question}
    Correct answer: {answer_correct}
    Response generated: {answer}

    STRICT OUTPUT FORMAT (no deviations):
    Justification: [1-2 sentence concise explanation of differences/similarities]
    Score: [Integer from 1 to 5]
    """
prompt_evaluation_with_context ="""
You are an advanced evaluator. You must evaluate:
1. The accuracy of the generated answer compared to the correct answer.
2. The relevance of the context used.

INSTRUCTIONS:
- Analyse the question, the correct answer, the generated answer and the context.
- Evaluate whether the context adequately supports the answer.
- Assign two scores (1-10):

- Quality of the answer:
1 = Totally incorrect
10 = Totally correct.

- Quality of the context:
1 = Not at all relevant
10 = Totally relevant

DATA:
Question: {question}
Correct answer: {answer_correct}
Response generated: {answer}
Context used: {context}

STRICT OUTPUT FORMAT:
Response justification: [Concise explanation]
Score response: [1-10]
Score context: [1-10]

The output has to be ONLY THE STRICT OUTPUT FORMAT, with the Score of the answer and the context.
"""

filter_prompt = PromptTemplate.from_template("""
You are an assistant that determines whether a document provides relevant information to answer a question.

If the document contains any information that helps partially or fully answer the question, answer "Yes". 
If it is irrelevant, answer "No".

ONLY answer with "Yes" or "No" — no other words or punctuation.

Question: {question}
Document: {document}
Answer:
""")

prompt = ChatPromptTemplate.from_template(template)
prompt_evaluation = ChatPromptTemplate.from_template(prompt_evaluation)
prompt_evaluation_with_context = ChatPromptTemplate.from_template(prompt_evaluation_with_context)
filter_chain = LLMChain(llm=llama_llm, prompt=filter_prompt)

In [5]:
query_1 = "What is the objective of the project with grant agreement 740934?"
query_2 = "What is the total cost of the project with the acronym HYPERGRYD?"
query_3 = "How much funding was allocated for the project titled Transforming Research through Innovative Practices for Linked interdisciplinary Exploration?"
query_4 = "Which organisation played the role of coordinator in the grant agreement 777998?"
query_5 = "What topic does the project with the acronym INTERRFACE belong to?"
query_6 = "What legal basis was the project titled European Joint Programme on Radioactive Waste Management framed within?"
query_7 = "What type of proposal was the grant agreement 814416?"
query_8 = "To which master call was the project with the acronym G9NIGHT submitted?"
query_9 = "To which sub call was the project titled Electron Nanocrystallography submitted?"
query_10 = "Provide the grant agreement of 1 project which objective is related to artificial intelligence."
query_11 = "Provide the acronym of 1 project which objective is related to robotics."
query_12 = "Provide the title of 1 project which objective is related to geolocation."
query_13 = "Provide the objective of 1 project related to digital twin."
query_14 = "Provide the objective of 3 different projects related to corrosion."
query_15 = "Provide the title of 3 different projects which objective is related to offshore structures."
query_16 = "Provide the acronym of 3 different projects which objective is related to materials engineering."
query_17 = "Provide the grant agreement of 3 different projects which objective is related to nanocomposites."
query_18 = "Provide the name of an organisation that has participated in projects which objective is related to artificial intelligence."
query_19 = "Provide the name of an organisation which activity type is PRC and that has participated in projects which objective is related to robotics."
query_20 = "Provide the PIC of an organisation that is a small or medium enterprise and has participated in projects which objective is related to geolocation."
query_21 = "Provide the name of an organisation that has played the role of coordinator in projects which objective is related to digital twin."
query_22 = "Provide the PIC of a Spanish organisation that has participated in projects which objective is related to corrosion."
query_23 = "Provide the name of an european organisation that has participated in projects which objective is related to offshore structures."
query_24 = "Provide the PIC of an european small or medium enterprise that has participated in projects which objective is related to materials engineering."
query_25 = "Provide the name of an european small or medium enterprise that has played the role of coordinator in projects which objective is related to nanocomposites."

In [6]:
answer_1_correct="The project’s objective is to combat violent extremism by analyzing its root causes, developing preventive and repressive measures, and countering extremist narratives through collaboration with civil society and LEAs, all while upholding fundamental rights."
answer_2_correct ="The total cost of the project with the acronym HYPERGRYD (grant agreement 101036656) was €5,987,875.00."
answer_3_correct ="Transforming Research through Innovative Practices for Linked interdisciplinary Exploration” (TRIPLE), identified by grant agreement 863420, received a total EU contribution of € 5,626,548.75. This funding was allocated as part of Horizon 2020 under the “EXCELLENT SCIENCE – Research Infrastructures"
answer_4_correct ="""the role of coordinator in the grant agreement 777998. The participation cost of the organisation with PIC 960782479 in the grant agreement 777998.
The organisation with Participant Identification Code (PIC) 960782479 participated in the grant agreement 777998. The name of this organisation is UNIVERSIDADE NOVA DE LISBOA. The organisation with PIC 960782479 is not a small or medium-sized enterprise. The organisation with PIC 960782479 develops an activity of type HES. The organisation with PIC 960782479 is based in the country PT, codified under ISO 3166. The organisation with PIC 960782479 played the role of coordinator in the grant agreement 777998. The participation cost of the organisation with PIC 960782479 in the grant agreement 777998 was 409500.0 euros. The total amount funded to the organization with PIC 960782479 in the grant agreement 777998 was 409500.0 euros.
"""
answer_5_correct ="The grant agreement 824330 was framed within the topic LC-SC3-ES-5-2018-2020TSO – DSO – Consumer: Large-scale demonstrations of innovative grid services through demand response, storage and small-scale (RES) generation. The grant agreement 824330 was framed within the master call H2020-LC-SC3-2018-2019-2020. The grant agreement 824330 was framed within the subcall H2020-LC-SC3-2018-ES-SCC."
answer_6_correct ="The grant agreement 847593 was framed within the legal basis H2020-EuratomEuratom."
answer_7_correct ="The grant agreement 814416 was a Research and Innovation Action (RIA) proposal."
answer_8_correct ="The grant agreement 101036041 was framed within the master call H2020-MSCA-NIGHT-2020bis."
answer_9_correct ="The grant agreement 956099 was framed within the legal basis H2020-EU.1.3.EXCELLENT SCIENCE - Marie Skłodowska-Curie Actions."

## MODELO sentence-transformers/paraphrase-MiniLM-L6-v2

In [7]:
model_name = "sentence-transformers/paraphrase-MiniLM-L6-v2"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
emb = HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)

  emb = HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)


## EXPERIMENTO 7

In [8]:
vectorstore_7_txt = FAISS.load_local("faiss_index_proyectos_7_txt", embeddings=emb, 
                                 allow_dangerous_deserialization=True)

In [9]:
retriever_multi_7_txt = MultiQueryRetriever.from_llm(
    retriever=vectorstore_7_txt.as_retriever(search_kwargs={"k": 3}),
    llm=phi_llm,
    include_original=True
)

In [10]:
resultados = {}

for i in range(1, 10):
    inicio = time.time()
    current_query = eval(f"query_{i}")
    filtered_docs = []
    all_docs = []
    def format_docs(docs):
        all_docs.clear()
        all_docs.extend(docs) 
        
        filtered_docs.clear() 
        for doc in docs:
            response = filter_chain.invoke({
                "question": current_query,
                "document": doc.page_content
            })
            respuesta_texto = response["text"]
            print(f"\n--------\n {respuesta_texto} \n--------\n")
            if respuesta_texto.lower().startswith(("sí", "yes")):
                filtered_docs.append(doc)
        return "\n\n".join(doc.page_content for doc in filtered_docs)

    rag_chain_multi_7_txt = (
        {
            "context": retriever_multi_7_txt | format_docs,
            "question": RunnablePassthrough()
        }
        | prompt
        | phi_llm
        | StrOutputParser()
    )
    
    # Ejecutar RAG con los docs filtrados
    answer = rag_chain_multi_7_txt.invoke(current_query)
    fin = time.time()
    tiempo_ejecucion = fin - inicio

    # Almacenar todos los datos en el diccionario
    resultados[f"query_{i}"] = {
        "query": current_query,
        "answer": answer,
        "all_docs": all_docs,
        "filtered_docs": filtered_docs
    }

    print(f"Query {i}: Tiempo = {tiempo_ejecucion:.4f} s")
    print(f"La respuesta es: {answer}\n\n---\n\n")


--------
 No 
--------


--------
 No 
--------


--------
 Yes 
--------


--------
 Yes 
--------


--------
 Yes 
--------


--------
 Yes 
--------


--------
 No 
--------


--------
 No 
--------


--------
 No 
--------


--------
 Yes 
--------

Query 1: Tiempo = 337.3938 s
La respuesta es: The objective of grant agreement 740934 under Horizon 2020 was to provide a conceptual basis for smart provision of public goods by EU agriculture and forestry ecosystems in the context of intensification scenarios. It aimed at offering tools, evidence, policy options, and improved incentives through transdisciplinary approaches while considering legislation impact across multiple scales within thirteen European countries.

---



--------
 No 
--------


--------
 No 
--------


--------
 No 
--------


--------
 No 
--------


--------
 Yes 
--------


--------
 No 
--------


--------
 Yes 
--------


--------
 Yes 
--------


--------
 Yes 
--------


--------
 Yes 
--------


--------


In [11]:
evaluaciones = {}  # Diccionario para guardar los resultados de la evaluación
num_queries=9
for i in range(1, num_queries + 1):
    # Obtener datos del diccionario `resultados`
    question = resultados[f"query_{i}"]["query"]
    answer = resultados[f"query_{i}"]["answer"]
    answer_correct = eval(f"answer_{i}_correct")
    prompt_evaluation=prompt_evaluation.format(question=question, answer=answer, answer_correct=answer_correct)
    response = llama_llm.invoke(prompt_evaluation)
    evaluaciones[f"query_{i}"] = response

    # Mostrar resultados
    print(f"\n--- Evaluación para Query {i} ---")
    print(f"Pregunta: {question}")
    print(f"Respuesta correcta: {answer_correct}")
    print(f"Respuesta generada: {answer}")
    print(f"Evaluación del LLM:\n{response}")
    print("---\n")


--- Evaluación para Query 1 ---
Pregunta: What is the objective of the project with grant agreement 740934?
Respuesta correcta: The project’s objective is to combat violent extremism by analyzing its root causes, developing preventive and repressive measures, and countering extremist narratives through collaboration with civil society and LEAs, all while upholding fundamental rights.
Respuesta generada: The objective of grant agreement 740934 under Horizon 2020 was to provide a conceptual basis for smart provision of public goods by EU agriculture and forestry ecosystems in the context of intensification scenarios. It aimed at offering tools, evidence, policy options, and improved incentives through transdisciplinary approaches while considering legislation impact across multiple scales within thirteen European countries.
Evaluación del LLM:
Justification:
The generated answer is completely unrelated to the correct answer, as it discusses EU agriculture and forestry ecosystems in the 


--- Evaluación para Query 8 ---
Pregunta: To which master call was the project with the acronym G9NIGHT submitted?
Respuesta correcta: The grant agreement 101036041 was framed within the master call H2020-MSCA-NIGHT-2020bis.
Respuesta generada: The project G9NIGHT was submitted to master call H2020-MSCA-IF-2020.
Evaluación del LLM:
Justification:
The generated answer is completely unrelated to the correct answer, as it discusses EU agriculture and forestry ecosystems in the context of Horizon 2020, whereas the correct answer focuses on combating violent extremism through collaboration with civil society and LEAs. The two objectives are distinct and unrelated.

Score: 1
---


--- Evaluación para Query 9 ---
Pregunta: To which sub call was the project titled Electron Nanocrystallography submitted?
Respuesta correcta: The grant agreement 956099 was framed within the legal basis H2020-EU.1.3.EXCELLENT SCIENCE - Marie Skłodowska-Curie Actions.
Respuesta generada: The Electron Nanocrystallo

In [12]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [13]:
evaluaciones_with_context = {}
puntuaciones = {}  # Nuevo diccionario para almacenar los scores
num_queries = 2

for i in range(1, num_queries + 1):
    question = resultados[f"query_{i}"]["query"]
    answer = resultados[f"query_{i}"]["answer"]
    answer_correct = eval(f"answer_{i}_correct")
    context=format_docs(resultados[f"query_{i}"]["filtered_docs"])
    prompt_filled = prompt_evaluation_with_context.format(
        question=question,
        answer=answer,
        answer_correct=answer_correct,
        context=context
    )
    response = llama_llm.invoke(prompt_filled)
    evaluaciones_with_context[f"query_{i}"] = response
    
    print(f"\n--- Evaluación para Query {i} ---")
    print(f"Pregunta: {question}")
    print(f"Respuesta correcta: {answer_correct}")
    print(f"Respuesta generada: {answer}")
    print(f"Evaluación del LLM:\n{response}")
    print("---\n")
    score_respuesta = re.search(r"**Score respuesta**: (\d+)", response).group(1)
    score_contexto = re.search(r"**Score contexto**: (\d+)", response).group(1)

    # Convertir a enteros
    score_respuesta = int(score_respuesta)
    score_contexto = int(score_contexto)

    print("Score respuesta:", score_respuesta)  # Output: 1
    print("Score contexto:", score_contexto)   


--- Evaluación para Query 1 ---
Pregunta: What is the objective of the project with grant agreement 740934?
Respuesta correcta: The project’s objective is to combat violent extremism by analyzing its root causes, developing preventive and repressive measures, and countering extremist narratives through collaboration with civil society and LEAs, all while upholding fundamental rights.
Respuesta generada: The objective of grant agreement 740934 under Horizon 2020 was to provide a conceptual basis for smart provision of public goods by EU agriculture and forestry ecosystems in the context of intensification scenarios. It aimed at offering tools, evidence, policy options, and improved incentives through transdisciplinary approaches while considering legislation impact across multiple scales within thirteen European countries.
Evaluación del LLM:
**Respuesta**

**Justificación de la respuesta**

La respuesta generada no se ajusta a la pregunta original. La respuesta correcta habla sobre co

NameError: name 'respuesta_llm' is not defined

In [40]:
def extract_score(response_text, keyword):
    pattern = rf"(?i)\*{{0,3}}score[_ ]{re.escape(keyword)}\*{{0,3}}\s*[:=]\s*([0-9]+)"
    
    match = re.search(pattern, response_text)
    if match:
        return int(match.group(1))
    else:
        raise AttributeError(f"Score for '{keyword}' not found in text.")

In [27]:
evaluaciones_with_context = {}
puntuaciones = {} 
num_queries = 9

for i in range(1, num_queries + 1):
    question = resultados[f"query_{i}"]["query"]
    answer = resultados[f"query_{i}"]["answer"]
    answer_correct = eval(f"answer_{i}_correct")
    context = format_docs(resultados[f"query_{i}"]["filtered_docs"])
    
    prompt_filled = prompt_evaluation_with_context.format(
        question=question,
        answer=answer,
        answer_correct=answer_correct,
        context=context
    )
    
    response = llama_llm.invoke(prompt_filled)
    evaluaciones_with_context[f"query_{i}"] = response
    
    print(f"\n--- Evaluación para Query {i} ---")
    print(f"Pregunta: {question}")
    print(f"Respuesta correcta: {answer_correct}")
    print(f"Respuesta generada: {answer}")
    print(f"Evaluación del LLM:\n{response}")
    print("---\n")
    
    # Extraer scores de la respuesta del LLM (response, no respuesta_llm)
    try:
        score_respuesta = extract_score(response, "response")
        score_contexto = extract_score(response, "context")
    except AttributeError:
        print(f"Error: No se encontraron scores en la respuesta para Query {i}")
        score_respuesta = None
        score_contexto = None
    
    # Guardar en el diccionario de puntuaciones
    puntuaciones[f"query_{i}"] = {
        "score_respuesta": score_respuesta,
        "score_contexto": score_contexto
    }
    
    print("Score respuesta:", score_respuesta)
    print("Score contexto:", score_contexto)
    print("\n")


--- Evaluación para Query 1 ---
Pregunta: What is the objective of the project with grant agreement 740934?
Respuesta correcta: The project’s objective is to combat violent extremism by analyzing its root causes, developing preventive and repressive measures, and countering extremist narratives through collaboration with civil society and LEAs, all while upholding fundamental rights.
Respuesta generada: The objective of grant agreement 740934 under Horizon 2020 was to provide a conceptual basis for smart provision of public goods by EU agriculture and forestry ecosystems in the context of intensification scenarios. It aimed at offering tools, evidence, policy options, and improved incentives through transdisciplinary approaches while considering legislation impact across multiple scales within thirteen European countries.
Evaluación del LLM:
Response_justification: The generated answer is completely unrelated to the correct answer and does not even mention the grant agreement 740934. 


--- Evaluación para Query 8 ---
Pregunta: To which master call was the project with the acronym G9NIGHT submitted?
Respuesta correcta: The grant agreement 101036041 was framed within the master call H2020-MSCA-NIGHT-2020bis.
Respuesta generada: The project G9NIGHT was submitted to master call H2020-MSCA-IF-2020.
Evaluación del LLM:
Response_justification: The generated answer is partially correct, as it mentions the master call H2020-MSCA-IF-2020, but incorrectly includes "H2020-MSCA-NIGHT-2020bis" which is not present in the provided data. The correct master call for G9NIGHT is actually H2020-MSCA-NIGHT-2020bis.
Score response: 6
Score context: 8
---

Score respuesta: 6
Score contexto: 8



--- Evaluación para Query 9 ---
Pregunta: To which sub call was the project titled Electron Nanocrystallography submitted?
Respuesta correcta: The grant agreement 956099 was framed within the legal basis H2020-EU.1.3.EXCELLENT SCIENCE - Marie Skłodowska-Curie Actions.
Respuesta generada: The Elect

In [28]:
scores_respuesta = [v['score_respuesta'] for v in puntuaciones.values() if v['score_respuesta'] is not None]
scores_contexto = [v['score_contexto'] for v in puntuaciones.values() if v['score_contexto'] is not None]

max_score = 10
scores_respuesta_norm = [score / max_score for score in scores_respuesta] 
scores_contexto_norm = [score / max_score for score in scores_contexto]   

# Calcular métricas
mean_respuesta = np.mean(scores_respuesta_norm) 
mean_contexto = np.mean(scores_contexto_norm)

print(f"Nota media de las respuestas: {mean_respuesta:.3f}")
print(f"Nota media de los contextos seleccionados: {mean_contexto:.3f}") 

Nota media de las respuestas: 0.425
Nota media de los contextos seleccionados: 0.713


In [42]:
prompt_evaluation_extended = """
You are an advanced evaluator. You must evaluate:

1. Faithfulness: Is the generated answer fully supported by the retrieved context? (Score 1–10)
2. Coverage: Does the retrieved context contain enough information to correctly answer the question? (Score 1–10)
3. Fluency: Is the generated answer clear, coherent, and well-written? (Score 1–10)

INSTRUCTIONS:
- Read the question, the correct answer, the generated answer, and the context.
- Assign three scores between 1 and 10.

DATA:
Question: {question}
Correct answer: {answer_correct}
Generated answer: {answer}
Context: {context}

STRICT OUTPUT FORMAT:
Justification: [Concise explanation]
Score_faithfulness: [1–10]
Score_coverage: [1–10]
Score_fluency: [1–10]

The output has to be ONLY THE STRICT OUTPUT FORMAT.
"""


evaluaciones_extended = {}
puntuaciones_extended = {}
num_queries = 10

for i in range(1, num_queries + 1):
    question = resultados[f"query_{i}"]["query"]
    answer = resultados[f"query_{i}"]["answer"]
    answer_correct = eval(f"answer_{i}_correct")
    context = format_docs(resultados[f"query_{i}"]["filtered_docs"])
    
    prompt_filled = prompt_evaluation_extended.format(
        question=question,
        answer=answer,
        answer_correct=answer_correct,
        context=context
    )
    response = llama_llm.invoke(prompt_filled)
    evaluaciones_extended[f"query_{i}"] = response
    
    print(f"\n--- Evaluación extendida para Query {i} ---")
    print(f"Pregunta: {question}")
    print(f"Respuesta correcta: {answer_correct}")
    print(f"Respuesta generada: {answer}")
    print(f"Evaluación del LLM:\n{response}")
    print("---\n")
    
    try:
        score_faithfulness = extract_score(response, "faithfulness")
        score_coverage = extract_score(response, "coverage")
        score_fluency = extract_score(response, "fluency")
    except AttributeError:
        print(f"Error: No se encontraron scores en la respuesta para Query {i}")
        score_faithfulness = None
        score_coverage = None
        score_fluency = None



--- Evaluación extendida para Query 1 ---
Pregunta: What is the objective of the project with grant agreement 740934?
Respuesta correcta: The project’s objective is to combat violent extremism by analyzing its root causes, developing preventive and repressive measures, and countering extremist narratives through collaboration with civil society and LEAs, all while upholding fundamental rights.
Respuesta generada: The objective of grant agreement 740934 under Horizon 2020 was to provide a conceptual basis for smart provision of public goods by EU agriculture and forestry ecosystems in the context of intensification scenarios. It aimed at offering tools, evidence, policy options, and improved incentives through transdisciplinary approaches while considering legislation impact across multiple scales within thirteen European countries.
Evaluación del LLM:
Justification: The generated answer is not fully supported by the retrieved context, as it mentions a different project objective and s


--- Evaluación extendida para Query 8 ---
Pregunta: To which master call was the project with the acronym G9NIGHT submitted?
Respuesta correcta: The grant agreement 101036041 was framed within the master call H2020-MSCA-NIGHT-2020bis.
Respuesta generada: The project G9NIGHT was submitted to master call H2020-MSCA-IF-2020.
Evaluación del LLM:
Score_faithfulness: 6
Score_coverage: 8
Score_fluency: 4
---


--- Evaluación extendida para Query 9 ---
Pregunta: To which sub call was the project titled Electron Nanocrystallography submitted?
Respuesta correcta: The grant agreement 956099 was framed within the legal basis H2020-EU.1.3.EXCELLENT SCIENCE - Marie Skłodowska-Curie Actions.
Respuesta generada: The Electron Nanocrystallography project was submitted to subcall H2020-MSCA-IF-2020 within master call H2020-MSCA-IF-2018.
Evaluación del LLM:
Justification: The generated answer was partially supported by the retrieved context, as it mentioned the correct subcall (H2020-MSCA-IF-2020) but in

KeyError: 'query_10'

In [36]:
def calcular_global_score(score_faithfulness, score_coverage, score_fluency):
    """
    Calcula el global_score a partir de los tres scores.
    Devuelve un diccionario con todos los scores y el global.
    """
    if None not in (score_faithfulness, score_coverage, score_fluency):
        global_score = (
            0.4 * score_faithfulness +
            0.4 * score_coverage +
            0.2 * score_fluency
        )
    else:
        global_score = None
    
    return {
        "score_faithfulness": score_faithfulness,
        "score_coverage": score_coverage,
        "score_fluency": score_fluency,
        "global_score": global_score
    }

In [43]:
scores_dict = calcular_global_score(score_faithfulness, score_coverage, score_fluency)

puntuaciones_extended[f"query_{i}"] = scores_dict


print("Score faithfulness:", scores_dict["score_faithfulness"])
print("Score coverage:", scores_dict["score_coverage"])
print("Score fluency:", scores_dict["score_fluency"])
print("Global score:", scores_dict["global_score"])

Score faithfulness: 6
Score coverage: 9
Score fluency: 7
Global score: 7.4
