# Evaluación cuantitativa del sistema

In [None]:
# CARGA DE DATOS PARA LA EVALUACION
import pandas as pd
import os

# Location of the preprocessed corpus CSV.
# NOTE(review): this is an absolute, machine-specific path; point it at the local
# copy of the file before running the notebook on another machine.
ruta_corpus_procesado = "C:\\Users\\alvar\\OneDrive\\Escritorio\\TFM\\data\\Procesado\\corpus_procesado.csv"

try:
    # Force paperId to be read as a string column.
    df_corpus = pd.read_csv(ruta_corpus_procesado, dtype={'paperId': str})
    print(f"DataFrame cargado desde '{ruta_corpus_procesado}'. Contiene {len(df_corpus)} filas.")

    # Keep only the columns the evaluation needs and drop rows missing any of them.
    # Chained (non-inplace) so re-running the cell always rebuilds df_eval from df_corpus.
    columnas_eval = ['paperId', 'title', 'processed_text']
    df_eval = df_corpus[columnas_eval].dropna(subset=columnas_eval).copy()

    print(f"Se utilizarán {len(df_eval)} documentos con paperId, title y abstract válidos para la evaluación.")
    display(df_eval.head())

except FileNotFoundError:
    # Report with print(): `st` (Streamlit) is never imported in this notebook, so the
    # previous st.error(...) call raised NameError and masked the real problem.
    # df_eval is left undefined in this case, so later cells cannot run until the path is fixed.
    print(f"Error: No se encontró el archivo del corpus en la ruta: {ruta_corpus_procesado}")
    

DataFrame cargado desde 'C:\Users\alvar\OneDrive\Escritorio\TFM\data\Procesado\corpus_procesado.csv'. Contiene 1104 filas.
Se utilizarán 1104 documentos con paperId, title y abstract válidos para la evaluación.


Unnamed: 0,paperId,title,processed_text
0,4dc2617f15847af822d1f89c2e5cca39c8cdb7ad,Effect of a Machine Learning Recommender Syste...,effect of a machine learning recommender syste...
1,9778a564510da05080f978fcff23928ead0f1db9,A Machine Learning Recommender System to Tailo...,background and objectives nursing homes (nhs) ...
2,6a8a21cab225a428c41e3f8c38e18535f68ffacf,A Machine Learning Recommender System Based on...,changing and moving toward online shopping has...
3,fc88d1692a0f53f2821499fa8b8f4d049775585f,Matrix Factorization Collaborative-Based Recom...,saudi arabia’s tourism sector has recently sta...
4,9998dc44714a0721caa671243391c1ed5ecfa222,Smart Crop Recommender System-A Machine Learni...,machine learning has proven its efficacy in so...


In [2]:
# TOPIC MODELING CON BERTopic
from bertopic import BERTopic
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer

# 1. Prepare the data
# Filter once, so that the documents, their paperIds and the rows that later receive
# a topic id all come from exactly the same set of rows.
df_docs = df_eval.dropna(subset=['processed_text'])
docs = df_docs['processed_text'].tolist()
paper_ids_list = df_docs['paperId'].tolist()

if not docs:
    print("No hay abstracts válidos para procesar.")
else:
    # 2. Sentence-embedding model (per the original author's note, the same one used by
    #    the recommender system)
    embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

    # Count vectorizer that drops English stop words
    vectorizer_model = CountVectorizer(stop_words="english")

    # 3. Initialise BERTopic
    # NOTE(review): no random seed is fixed here. If the default dimensionality-reduction
    # step is stochastic (TODO confirm against the BERTopic docs), topic ids can change
    # between runs, while the ground-truth cell below maps hand-written objectives to
    # specific topic ids.
    topic_model = BERTopic(
        embedding_model=embedding_model,
        language="english",
        min_topic_size=35,  # minimum number of documents a topic must have; helps avoid very small topics
        vectorizer_model=vectorizer_model,
        verbose=True
    )

    # 4. Fit the model and assign a topic to every document
    print("Entrenando el modelo BERTopic y asignando tópicos...")
    topics, probabilities = topic_model.fit_transform(docs)

    # Attach the topics by index label rather than by position: if any row was filtered
    # out above, a plain list assignment would fail with a length mismatch. Rows without
    # a document (if any) get NaN.
    df_eval['topic_id'] = pd.Series(topics, index=df_docs.index)
    print("\nProceso finalizado. Se añadio la columna 'topic_id' al DataFrame.")
    display(df_eval.head())

  from .autonotebook import tqdm as notebook_tqdm
2025-06-07 18:53:53,180 - BERTopic - Embedding - Transforming documents to embeddings.


Entrenando el modelo BERTopic y asignando tópicos...


Batches: 100%|██████████| 35/35 [01:27<00:00,  2.51s/it]
2025-06-07 18:55:21,336 - BERTopic - Embedding - Completed ✓
2025-06-07 18:55:21,343 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2025-06-07 18:55:58,371 - BERTopic - Dimensionality - Completed ✓
2025-06-07 18:55:58,371 - BERTopic - Cluster - Start clustering the reduced embeddings
2025-06-07 18:55:58,553 - BERTopic - Cluster - Completed ✓
2025-06-07 18:55:58,568 - BERTopic - Representation - Fine-tuning topics using representation models.
2025-06-07 18:55:59,248 - BERTopic - Representation - Completed ✓



Proceso finalizado. Se añadio la columna 'topic_id' al DataFrame.


Unnamed: 0,paperId,title,processed_text,topic_id
0,4dc2617f15847af822d1f89c2e5cca39c8cdb7ad,Effect of a Machine Learning Recommender Syste...,effect of a machine learning recommender syste...,3
1,9778a564510da05080f978fcff23928ead0f1db9,A Machine Learning Recommender System to Tailo...,background and objectives nursing homes (nhs) ...,3
2,6a8a21cab225a428c41e3f8c38e18535f68ffacf,A Machine Learning Recommender System Based on...,changing and moving toward online shopping has...,3
3,fc88d1692a0f53f2821499fa8b8f4d049775585f,Matrix Factorization Collaborative-Based Recom...,saudi arabia’s tourism sector has recently sta...,3
4,9998dc44714a0721caa671243391c1ed5ecfa222,Smart Crop Recommender System-A Machine Learni...,machine learning has proven its efficacy in so...,3


In [None]:
# VISUALIZACION DE TOPICOS ENCONTRADOS
# Only inspect the topics if the training cell actually created `topic_model`.
if 'topic_model' in locals():
    # Topic -1 is the outlier bucket
    print("--- Información de Tópicos Encontrados ---")
    display(topic_model.get_topic_info())

    # Show the most representative documents of each topic; useful to understand what
    # each group is about and to write a test query for it.
    print("\n--- Documentos Representativos por Tópico ---")
    for topic_id in topic_model.get_topics().keys():
        if topic_id == -1:
            continue  # skip outliers

        print(f"\n===== TÓPICO {topic_id} =====")
        representative_docs = topic_model.get_representative_docs(topic_id)

        # Print the topic keywords (get_topic yields (word, score) pairs; scores are not shown)
        keywords = ", ".join(word for word, _score in topic_model.get_topic(topic_id))
        print(f"Palabras Clave: {keywords}\n")

        # Print the first two representative documents, truncated to 2000 characters
        for doc in representative_docs[:2]:
            print(f"  -> Documento Representativo: {doc[:2000]}...")

    # Interactive visualisation.
    # Jupyter only auto-renders the last top-level expression of a cell; a bare call
    # nested inside this `if` block has its return value discarded, which is why the
    # figure never appeared. Hand it to display() explicitly.
    display(topic_model.visualize_topics())
else:
    print("error: El modelo BERTopic no se ha entrenado correctamente. Verificar los datos de entrada.")

--- Información de Tópicos Encontrados ---


Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,0,185,0_cancer_cardiovascular_sports_diseases,"[cancer, cardiovascular, sports, diseases, exe...",[summary in 1995 the american college of sport...
1,1,178,1_learning_deep_image_medical,"[learning, deep, image, medical, vision, compu...","[in recent years, deep learning (dl) has been ..."
2,2,138,2_ai_search_academic_education,"[ai, search, academic, education, generative, ...",[artificial intelligence (ai) has brought trem...
3,3,107,3_recommender_recommendation_learning_machine,"[recommender, recommendation, learning, machin...",[recommender systems algorithms are generally ...
4,4,96,4_water_climate_change_management,"[water, climate, change, management, resource,...","[the choshui river basin, the mother river in ..."
5,5,91,5_energy_renewable_power_systems,"[energy, renewable, power, systems, sources, w...","[iran, endowed with abundant renewable and non..."
6,6,62,6_forecasting_economic_forecast_models,"[forecasting, economic, forecast, models, fore...",[confidence in macro-economic forecasting has ...
7,7,53,7_business_economy_circular_models,"[business, economy, circular, models, sharing,...",[the article focuses on a bottom-up approach t...
8,8,51,8_supply_chain_optimization_management,"[supply, chain, optimization, management, mode...","[over the years, the global supply chain has e..."
9,9,49,9_mental_health_chatbots_chatbot,"[mental, health, chatbots, chatbot, support, a...",[mental health issues are a growing concern wo...



--- Documentos Representativos por Tópico ---

===== TÓPICO 0 =====
Palabras Clave: cancer, cardiovascular, sports, diseases, exercise, research, oncology, clinical, data, studies

  -> Documento Representativo: summary in 1995 the american college of sports medicine and the centers for disease control and prevention published national guidelines on physical activity and public health. the committee on exercise and cardiac rehabilitation of the american heart association endorsed and supported these recommendations. the purpose of the present report is to update and clarify the 1995 recommendations on the types and amounts of physical activity needed by healthy adults to improve and maintain health. development of this document was by an expert panel of scientists, including physicians, epidemiologists, exercise scientists, and public health specialists. this panel reviewed advances in pertinent physiologic, epidemiologic, and clinical scientific data, including primary research artic

In [None]:
# CONSTRUCCION DEL GROUND TRUTH
# Hand-written study objective (used later as the test query) for each topic id.
objetivos_por_topico = {
    0: "Investigating evidence-based practices and research trends in clinical medicine, focusing on oncology, cardiovascular diseases, and sports medicine.",
    1: "Exploring the application of deep learning models for computer vision tasks, particularly in the field of medical image analysis.",
    2: "Analyzing the applications, impact, and ethical considerations of generative AI and NLP in higher education and academic research.",
    3: "Analyzing machine learning algorithms and evaluation methodologies for recommender systems across various domains.",
    4: "Assessing the impacts of climate change on water resources and the development of sustainable water management strategies.",
    5: "Investigating the integration, optimization, and policy frameworks for renewable energy systems.",
    6: "Evaluating and comparing statistical and machine learning models for economic forecasting.",
    7: "Analysis of business models for the circular economy and the sharing economy.",
    8: "Exploring the use of AI and data analytics for supply chain optimization.",
    9: "Assessing the application and effectiveness of AI-powered chatbots for mental health support.",
    10: "Using AI and data analytics to improve sports performance, team strategy, and injury prevention.",
    11: "Investigating advancements in robotics, including soft robotics and automation, for modern manufacturing and Industry 4.0."
}

# Build `ground_truth`: objective text -> list of paperIds assigned to that topic.
ground_truth = {}
for topic_id, objetivo in objetivos_por_topico.items():
    # paperIds of the rows of df_eval whose topic_id equals the current one
    pertenece_al_topico = df_eval['topic_id'].eq(topic_id)
    ids_del_topico = df_eval.loc[pertenece_al_topico, 'paperId'].tolist()

    # Topics with no documents are left out of the ground truth
    if not ids_del_topico:
        continue
    ground_truth[objetivo] = ids_del_topico

print("\n--- diccionario 'ground_truth' generado exitosamente con los nuevos tópicos ---")

# Report how many objective -> ids pairs were built
print(f"Se han creado {len(ground_truth)} pares de 'Objetivo de Estudio' -> 'Lista de PaperIDs Relevantes'.\n")

# Print the first entry as a sanity check
if ground_truth:
    primer_objetivo = next(iter(ground_truth))
    primeros_ids = ground_truth[primer_objetivo]
    print("Ejemplo de Tópico:")
    print(f"OBJETIVO DE PRUEBA: '{primer_objetivo}'")
    print(f"  -> NÚMERO DE DOCUMENTOS RELEVANTES (GROUND TRUTH): {len(primeros_ids)}")
    print(f"  -> EJEMPLO IDs: {primeros_ids[:3]}")


--- diccionario 'ground_truth' generado exitosamente con los nuevos tópicos ---
Se han creado 12 pares de 'Objetivo de Estudio' -> 'Lista de PaperIDs Relevantes'.

Ejemplo de Tópico:
OBJETIVO DE PRUEBA: 'Investigating evidence-based practices and research trends in clinical medicine, focusing on oncology, cardiovascular diseases, and sports medicine.'
  -> NÚMERO DE DOCUMENTOS RELEVANTES (GROUND TRUTH): 185
  -> EJEMPLO IDs: ['63fbfdb5b0a4bbf76ca0c4403bcc623803674600', '22aa29fa637ef0df2aa19282b31ea15c2b92b563', '4ba2e7c6249222b125348c4fbbeea0a57dd501cd']


In [None]:
# EVALUACION DEL SISTEMA

import sys
import os
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
     sys.path.append(module_path)
from src.motor_de_recomendaciones import get_recommendations


def _metrics_at_k(recommended_ids, relevant_ids, k):
    """Compute hit count and precision/recall/F1 for one query.

    Args:
        recommended_ids: paperIds returned by the recommender for the query.
        relevant_ids: paperIds considered relevant for the query (ground truth).
        k: number of recommendations requested; used as the precision denominator.

    Returns:
        Tuple ``(n_hits, n_relevant, precision, recall, f1)``. Each ratio is 0 when
        its denominator is 0.
    """
    relevant = set(relevant_ids)
    # Duplicated recommendations count once: hits are a set intersection
    n_hits = len(set(recommended_ids).intersection(relevant))
    n_relevant = len(relevant)

    precision = n_hits / k if k > 0 else 0
    recall = n_hits / n_relevant if n_relevant > 0 else 0

    if (precision + recall) > 0:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 0
    return n_hits, n_relevant, precision, recall, f1


# --- Step 1: evaluation setup ---
K = 50  # number of recommendations evaluated per query
evaluation_results = []  # one result record per query

print(f"Iniciando evaluación cuantitativa para {len(ground_truth)} tópicos...")
print(f"Se obtendrán las Top-{K} recomendaciones para cada objetivo de estudio.\n")

# --- Step 2: run every ground-truth query through the recommender and score it ---
for query, relevant_doc_ids in ground_truth.items():
    print(f"--- Evaluando consulta: '{query[:80]}...'")

    # Top-K recommendations from the recommendation engine; only their paperIds are scored
    recommended_papers = get_recommendations(user_query=query, top_n=K)
    recommended_ids = [rec['paperId'] for rec in recommended_papers]

    n_hits, total_relevant_docs, precision_at_k, recall_at_k, f1_at_k = _metrics_at_k(
        recommended_ids, relevant_doc_ids, K
    )

    print(f"  -> Resultados: {n_hits} aciertos. Precision: {precision_at_k:.2f}, Recall: {recall_at_k:.2f}, F1-Score: {f1_at_k:.2f}")

    # Store the record for the summary table
    evaluation_results.append({
        "Objetivo de Estudio (Query)": query,
        "Total Relevantes": total_relevant_docs,
        "Aciertos": n_hits,
        "Precision": precision_at_k,
        "Recall": recall_at_k,
        "F1-Score": f1_at_k
    })

print("\n--- Evaluación Finalizada ---")

# --- Step 3: show the per-query table and the averaged metrics ---
if not evaluation_results:
    print("No se generaron resultados de evaluación.")
else:
    df_results = pd.DataFrame(evaluation_results)

    # Unweighted mean over queries
    mean_precision = df_results['Precision'].mean()
    mean_recall = df_results['Recall'].mean()
    mean_f1_score = df_results['F1-Score'].mean()

    print("\n--- Tabla de Resultados por Consulta ---")
    # Lift the row and column-width limits so the full table is rendered
    with pd.option_context('display.max_rows', None, 'display.max_colwidth', None):
        display(df_results)

    print("\n--- Promedio de Métricas Globales ---")
    print(f"  - Precision Promedio: {mean_precision:.4f}")
    print(f"  - Recall Promedio:    {mean_recall:.4f}")
    print(f"  - F1-Score Promedio:   {mean_f1_score:.4f}")

Iniciando evaluación cuantitativa para 12 tópicos...
Se obtendrán las Top-50 recomendaciones para cada objetivo de estudio.

--- Evaluando consulta: 'Investigating evidence-based practices and research trends in clinical medicine,...'
  -> Resultados: 45 aciertos. Precision: 0.90, Recall: 0.24, F1-Score: 0.38
--- Evaluando consulta: 'Exploring the application of deep learning models for computer vision tasks, par...'
  -> Resultados: 50 aciertos. Precision: 1.00, Recall: 0.28, F1-Score: 0.44
--- Evaluando consulta: 'Analyzing the applications, impact, and ethical considerations of generative AI ...'
  -> Resultados: 50 aciertos. Precision: 1.00, Recall: 0.36, F1-Score: 0.53
--- Evaluando consulta: 'Analyzing machine learning algorithms and evaluation methodologies for recommend...'
  -> Resultados: 49 aciertos. Precision: 0.98, Recall: 0.46, F1-Score: 0.62
--- Evaluando consulta: 'Assessing the impacts of climate change on water resources and the development o...'
  -> Resultados: 49 a

Unnamed: 0,Objetivo de Estudio (Query),Total Relevantes,Aciertos,Precision,Recall,F1-Score
0,"Investigating evidence-based practices and research trends in clinical medicine, focusing on oncology, cardiovascular diseases, and sports medicine.",185,45,0.9,0.243243,0.382979
1,"Exploring the application of deep learning models for computer vision tasks, particularly in the field of medical image analysis.",178,50,1.0,0.280899,0.438596
2,"Analyzing the applications, impact, and ethical considerations of generative AI and NLP in higher education and academic research.",138,50,1.0,0.362319,0.531915
3,Analyzing machine learning algorithms and evaluation methodologies for recommender systems across various domains.,107,49,0.98,0.457944,0.624204
4,Assessing the impacts of climate change on water resources and the development of sustainable water management strategies.,96,49,0.98,0.510417,0.671233
5,"Investigating the integration, optimization, and policy frameworks for renewable energy systems.",91,43,0.86,0.472527,0.609929
6,Evaluating and comparing statistical and machine learning models for economic forecasting.,62,44,0.88,0.709677,0.785714
7,Analysis of business models for the circular economy and the sharing economy.,53,50,1.0,0.943396,0.970874
8,Exploring the use of AI and data analytics for supply chain optimization.,51,37,0.74,0.72549,0.732673
9,Assessing the application and effectiveness of AI-powered chatbots for mental health support.,49,47,0.94,0.959184,0.949495



--- Promedio de Métricas Globales ---
  - Precision Promedio: 0.9167
  - Recall Promedio:    0.6246
  - F1-Score Promedio:   0.7059
