<a href="https://colab.research.google.com/github/galenzo17/AI-personal-test/blob/main/really_bench.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Instalación de bibliotecas
!pip install transformers
!pip install datasets
!pip install evaluate

# Importación de bibliotecas
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from datasets import load_dataset
import evaluate
import pandas as pd

# Hugging Face Hub identifiers of the causal language models to compare.
model_names = ["gpt2", "EleutherAI/gpt-neo-1.3B", "facebook/opt-125m"]

# Load every model together with its tokenizer; both dicts are keyed by the
# model identifier so later cells can look them up by name.
models, tokenizers = {}, {}

for name in model_names:
    print(f"Cargando modelo: {name}")
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name)
    models[name], tokenizers[name] = model, tokenizer

# Benchmark prompts (in Spanish) passed to every model by the generation loop.
# The evaluation step pairs each entry with `reference_answers` by index, so
# the two lists must stay in the same order.
questions = [
    "¿Cuál es la capital de Francia?",
    "Explica la teoría de la relatividad de Einstein.",
    "¿Cuáles son los beneficios de la inteligencia artificial?",
    "Describe el proceso de fotosíntesis.",
    "¿Qué es el aprendizaje profundo?"
]

# Helper that generates a response for a single prompt
def generate_response(model, tokenizer, prompt, max_length=100):
    """Generate a greedy completion of ``prompt`` and return it as text.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer matching ``model``; called to encode the prompt
            and used to decode the generated ids.
        prompt: Text to complete.
        max_length: Upper bound on the total sequence length passed to
            ``generate`` (prompt tokens count towards it).

    Returns:
        The decoded first output sequence with special tokens removed. For
        decoder-only models the output sequence starts with the prompt
        tokens, so the returned text normally includes the prompt itself.
    """
    # Calling the tokenizer (rather than `encode`) also yields the attention
    # mask, which `generate` needs for reliable results.
    encoded = tokenizer(prompt, return_tensors='pt')
    input_ids = encoded['input_ids']
    attention_mask = encoded['attention_mask']

    # Keep the inputs on the same device as the model (matters once the model
    # has been moved to a GPU).
    device = getattr(model, 'device', None)
    if device is not None:
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)

    # Some tokenizers (e.g. GPT-2) define no pad token; fall back to EOS
    # explicitly instead of relying on the implicit fallback and its warning.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # `early_stopping` is intentionally not passed: it only affects beam
    # search and is ignored by the greedy decoding used here.
    outputs = model.generate(input_ids,
                             attention_mask=attention_mask,
                             max_length=max_length,
                             num_return_sequences=1,
                             no_repeat_ngram_size=2,
                             pad_token_id=pad_token_id)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Generate the answers: for each model, one response per question, stored in
# question order under the model's name.
responses = {}

for name in model_names:
    print(f"Generando respuestas con el modelo: {name}")
    responses[name] = [
        generate_response(models[name], tokenizers[name], question)
        for question in questions
    ]

# Reference answers used as the ground truth for BLEU/ROUGE scoring.
# The evaluation loop pairs entry i with questions[i], so the order here must
# match the order of `questions`.
reference_answers = [
    "La capital de Francia es París.",
    "La teoría de la relatividad de Einstein describe la gravedad como una propiedad de la curvatura del espacio-tiempo.",
    "Los beneficios de la inteligencia artificial incluyen automatización, mejora en la toma de decisiones y avances en diversas industrias.",
    "La fotosíntesis es el proceso mediante el cual las plantas convierten la luz solar en energía química.",
    "El aprendizaje profundo es una subárea del aprendizaje automático que utiliza redes neuronales profundas para modelar y resolver problemas complejos."
]

# Evaluate the generated responses against the reference answers
bleu = evaluate.load('bleu')
rouge = evaluate.load('rouge')

# Collect one record per (model, question) pair and build the DataFrame once
# at the end: `DataFrame.append` was removed in pandas 2.0, and growing a
# frame row by row copies it on every iteration.
metric_rows = []

for name in model_names:
    # Responses and references are aligned with `questions` by position.
    for question, generated, reference in zip(questions, responses[name], reference_answers):
        # BLEU: each prediction takes a list of acceptable references.
        bleu_score = bleu.compute(predictions=[generated], references=[[reference]])['bleu']

        # ROUGE: the `evaluate` implementation returns plain floats by
        # default, so there is no `.fmeasure` attribute to read.
        rouge_score = rouge.compute(predictions=[generated], references=[reference])
        rouge_l = float(rouge_score['rougeL'])

        metric_rows.append({
            'Modelo': name,
            'Pregunta': question,
            'BLEU': bleu_score,
            'ROUGE': rouge_l
        })

# Passing `columns` keeps the column order (and the columns themselves, if no
# rows were produced) identical to the original empty-frame definition.
metrics_df = pd.DataFrame(metric_rows, columns=['Modelo', 'Pregunta', 'BLEU', 'ROUGE'])

# Show the per-question metrics
print(metrics_df)

# Average metrics per model.
# Only the numeric score columns are aggregated: on pandas >= 2.0 calling
# `.mean()` on the whole group raises a TypeError because of the string
# column 'Pregunta'. The scores are coerced to float first in case they were
# stored with object dtype.
score_columns = ['BLEU', 'ROUGE']
average_metrics = (
    metrics_df.astype({column: float for column in score_columns})
    .groupby('Modelo')[score_columns]
    .mean()
    .reset_index()
)
print(average_metrics)

# Visualization (optional)
import matplotlib.pyplot as plt
import seaborn as sns

# One bar chart of the per-model average for each metric, BLEU first.
for metric in ('BLEU', 'ROUGE'):
    sns.barplot(x='Modelo', y=metric, data=average_metrics)
    plt.title(f'Puntuación {metric} Promedio por Modelo')
    plt.show()

Collecting datasets
  Downloading datasets-3.0.2-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.7/472.7 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xx

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Cargando modelo: EleutherAI/gpt-neo-1.3B


tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.31G [00:00<?, ?B/s]

Cargando modelo: facebook/opt-125m


tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/651 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/251M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generando respuestas con el modelo: gpt2


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Generando respuestas con el modelo: EleutherAI/gpt-neo-1.3B


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
