# **DESCARGAS**

In [None]:
# Install llama-cpp-python, adding the project's wheel index as an extra source
# (the "cu121" path segment presumably selects CUDA 12.1 builds — confirm for your runtime).
!pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
# Install unsloth (LoRA model loading) and sentence-transformers (semantic similarity).
# NOTE(review): no versions are pinned, so each fresh runtime installs the latest releases.
!pip3 install unsloth sentence-transformers

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the model paths and output folder used
# later in this notebook all live under /content/drive/MyDrive.
drive.mount('/content/drive')

# **LORAS**

In [None]:
from datasets import load_dataset
from unsloth import FastLanguageModel
from transformers import TextStreamer
from difflib import SequenceMatcher
import random

# Load the model and its tokenizer from the mounted Drive folder below
model_path = "/content/drive/MyDrive/Qwen2.5-0.5B-1ep-Server/loras/Qwen2.5-0.5B"

# `model` and `tokenizer` are notebook-level names used by lliones_expert().
# dtype=None presumably lets unsloth pick the dtype — TODO confirm; 4-bit loading is disabled.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_path,
    max_seq_length=4096,
    dtype=None,
    load_in_4bit=False,
)

In [None]:
# Main entry point: expert Leonese dictionary/translator
def lliones_expert(message: str) -> str:
    """Ask the loaded chat model to answer ``message`` and return its reply.

    The message is wrapped in a system + user chat, rendered with the
    tokenizer's chat template, and up to 256 new tokens are generated with the
    notebook-level ``model``.

    Args:
        message: The user's query (a word to define or a text to translate).

    Returns:
        The generated reply with surrounding whitespace stripped. If anything
        raises, the exception is not propagated; a string of the form
        ``"❌ Error: <message>"`` is returned instead, so callers that score
        the output will score that error text.
    """
    try:
        messages = [
            {"role": "system", "content": "Eres un Diccionario/Traductor experto en Leonés"},
            {"role": "user", "content": message},
        ]

        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        outputs = model.generate(**inputs, max_new_tokens=256)

        # Decode only the tokens produced after the prompt. Splitting the full
        # decoded text on the word "assistant" would cut off any reply (or
        # echoed user text) that itself contains that word.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_ids = outputs[0][prompt_length:]
        return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    except Exception as e:
        # Deliberate best effort: keep the evaluation loop running on failures.
        return f"❌ Error: {str(e)}"

In [None]:
import time
import random
from difflib import SequenceMatcher
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util

# Load the sentence-embedding model used for semantic similarity
print("🔄 Cargando modelo de embeddings...")
model_emb = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Load the dataset from the Hugging Face Hub
dataset = load_dataset("unileon-robotics/lliones-dict-tr", split="train")
# Reproducible sample (fixed shuffle seed) of at most 2000 rows. The size is
# clamped to the dataset length so select() never indexes past the end, which
# matches how the GGUF section of this notebook draws its sample.
dataset = dataset.shuffle(seed=42).select(range(min(2000, len(dataset))))

# Fuzzy (character-level) similarity
def fuzzy_similarity(a: str, b: str) -> float:
    """Return the difflib similarity ratio of ``a`` and ``b`` as a percentage.

    Both strings are lower-cased and stripped of surrounding whitespace before
    being compared with ``SequenceMatcher``; the 0-1 ratio is scaled to 0-100.
    """
    left, right = (text.lower().strip() for text in (a, b))
    return SequenceMatcher(None, left, right).ratio() * 100

# Semantic (embedding-based) similarity
def semantic_similarity(a: str, b: str) -> float:
    """Return the cosine similarity of the embeddings of ``a`` and ``b``, times 100.

    Each string is encoded separately with the notebook-level ``model_emb``
    and the two tensors are compared with ``util.cos_sim``.
    """
    vec_a, vec_b = (model_emb.encode(text, convert_to_tensor=True) for text in (a, b))
    cosine = util.cos_sim(vec_a, vec_b)
    return cosine.item() * 100

In [None]:
results = []

# Evaluation: score the LoRA model's answer for every sampled row
for i, sample in enumerate(dataset):
    entrada = sample["input"]
    esperado = sample["output"]

    predicho = lliones_expert(entrada)

    fuzzy = fuzzy_similarity(predicho, esperado)
    semantic = semantic_similarity(predicho, esperado)
    similitud = max(fuzzy, semantic)  # Combined score: the higher of the two

    # One-line progress marker for every example
    print(f"Ejemplo {i+1}")

    # Print the full breakdown for roughly 1% of the examples only. The
    # comparison was previously `> 0.01`, which printed about 99% of them and
    # disagreed with the `< sample_print_prob` sampling in evaluate_model().
    if random.random() < 0.01:
        print(f"\n📌 Ejemplo {i+1}")
        print(f"🔹 Entrada     : {entrada}")
        print(f"✅ Esperado    : {esperado}")
        print(f"🤖 Predicho    : {predicho}")
        print(f"📊 Fuzzy       : {fuzzy:.2f}%")
        print(f"💡 Semántica   : {semantic:.2f}%")
        print(f"🏁 Final (max) : {similitud:.2f}%")

    results.append(similitud)

In [None]:
# Final statistics: mean of the combined similarity scores
if results:
    media_similitud = sum(results) / len(results)
    print(f"\n📈 Similitud media en {len(results)} ejemplos: {media_similitud:.2f}%")
else:
    # Nothing was scored (empty sample or interrupted run): report it instead
    # of failing with an unexplained ZeroDivisionError.
    media_similitud = float("nan")
    print("\n⚠️ No hay resultados: ejecuta antes la celda de evaluación.")

In [None]:
# Count the scores per range
from collections import Counter

# Half-open 5-point ranges: [(0, 5), (5, 10), ..., (95, 100)]
rangos = [(i, i + 5) for i in range(0, 100, 5)]
contador_rangos = Counter()

for sim in results:
    for (rango_min, rango_max) in rangos:
        if rango_min <= sim < rango_max:
            etiqueta = f"{rango_min:02d}-{rango_max:02d}"
            contador_rangos[etiqueta] += 1
            break
    else:
        # No range matched: scores of 100 or more get their own "100" label
        if sim >= 100:
            contador_rangos["100"] += 1

# Show the ranges from highest to lowest. Sorting uses the numeric lower bound
# of each label; plain string sorting placed "100" between "15-20" and "10-15".
print("\n📊 Distribución de similitudes por rango:")
for rango in sorted(contador_rangos, key=lambda etiqueta: int(etiqueta.split("-")[0]), reverse=True):
    cantidad = contador_rangos[rango]
    porcentaje = (cantidad / len(results)) * 100
    print(f"  {rango}%: {cantidad} ejemplos ({porcentaje:.1f}%)")

## Código para probar el modelo LoRA

In [None]:
# # 🔁 Bucle interactivo de chat para modelo LoRA
# print("🦙 Chat con tu modelo LoRA. Escribe 'salir' para terminar.\n")

# while True:
#     user_input = input("🧑 Tú: ")
#     if user_input.lower() in ["salir", "exit", "quit"]:
#         print("👋 Hasta pronto.")
#         break

#     respuesta = lliones_expert(user_input)
#     print(f"🤖 LLM: {respuesta}\n")

# **GGUF**

In [None]:
from llama_cpp import Llama
from datasets import load_dataset
from difflib import SequenceMatcher
from collections import Counter
from sentence_transformers import SentenceTransformer, util
import random
import time
import json
import csv
from pathlib import Path

# ---------- Configuration ----------
# Number of dataset rows to evaluate (adjust as needed)
SAMPLE_SIZE = 3000

# GGUF model files to evaluate, in the order they were provided.
# NOTE(review): every entry below is currently commented out, so MODEL_PATHS is
# an empty list; uncomment the path(s) to evaluate before running.
MODEL_PATHS = [

    ##### Q5_K_M #####

    # "/content/drive/MyDrive/Qwen2.5-0.5B-1ep/outputs/gguf/Qwen2.5-0.5B/unsloth.Q5_K_M.gguf",
    # "/content/drive/MyDrive/Qwen2.5-0.5B-3ep/outputs/gguf/Qwen2.5-0.5B/unsloth.Q5_K_M.gguf",
    # "/content/drive/MyDrive/Qwen2.5-0.5B-5ep/outputs/gguf/Qwen2.5-0.5B/unsloth.Q5_K_M.gguf",

    # "/content/drive/MyDrive/Qwen2.5-1.5B-1ep/outputs/gguf/Qwen2.5-1.5B/unsloth.Q5_K_M.gguf",
    # "/content/drive/MyDrive/Qwen2.5-1.5B-3ep/gguf/Qwen2.5-1.5B/unsloth.Q5_K_M.gguf",
    # "/content/drive/MyDrive/Qwen2.5-1.5B-5ep/gguf/Qwen2.5-1.5B/unsloth.Q5_K_M.gguf",

    # "/content/drive/MyDrive/Qwen2.5-3B-1ep/outputs/gguf/Qwen2.5-3B/unsloth.Q5_K_M.gguf",
    # "/content/drive/MyDrive/Qwen2.5-3B-3ep/outputs/gguf/Qwen2.5-3B/unsloth.Q5_K_M.gguf",
    # "/content/drive/MyDrive/Qwen2.5-3B-5ep/outputs/gguf/Qwen2.5-3B/unsloth.Q5_K_M.gguf",

    ##### F16 #####

    ## "/content/drive/MyDrive/Qwen2.5-0.5B-1ep/outputs/gguf/Qwen2.5-0.5B/unsloth.F16.gguf",
    ## "/content/drive/MyDrive/Qwen2.5-0.5B-3ep/outputs/gguf/Qwen2.5-0.5B/unsloth.F16.gguf",
    ## "/content/drive/MyDrive/Qwen2.5-0.5B-5ep/outputs/gguf/Qwen2.5-0.5B/unsloth.F16.gguf",

    ## "/content/drive/MyDrive/Qwen2.5-1.5B-1ep/outputs/gguf/Qwen2.5-1.5B/unsloth.F16.gguf",
    ## "/content/drive/MyDrive/Qwen2.5-1.5B-3ep/gguf/Qwen2.5-1.5B/unsloth.F16.gguf",
    ## "/content/drive/MyDrive/Qwen2.5-1.5B-5ep/gguf/Qwen2.5-1.5B/unsloth.F16.gguf",

    ## "/content/drive/MyDrive/Qwen2.5-3B-1ep/outputs/gguf/Qwen2.5-3B/unsloth.F16.gguf",
    ## "/content/drive/MyDrive/Qwen2.5-3B-3ep/outputs/gguf/Qwen2.5-3B/unsloth.F16.gguf",
    ## "/content/drive/MyDrive/Qwen2.5-3B-5ep/outputs/gguf/Qwen2.5-3B/unsloth.F16.gguf",
]

# Keyword arguments forwarded to every Llama(...) constructor call (tune as needed).
# NOTE(review): n_gpu_layers=-1 presumably offloads all layers to the GPU — confirm
# against the llama-cpp-python documentation.
LLAMA_KWARGS = {
    "n_ctx": 2048,
    "n_threads": 4,
    "n_gpu_layers": -1,
    "verbose": True,
}

# Seed used when shuffling the dataset so the evaluated sample is reproducible
RANDOM_SEED = 42

# ---------- Where the comparative summary is saved on Drive ----------
# NOTE(review): the folder name pins one quantisation/model (F16/Qwen2.5-3B-3ep)
# while MODEL_PATHS can list several models; keep the two in sync.
OUT_DIR = Path("/content/drive/MyDrive/lliones_eval/F16/Qwen2.5-3B-3ep")
# Create the folder (and any missing parents); no error if it already exists
OUT_DIR.mkdir(parents=True, exist_ok=True)

csv_path = OUT_DIR / "lliones_eval_summary.csv"
json_path = OUT_DIR / "lliones_eval_summary.json"

# ---------- Load the semantic (sentence-embedding) model, once ----------
semantic_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


In [None]:
# ---------- Load the dataset and draw a reproducible sample ----------
dataset = load_dataset("unileon-robotics/lliones-dict-tr", split="train")
# Shuffle with the fixed seed, then keep the first SAMPLE_SIZE rows
# (or every row when the split is smaller than SAMPLE_SIZE).
dataset = dataset.shuffle(seed=RANDOM_SEED).select(range(min(SAMPLE_SIZE, len(dataset))))

def fuzzy_similarity(a: str, b: str) -> float:
    """Character-level similarity of ``a`` and ``b`` as a percentage (0-100).

    The comparison ignores case and surrounding whitespace and uses
    ``difflib.SequenceMatcher.ratio``.
    """
    normalised_a = a.lower().strip()
    normalised_b = b.lower().strip()
    matcher = SequenceMatcher(None, normalised_a, normalised_b)
    return matcher.ratio() * 100

def semantic_similarity(a: str, b: str) -> float:
    """Cosine similarity between the embeddings of ``a`` and ``b``, times 100.

    Both strings are encoded in a single call to the notebook-level
    ``semantic_model`` and the two resulting rows are compared with
    ``util.cos_sim``.
    """
    emb_a, emb_b = semantic_model.encode([a, b], convert_to_tensor=True)
    cosine = util.cos_sim(emb_a, emb_b).item()
    return cosine * 100  # as a percentage

def stats_distribution(lista):
    """Count how many scores fall into each 5-point range.

    Ranges are half-open ``[lo, lo + 5)`` for lo = 0, 5, ..., 95 and are
    labelled "00-05", "05-10", ..., "95-100". Scores of 100 or more are counted
    under the label "100". A score that matches neither (for example a negative
    value) is not counted.

    Returns:
        A plain dict mapping label -> count, containing only labels that
        received at least one score.
    """
    limites = [(inicio, inicio + 5) for inicio in range(0, 100, 5)]
    contador = Counter()
    for sim in lista:
        # First matching range label, else "100" for scores at or above 100
        etiqueta = next(
            (f"{lo:02d}-{hi:02d}" for lo, hi in limites if lo <= sim < hi),
            "100" if sim >= 100 else None,
        )
        if etiqueta is not None:
            contador[etiqueta] += 1
    return dict(contador)

def print_stats(lista):
    """Print the mean score and its range distribution, and return both.

    Args:
        lista: Non-empty sequence of similarity scores (percentages).

    Returns:
        ``(media, dist)``: the arithmetic mean of ``lista`` and the dict
        produced by ``stats_distribution(lista)``.

    Raises:
        ZeroDivisionError: if ``lista`` is empty.
    """
    media = sum(lista) / len(lista)
    print(f"\n📈 Similitud media (máx fuzzy/semántica): {media:.2f}%")
    dist = stats_distribution(lista)
    print("\n📊 Distribución de similitudes:")
    # List ranges from highest to lowest by their numeric lower bound. Plain
    # string sorting placed the "100" label between "15-20" and "10-15".
    for rango in sorted(dist, key=lambda etiqueta: int(etiqueta.split("-")[0]), reverse=True):
        cantidad = dist[rango]
        porcentaje = (cantidad / len(lista)) * 100
        print(f"  {rango}%: {cantidad} ejemplos ({porcentaje:.1f}%)")
    return media, dist

In [None]:
def build_llm(model_path: str) -> Llama:
    """Create a ``Llama`` for the GGUF file at ``model_path``.

    The shared ``LLAMA_KWARGS`` settings are passed as additional keyword
    arguments to the constructor.
    """
    llm = Llama(model_path=model_path, **LLAMA_KWARGS)
    return llm

def lliones_expert_gguf(message: str, llm: Llama, max_tokens: int = 200) -> str:
    """Ask a GGUF model to answer ``message`` and return its reply.

    Args:
        message: The user's query, inserted as the user turn of the prompt.
        llm: Callable model; it is invoked as ``llm(prompt, max_tokens=...)``
            and must return a mapping with ``["choices"][0]["text"]``.
        max_tokens: Upper bound on generated tokens passed to ``llm``.
            Defaults to 200, the value that was previously hard-coded.

    Returns:
        The text of the first choice, stripped of surrounding whitespace.
    """
    # Qwen-style chat prompt: system turn, user turn, then an open assistant turn
    prompt = (
        "<|im_start|>system\n"
        "Eres un Diccionario/Traductor experto en leonés.<|im_end|>\n"
        "<|im_start|>user\n"
        f"{message}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    output = llm(prompt, max_tokens=max_tokens)
    return output["choices"][0]["text"].strip()

def evaluate_model(model_path: str, dataset, sample_print_prob: float = 0.01):
    """Score one GGUF model on every row of ``dataset`` and summarise the results.

    For each row the model's answer to ``row["input"]`` is compared with
    ``row["output"]`` using fuzzy and semantic similarity; the per-row final
    score is the larger of the two.

    Args:
        model_path: Path of the GGUF file, passed to ``build_llm``.
        dataset: Iterable of rows with "input" and "output" keys that also
            supports ``len()``.
        sample_print_prob: Probability of printing the full breakdown of a row.

    Returns:
        A dict with the model path, number of examples, the mean final, fuzzy
        and semantic scores, the distribution of final scores and the elapsed
        wall-clock seconds.
    """

    def _mean(values):
        # Arithmetic mean; raises ZeroDivisionError for an empty list
        return sum(values) / len(values)

    print(f"\n==============================")
    print(f"🚀 Evaluando modelo: {model_path}")
    print(f"==============================")

    llm = build_llm(model_path)
    scores = {"final": [], "fuzzy": [], "semantic": []}
    printed = 0

    started_at = time.time()
    for i, sample in enumerate(dataset):
        entrada, esperado = sample["input"], sample["output"]
        predicho = lliones_expert_gguf(entrada, llm=llm)

        fuzzy_sim = fuzzy_similarity(predicho, esperado)
        semantic_sim = semantic_similarity(predicho, esperado)
        final_sim = max(fuzzy_sim, semantic_sim)

        # Progress marker for every row; full details for a random subset only
        print(f"\nEjemplo {i+1}\n")
        if random.random() < sample_print_prob:
            printed += 1
            print(f"🔹 Entrada   : {entrada}")
            print(f"✅ Esperado  : {esperado}")
            print(f"🤖 Predicho  : {predicho}")
            print(f"📊 Fuzzy     : {fuzzy_sim:.2f}%")
            print(f"🧠 Semántico : {semantic_sim:.2f}%")
            print(f"🏁 Final     : {final_sim:.2f}%")

        scores["final"].append(final_sim)
        scores["fuzzy"].append(fuzzy_sim)
        scores["semantic"].append(semantic_sim)

    elapsed = time.time() - started_at

    # Per-metric means
    mean_final = _mean(scores["final"])
    mean_fuzzy = _mean(scores["fuzzy"])
    mean_semantic = _mean(scores["semantic"])

    # Print mean and distribution of the final metric; only the distribution is reused
    _, dist = print_stats(scores["final"])
    # Cross-check line showing the three means side by side
    print(f"\n🔁 Comprobación medias -> Final: {mean_final:.2f}% | Fuzzy: {mean_fuzzy:.2f}% | Semántica: {mean_semantic:.2f}%")
    print(f"⏱️ Tiempo total: {elapsed:.1f} s  |  {len(dataset)} ejemplos  |  Muestras impresas: {printed}")

    summary = {
        "model_path": model_path,
        "num_examples": len(dataset),
        "mean_similarity": mean_final,     # final metric (per-example max)
        "mean_fuzzy": mean_fuzzy,          # mean fuzzy score
        "mean_semantic": mean_semantic,    # mean semantic score
        "distribution": dist,              # distribution of the final metric
        "elapsed_seconds": elapsed,
    }
    return summary


In [None]:
# Evaluate every configured model, collecting one summary dict per model
all_summaries = []
for path in MODEL_PATHS:
    all_summaries.append(evaluate_model(path, dataset))

# CSV: header plus one row per model (includes mean_fuzzy and mean_semantic)
import csv, json

csv_header = [
    "model_path",
    "num_examples",
    "mean_similarity",   # final score (per-example max)
    "mean_fuzzy",
    "mean_semantic",
    "elapsed_seconds",
    "distribution_json",
]
with open(csv_path, "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(csv_header)
    writer.writerows(
        [
            s["model_path"],
            s["num_examples"],
            format(s["mean_similarity"], ".4f"),
            format(s["mean_fuzzy"], ".4f"),
            format(s["mean_semantic"], ".4f"),
            format(s["elapsed_seconds"], ".2f"),
            # The distribution dict is stored as compact JSON in a single cell
            json.dumps(s["distribution"], ensure_ascii=False, separators=(",", ":")),
        ]
        for s in all_summaries
    )

# JSON: the full list of summary dicts
with open(json_path, "w", encoding="utf-8") as json_file:
    json.dump(all_summaries, json_file, ensure_ascii=False, indent=2)

print("\n📁 Resultados guardados en tu Drive:")
print(f" - CSV : {csv_path}")
print(f" - JSON: {json_path}")

## Código para probar el modelo GGUF

In [None]:
# # Bucle interactivo de chat
# print("🦙 Chat con tu modelo GGUF. Escribe 'salir' para terminar.\n")

# while True:
#     user_input = input("🧑 Tú: ")
#     if user_input.lower() in ["salir", "exit", "quit"]:
#         print("👋 Hasta pronto.")
#         break

#     prompt = (
#         "<|im_start|>system\n"
#         "Eres un Diccionario/Traductor experto en leonés.<|im_end|>\n"
#         "<|im_start|>user\n"
#         f"{user_input}<|im_end|>\n"
#         "<|im_start|>assistant\n"
#     )

#     output = llm(prompt, max_tokens=200)
#     respuesta = output["choices"][0]["text"].strip()
#     print(f"🤖 LLM: {respuesta}\n")
