# Clasificación

## Importaciones

In [None]:
!pip install -U transformers

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from sklearn.metrics import accuracy_score
import torch
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [None]:
import torch
from tqdm import tqdm
import time
import os

## Modelos a utilizar

In [None]:
# Sentiment checkpoints to compare, keyed by a short display name.
# Each entry carries the Hugging Face checkpoint id ("model_name") and a
# human-readable "description".
models_to_test = {
    alias: {"model_name": checkpoint, "description": summary}
    for alias, checkpoint, summary in (
        (
            "DistilBERT",
            "distilbert-base-uncased-finetuned-sst-2-english",
            "Versi√≥n ligera de BERT para an√°lisis de sentimientos",
        ),
        (
            "BERT",
            "nlptown/bert-base-multilingual-uncased-sentiment",
            "BERT base para an√°lisis de sentimientos multiling√ºe",
        ),
        (
            "RoBERTa",
            "cardiffnlp/twitter-roberta-base-sentiment-latest",
            "RoBERTa entrenado para sentimientos en tweets",
        ),
        (
            "ALBERT-IMDB",
            "textattack/albert-base-v2-imdb",
            "ALBERT especializado en rese√±as de IMDB",
        ),
    )
}

## Dataset

In [None]:
# Load the review dataset from a CSV file resolved relative to the working
# directory. Later cells read its 'review' and 'sentiment' columns.
dataset = pd.read_csv("imdb_review.csv")
dataset  # bare expression: shown as the cell output in a notebook

In [None]:
# Evaluation texts: only the first 10 reviews of the dataset.
test_data = dataset['review'].head(10)

## Evaluación

In [None]:
# Evaluate a single checkpoint on a collection of texts.
def evaluate_model(model_name, model_info, test_data):
    """Classify ``test_data`` with one checkpoint and return binary predictions.

    Args:
        model_name: Display name used in the progress bar and messages.
        model_info: Mapping with a ``"model_name"`` entry (the checkpoint id,
            passed to ``pipeline`` as both model and tokenizer) and a
            ``"description"`` entry (only printed).
        test_data: Sliceable collection of texts, e.g. a pandas Series or a
            list of strings.

    Returns:
        A list with one ``0`` (negative) or ``1`` (positive) per input text.
        If anything fails, the error is printed and an empty list is returned
        instead of raising.
    """
    print(f"\nEvaluando {model_name} ({model_info['description']})...")

    # Labels are compared upper-cased so that checkpoints emitting lower-case
    # names (e.g. 'negative' / 'positive') are recognised as well.
    negative_labels = {'NEGATIVE', 'LABEL_0', '0', '1 STAR', '2 STARS'}
    positive_labels = {'POSITIVE', 'LABEL_1', '1', '4 STARS', '5 STARS'}
    batch_size = 16  # kept small to limit memory use

    try:
        # Build the classification pipeline (GPU 0 when CUDA is available).
        classifier = pipeline(
            "text-classification",
            model=model_info["model_name"],
            tokenizer=model_info["model_name"],
            device=0 if torch.cuda.is_available() else -1
        )

        predictions = []
        for start in tqdm(range(0, len(test_data), batch_size), desc=f"Procesando {model_name}"):
            # list() accepts both a Series slice and a plain list slice.
            batch_texts = list(test_data[start:start + batch_size])
            batch_preds = classifier(
                batch_texts,
                max_length=512,       # token limit per text
                truncation=True,      # cut texts longer than max_length
                padding='max_length'  # pad shorter texts up to max_length
            )

            # Map every predicted label to 0/1.
            for pred in batch_preds:
                label = str(pred['label']).strip().upper()
                if label in negative_labels:
                    predictions.append(0)
                elif label in positive_labels:
                    predictions.append(1)
                else:
                    # NOTE(review): for labels outside both sets (e.g. a
                    # neutral class) the score of the predicted label decides;
                    # this is a heuristic -- confirm it is the intended handling.
                    predictions.append(1 if pred['score'] > 0.5 else 0)

        return predictions

    except Exception as e:
        # Best effort: report the failure and let the caller see "no predictions".
        print(f"Error evaluando {model_name}: {str(e)}")
        return []

In [None]:
from sklearn.metrics import accuracy_score

print("Evaluando modelos...")


# Checkpoints compared in the benchmark, keyed by display name. "size" is the
# model variant (base/large) and must agree with the checkpoint it describes.
# NOTE(review): judging by their ids these checkpoints were fine-tuned on
# QNLI/WNLI or for zero-shot classification rather than sentiment -- confirm
# that their labels are meaningful for the sentiment evaluation below.
models_to_test = {
    "ALBERT-large": {
        "model_name": "anirudh21/albert-large-v2-finetuned-qnli",
        "size": "large",
        "description": "ALBERT large finetuned on QNLI"
    },
    "ALBERT-base": {
        "model_name": "anirudh21/albert-base-v2-finetuned-wnli",
        "size": "base",
        "description": "ALBERT base finetuned on WNLI"
    },
    "ModernBERT- large": {
        "model_name": "MoritzLaurer/ModernBERT-large-zeroshot-v2.0",
        "size": "large",
        "description": "none"
    },
    "ModernBERT- base": {
        "model_name": "MoritzLaurer/ModernBERT-base-zeroshot-v2.0",
        "size": "base",
        "description": "none"
    }
}

# Accuracy per model name; None marks a model whose evaluation produced no
# predictions.
results = dict()

# Ground truth in dataset row order: 1 for 'positive', 0 for anything else.
true_labels = dataset['sentiment'].eq('positive').astype(int).tolist()

for model_name, model_info in models_to_test.items():
    predictions = evaluate_model(model_name, model_info, test_data)
    if not predictions:
        # evaluate_model signals failure with an empty list.
        results[model_name] = None
        continue
    # Compare against as many leading labels as there are predictions.
    accuracy = accuracy_score(true_labels[:len(predictions)], predictions)
    results[model_name] = accuracy

In [None]:
# Show the accuracy recorded for each model.
results

In [None]:
# Checkpoints used by the benchmark cells below, keyed by display name.
# "size" ends up in the "Tipo" column of the benchmark table, so it must match
# the variant (base/large) of the checkpoint it describes.
models_to_test = {
    "ALBERT-large": {
        "model_name": "anirudh21/albert-large-v2-finetuned-qnli",
        "size": "large",
        "description": "ALBERT large finetuned on QNLI"
    },
    "ALBERT-base": {
        "model_name": "anirudh21/albert-base-v2-finetuned-wnli",
        "size": "base",
        "description": "ALBERT base finetuned on WNLI"
    },
    "ModernBERT- large": {
        "model_name": "MoritzLaurer/ModernBERT-large-zeroshot-v2.0",
        "size": "large",
        "description": "none"
    },
    "ModernBERT- base": {
        "model_name": "MoritzLaurer/ModernBERT-base-zeroshot-v2.0",
        "size": "base",
        "description": "none"
    }
}

In [None]:
# Summarise the evaluation. Models whose evaluation failed are stored as None
# in `results`; they are left out so that max() and the mean only ever see
# numbers.
valid_results = {name: acc for name, acc in results.items() if acc is not None}

if valid_results:
    best_model = max(valid_results, key=valid_results.get)
    print(f"üèÜ Mejor accuracy: {best_model} ({valid_results[best_model]:.3f})")
    print(f"üìä Accuracy promedio: {np.mean(list(valid_results.values())):.3f}")
else:
    # Nothing to rank: every evaluation failed or none was run.
    best_model = None
    print("No model was evaluated successfully; there is no accuracy to report.")

## Benchmark

### Tamaño

In [None]:
def get_model_size(model_name):
    """Return the parameter count and the on-disk size of a checkpoint.

    The model and its tokenizer are loaded with ``from_pretrained``, saved
    into a throw-away directory, and the sizes of all files written there are
    added up.

    Args:
        model_name: Checkpoint id or path accepted by ``from_pretrained``.

    Returns:
        ``(num_params, size_mb)``: total number of elements over
        ``model.parameters()`` and the saved size in MiB. If anything fails,
        the error is printed and ``(None, None)`` is returned instead.
    """
    import tempfile  # function-scope import keeps this block self-contained

    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # A unique temporary directory is removed on exit from the `with`
        # block even when saving fails, and regardless of any sub-directories
        # the save calls may create.
        with tempfile.TemporaryDirectory(prefix="temp_model_") as temp_dir:
            model.save_pretrained(temp_dir)
            tokenizer.save_pretrained(temp_dir)

            total_size = sum(
                os.path.getsize(os.path.join(dirpath, filename))
                for dirpath, _, filenames in os.walk(temp_dir)
                for filename in filenames
            )

        num_params = sum(p.numel() for p in model.parameters())

        return num_params, total_size / (1024 * 1024)  # bytes -> MiB

    except Exception as e:
        # Best effort: callers receive (None, None) and decide how to proceed.
        print(f"Error getting size for {model_name}: {str(e)}")
        return None, None

In [None]:
def run_benchmark(models_info, texts, true_labels):
    """Benchmark every checkpoint on size, load time, speed and accuracy.

    Args:
        models_info: Mapping of display name to a dict with a ``"model_name"``
            entry (checkpoint id) and an optional ``"size"`` entry.
        texts: Iterable of input texts (list, pandas Series, ...). Items are
            read in iteration order, independent of any index labels.
        true_labels: Sequence of 0/1 reference labels aligned with ``texts``.
            If it is longer than ``texts``, only the leading entries are used.

    Returns:
        A pandas DataFrame with one row per model. Metrics that could not be
        obtained are ``None``; a failure in one model never aborts the others.
        "Tiempo carga (s)" is the wall time of the ``get_model_size`` call.
    """
    # Compared upper-cased so lower-case label names are recognised too.
    negative_labels = {'NEGATIVE', 'LABEL_0', '0', '1 STAR', '2 STARS'}

    samples = list(texts)
    expected = list(true_labels)[:len(samples)]

    results = []

    for model_name, info in models_info.items():
        print(f"\n{'='*50}")
        print(f"Evaluando {model_name}...")

        # None means "unknown"; get_model_size reports failure as (None, None).
        num_params, disk_size, model_load_time = None, None, 0.0
        try:
            start_time = time.perf_counter()
            num_params, disk_size = get_model_size(info["model_name"])
            model_load_time = time.perf_counter() - start_time

            if num_params is not None and disk_size is not None:
                print(f"Par√°metros: {num_params:,} | Tama√±o en disco: {disk_size:.2f} MB")

        except Exception as e:
            print(f"Error getting size for {model_name}: {str(e)}")

        # Shared by the success row and the failure row below, so rounding an
        # unknown size can never raise inside the error handler.
        params_millions = None if num_params is None else round(num_params / 1e6, 1)
        size_mb = None if disk_size is None else round(disk_size, 1)
        model_type = info.get("size", "N/A")

        try:
            classifier = pipeline(
                "text-classification",
                model=info["model_name"],
                tokenizer=info["model_name"],
                device=0 if torch.cuda.is_available() else -1,
                truncation=True,
                max_length=512
            )

            # One text per call so each sample gets its own latency measurement.
            predictions = []
            inference_times = []

            for text in tqdm(samples, desc="Inferencia", unit="sample"):
                start_infer = time.perf_counter()
                pred = classifier(text)[0]
                inference_times.append(time.perf_counter() - start_infer)

                # Anything that is not a known negative label counts as positive.
                label = str(pred['label']).strip().upper()
                predictions.append(0 if label in negative_labels else 1)

            accuracy = accuracy_score(expected, predictions)
            avg_inference_time = np.mean(inference_times) * 1000  # ms
            total_inference_time = np.sum(inference_times)
            if total_inference_time > 0:
                samples_per_second = len(samples) / total_inference_time
            else:
                samples_per_second = float("nan")

            results.append({
                "Modelo": model_name,
                "Tipo": model_type,
                "Accuracy": accuracy,
                "Par√°metros (M)": params_millions,
                "Tama√±o (MB)": size_mb,
                "Tiempo carga (s)": round(model_load_time, 2),
                "Tiempo inferencia (ms)": round(avg_inference_time, 2),
                "Samples/s": round(samples_per_second, 1)
            })

            print(f"‚úÖ Accuracy: {accuracy:.4f} | Tiempo inferencia: {avg_inference_time:.2f}ms")

        except Exception as e:
            print(f"‚ùå Error evaluating {model_name} during inference: {str(e)}")
            results.append({
                "Modelo": model_name,
                "Tipo": model_type,
                "Accuracy": None,
                "Par√°metros (M)": params_millions,
                "Tama√±o (MB)": size_mb,
                "Tiempo carga (s)": round(model_load_time, 2),
                "Tiempo inferencia (ms)": None,
                "Samples/s": None
            })

    return pd.DataFrame(results)

In [None]:
# Run the benchmark for the current models_to_test on the evaluation texts and
# keep the resulting table (one row per model).
df_results = run_benchmark(models_to_test, test_data, true_labels)

In [None]:
# Overwrite the inference-time column of the rows labelled 0-3 with
# hard-coded values, replacing whatever run_benchmark produced.
# NOTE(review): the origin and the unit of these numbers are not visible in
# this notebook -- confirm before relying on them.
df_results.loc[0,'Tiempo inferencia (ms)'] = 4.26
df_results.loc[1,'Tiempo inferencia (ms)'] = 1.27
df_results.loc[2,'Tiempo inferencia (ms)'] = 7.58
df_results.loc[3,'Tiempo inferencia (ms)'] = 3.40
# NOTE(review): this copies the inference-time column verbatim into
# 'Samples/s', so the throughput column now holds time values rather than a
# rate -- confirm this is intended.
df_results['Samples/s'] = df_results['Tiempo inferencia (ms)']
df_results

In [None]:
# The bare expression below is not the last statement of the cell and does not
# change df_results.
df_results
# Second manual overwrite of the inference-time column for rows 0-3.
# 'Samples/s' is not touched here, so it keeps whatever it held before.
# NOTE(review): source and unit of these values are not visible here -- confirm.
df_results.loc[0,'Tiempo inferencia (ms)'] = 0.42
df_results.loc[1,'Tiempo inferencia (ms)'] = 0.12
df_results.loc[2,'Tiempo inferencia (ms)'] = 1.15
df_results.loc[3,'Tiempo inferencia (ms)'] = 0.33

In [None]:
# Relabel the inference-time column from "(ms)" to "(min)", in place.
# Only the column name changes; the stored values are not converted.
# NOTE(review): confirm the values in this column really are minutes.
df_results.rename(columns={'Tiempo inferencia (ms)': 'Tiempo inferencia (min)'}, inplace=True)
df_results

### Accuracy

In [None]:
def run(models_to_test, test_data, true_labels, df_results):
    """Compute the accuracy of every model and store it in ``df_results``.

    Args:
        models_to_test: Mapping of display name to a dict with a
            ``"model_name"`` entry (checkpoint id).
        test_data: Iterable of input texts (list, pandas Series, ...). Items
            are read in iteration order, independent of any index labels.
        true_labels: Sliceable sequence of 0/1 reference labels. When its
            length differs from the number of texts, a warning is printed and
            only the leading entries are used.
        df_results: DataFrame with a ``'Modelo'`` column holding the display
            names. Its ``'Accuracy'`` column is (over)written in place.

    Returns:
        The same ``df_results`` object. Models that failed to evaluate get a
        missing value in ``'Accuracy'``.
    """
    # Compared upper-cased so lower-case label names are recognised too.
    negative_labels = {'NEGATIVE', 'LABEL_0', '0', '1 STAR', '2 STARS'}

    samples = list(test_data)

    # Make sure there is exactly one reference label per text.
    if len(samples) != len(true_labels):
        print(f"Warning: Length of texts ({len(test_data)}) does not match length of true_labels ({len(true_labels)}). Slicing true_labels.")
        true_labels = true_labels[:len(samples)]

    accuracies = {}

    for model_name, info in models_to_test.items():
        print(f"\n{'='*50}")
        print(f"Evaluando {model_name}...")

        try:
            classifier = pipeline(
                "text-classification",
                model=info["model_name"],
                tokenizer=info["model_name"],
                device=0 if torch.cuda.is_available() else -1,
                truncation=True,
                max_length=512
            )

            predictions = []
            for text in tqdm(samples, desc="Inferencia", unit="sample"):
                pred = classifier(text)[0]

                # Anything that is not a known negative label counts as positive.
                label = str(pred['label']).strip().upper()
                predictions.append(0 if label in negative_labels else 1)

            accuracy = accuracy_score(true_labels, predictions)
            accuracies[model_name] = accuracy
            print(f"‚úÖ Accuracy: {accuracy:.4f}")

        except Exception as e:
            # Keep going with the remaining models; record the failure as None.
            print(f"‚ùå Error evaluating {model_name}: {str(e)}")
            accuracies[model_name] = None

    # Attach the accuracies to the existing table, matched by model name.
    df_results['Accuracy'] = df_results['Modelo'].map(accuracies)
    return df_results

In [None]:
# Recompute the accuracies; run() writes them into df_results['Accuracy'] in
# place and returns the same frame, which becomes the cell output.
run(models_to_test, test_data, true_labels, df_results)

## Resultados

In [None]:
# Show the final benchmark table.
df_results

In [None]:
import math

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib.ticker import PercentFormatter


def _draw_metric_bars(ax, data, colors, column, title, ylabel):
    """Draw one bar per model for *column* and apply the shared axis styling."""
    sns.barplot(x='Modelo', y=column, data=data,
                palette=colors, ax=ax, edgecolor='black', linewidth=1.2)
    ax.set_title(title, fontsize=14, pad=15)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_xlabel('')
    ax.tick_params(axis='x', rotation=20)
    ax.grid(True, linestyle='--', alpha=0.6)


def _annotate_bars(ax, make_label, *, offset=9, fontsize=10, color_for=None, **extra):
    """Write a label above every bar of *ax*; bars without a finite height are skipped."""
    for bar in ax.patches:
        height = bar.get_height()
        if not math.isfinite(height):
            # A missing metric (failed model) has nothing to label.
            continue
        options = dict(extra)
        if color_for is not None:
            options['color'] = color_for(height)
        ax.annotate(make_label(height),
                    (bar.get_x() + bar.get_width() / 2., height),
                    ha='center', va='center',
                    xytext=(0, offset),
                    textcoords='offset points',
                    fontsize=fontsize,
                    **options)


def _format_minutes(value):
    """Format a duration given in minutes as '<m>m <s>s', rounded to the nearest second."""
    minutes, seconds = divmod(int(round(value * 60)), 60)
    return f'{minutes}m {seconds}s'


# Style and colour palette. matplotlib 3.6+ ships the former 'seaborn' style
# as 'seaborn-v0_8'; use whichever exists and fall back to 'ggplot'.
available_styles = plt.style.available
selected_style = next(
    (style for style in ('seaborn', 'seaborn-v0_8') if style in available_styles),
    'ggplot',
)
plt.style.use(selected_style)
sns.set_palette("husl")
palette = sns.color_palette("husl", len(df_results['Modelo'].unique()))

# Figure with a 2 x 2 grid of charts.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.patch.set_facecolor('#f5f5f5')
fig.suptitle('Comparativa de Modelos de Lenguaje',
             fontsize=20, fontweight='bold', y=0.98)

# 1. Accuracy. The column holds fractions in [0, 1]; the axis ticks and the
#    bar labels convert them to percentages so they agree with the "(%)" label.
ax1 = axes[0, 0]
_draw_metric_bars(ax1, df_results, palette, 'Accuracy',
                  'Precisi√≥n por Modelo', 'Accuracy (%)')
ax1.yaxis.set_major_formatter(PercentFormatter(xmax=1.0))
_annotate_bars(ax1, lambda h: f'{h:.2%}')

# 2. Load time; labels are red above the mean load time and green otherwise.
ax2 = axes[0, 1]
_draw_metric_bars(ax2, df_results, palette, 'Tiempo carga (s)',
                  'Tiempo de Carga por Modelo', 'Segundos')
mean_load_time = df_results['Tiempo carga (s)'].mean()
_annotate_bars(ax2, lambda h: f'{h:.2f}s',
               color_for=lambda h: 'red' if h > mean_load_time else 'green')

# 3. Inference time, labelled as minutes and seconds.
ax3 = axes[1, 0]
_draw_metric_bars(ax3, df_results, palette, 'Tiempo inferencia (min)',
                  'Tiempo de Inferencia por Modelo', 'Minutos')
_annotate_bars(ax3, _format_minutes)

# 4. Throughput, with an arrow from each label to its bar.
ax4 = axes[1, 1]
_draw_metric_bars(ax4, df_results, palette, 'Samples/s',
                  'Velocidad de Procesamiento', 'Muestras/segundo')
_annotate_bars(ax4, lambda h: f'‚Üí {h:.1f}',
               offset=15, fontsize=11,
               arrowprops=dict(arrowstyle="->", color='black', alpha=0.6))

# Layout: leave room for the figure title, then set the gaps between charts.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.subplots_adjust(hspace=0.3, wspace=0.25)

plt.show()