4. Modelos Contrastivos (CLIP-like)
Entrena un modelo basado en contraste, donde el objetivo es minimizar la distancia entre embeddings de texto humano y maximizar la distancia entre humano y generado por IA.

Ventajas:

Permite aprender representaciones robustas.
Se puede usar junto con un clasificador simple para la predicción final.
Ejemplo de entrenamiento contrastivo:

# Parameters

In [1]:
EPOCHS = 5
LEARNING_RATE = 0.0001
BATCH_SIZE = 256
LAYERS_TO_TRAIN = 0

In [2]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

  from .autonotebook import tqdm as notebook_tqdm


device(type='cuda')

In [3]:
ai_generated_path = "pan24-generative-authorship-news/machines"
human_path = "pan24-generative-authorship-news/human.jsonl"

In [4]:
import warnings
import logging

warnings.filterwarnings("ignore", message=".*overflowing tokens.*")
logging.disable(logging.WARNING)

## Libraries

In [5]:
import os
import json
import pandas as pd

## Import data

In [6]:
model, id, text = [], [], []

# Loop through every file in the directory
for filename in os.listdir(ai_generated_path):
    # Check if the file is a JSONL file
    if filename.endswith('.jsonl'):
        filepath = os.path.join(ai_generated_path, filename)
        with open(filepath, 'r', encoding='utf-8') as jsonl_file:
            for line in jsonl_file:
                # Each line is a separate JSON object
                data = json.loads(line)
                model.append(filename)
                id.append(data['id'])
                text.append(data['text'])

df_generated = pd.DataFrame({'model': model, 'id': id, 'text': text, 'ai_generated': 1})
df_generated

Unnamed: 0,model,id,text,ai_generated
0,alpaca-7b.jsonl,alpaca-7b/news-2021-01-01-2021-12-31-bideninau...,Inaugural Address: President Joseph R. Biden J...,1
1,alpaca-7b.jsonl,alpaca-7b/news-2021-01-01-2021-12-31-bideninau...,Setting the Record Straight: Fact-Checking the...,1
2,alpaca-7b.jsonl,alpaca-7b/news-2021-01-01-2021-12-31-bideninau...,Joe Biden Takes the Oath of Office as 46th Pre...,1
3,alpaca-7b.jsonl,alpaca-7b/news-2021-01-01-2021-12-31-bideninau...,Joe Biden Takes Oath as 46th President of Unit...,1
4,alpaca-7b.jsonl,alpaca-7b/news-2021-01-01-2021-12-31-bideninau...,Amanda Gorman's Inspiring Poem Celebrates Hope...,1
...,...,...,...,...
14126,vicgalle-gpt2-open-instruct-v1.jsonl,vicgalle-gpt2-open-instruct-v1/news-2021-01-01...,'The Disappearance of Gabby Petito' – A Compre...,1
14127,vicgalle-gpt2-open-instruct-v1.jsonl,vicgalle-gpt2-open-instruct-v1/news-2021-01-01...,"Utah State Police Search for Gabby Petito, Tra...",1
14128,vicgalle-gpt2-open-instruct-v1.jsonl,vicgalle-gpt2-open-instruct-v1/news-2021-01-01...,McKenna's Lost Friend: Debunking the Evidence ...,1
14129,vicgalle-gpt2-open-instruct-v1.jsonl,vicgalle-gpt2-open-instruct-v1/news-2021-01-01...,"""Gunshots Found in Florida Nature Preserve: A ...",1


In [7]:
id, text = [], []

with open(human_path, 'r', encoding='utf-8') as jsonl_file:
    for line in jsonl_file:
        # Each line is a separate JSON object
        data = json.loads(line)
        id.append(data['id'])
        text.append(data['text'])

df_human = pd.DataFrame({'model': 'Human', 'id': id, 'text': text, 'ai_generated': 0})
df_human

Unnamed: 0,model,id,text,ai_generated
0,Human,articles-cleaned-truncated/news-2021-01-01-202...,Inaugural Address by President Joseph R. Biden...,0
1,Human,articles-cleaned-truncated/news-2021-01-01-202...,Fact check: Biden inauguration impacted by pan...,0
2,Human,articles-cleaned-truncated/news-2021-01-01-202...,Highlights from Joe Biden's 2021 inauguration\...,0
3,Human,articles-cleaned-truncated/news-2021-01-01-202...,"Biden takes the helm, appeals for unity to tak...",0
4,Human,articles-cleaned-truncated/news-2021-01-01-202...,'The Hill We Climb': Read Amanda Gorman's inau...,0
...,...,...,...,...
1082,Human,articles-cleaned-truncated/news-2021-01-01-202...,How amateur detectives on social media helped ...,0
1083,Human,articles-cleaned-truncated/news-2021-01-01-202...,Authorities searching for missing 22-year-old ...,0
1084,Human,articles-cleaned-truncated/news-2021-01-01-202...,Univ. of Wisconsin Oshkosh student helping Gab...,0
1085,Human,articles-cleaned-truncated/news-2021-01-01-202...,Did the Internet Actually Help Find Gabby Peti...,0


In [8]:
df = pd.concat([df_generated, df_human])[['text', 'ai_generated', 'id']]
df

Unnamed: 0,text,ai_generated,id
0,Inaugural Address: President Joseph R. Biden J...,1,alpaca-7b/news-2021-01-01-2021-12-31-bideninau...
1,Setting the Record Straight: Fact-Checking the...,1,alpaca-7b/news-2021-01-01-2021-12-31-bideninau...
2,Joe Biden Takes the Oath of Office as 46th Pre...,1,alpaca-7b/news-2021-01-01-2021-12-31-bideninau...
3,Joe Biden Takes Oath as 46th President of Unit...,1,alpaca-7b/news-2021-01-01-2021-12-31-bideninau...
4,Amanda Gorman's Inspiring Poem Celebrates Hope...,1,alpaca-7b/news-2021-01-01-2021-12-31-bideninau...
...,...,...,...
1082,How amateur detectives on social media helped ...,0,articles-cleaned-truncated/news-2021-01-01-202...
1083,Authorities searching for missing 22-year-old ...,0,articles-cleaned-truncated/news-2021-01-01-202...
1084,Univ. of Wisconsin Oshkosh student helping Gab...,0,articles-cleaned-truncated/news-2021-01-01-202...
1085,Did the Internet Actually Help Find Gabby Peti...,0,articles-cleaned-truncated/news-2021-01-01-202...


# Process Data - Combinaciones únicamente del mismo id

In [9]:
from sklearn.model_selection import train_test_split
import pandas as pd

test_size = 0.25
val_size = 0.125
_adjusted_val_size = val_size / (1 - test_size)

# Extraer el segundo y tercer segmento de los IDs
df['base_id'] = df['id'].apply(lambda x: '/'.join(x.split('/')[1:]))  # Coger los ids sin la parte que identifica al autor del fragmento de texto.

# Paso 1: Dividir los datos según los `base_id`
base_ids = df['base_id'].unique()
train_base_ids, test_base_ids = train_test_split(base_ids, test_size=test_size, random_state=1337)
train_base_ids, val_base_ids = train_test_split(train_base_ids, test_size=_adjusted_val_size, random_state=1337) 

# Crear DataFrames por conjunto
train = df[df['base_id'].isin(train_base_ids)]
val = df[df['base_id'].isin(val_base_ids)]
test = df[df['base_id'].isin(test_base_ids)]

train.reset_index(drop=True, inplace=True)
val.reset_index(drop=True, inplace=True)
test.reset_index(drop=True, inplace=True)

print(f"train shape: {train.shape}")
print(f"val shape: {val.shape}")
print(f"test shape: {test.shape}")

train shape: (9506, 4)
val shape: (1904, 4)
test shape: (3808, 4)


In [10]:
def create_combinations_within_id(df):
    # Lista para almacenar las combinaciones de cada `base_id`
    combinations = []
    
    # Iterar sobre cada `base_id`
    for base_id, group in df.groupby('base_id'):
        # Filtrar textos humanos e IA dentro del grupo
        df_human = group[group['ai_generated'] == 0][['text']].reset_index(drop=True)
        df_ia = group[group['ai_generated'] == 1][['text']].reset_index(drop=True)
        
        # Producto cartesiano dentro del `base_id`
        cartesian_df = df_human.merge(df_ia, how='cross', suffixes=('_human', '_ia'))
        cartesian_df = cartesian_df.sample(frac=1).reset_index(drop=True)
        
        # Crear las dos disposiciones
        total_combinations = len(cartesian_df)
        
        half_1 = cartesian_df.iloc[:total_combinations // 2].copy()
        half_1['comment_text_1'] = half_1['text_human']
        half_1['comment_text_2'] = half_1['text_ia']
        half_1['list'] = 0  # Etiqueta 0
        
        half_2 = cartesian_df.iloc[total_combinations // 2:].copy()
        half_2['comment_text_1'] = half_2['text_ia']
        half_2['comment_text_2'] = half_2['text_human']
        half_2['list'] = 1  # Etiqueta 1
        
        # Combinar y agregar al resultado final
        balanced_df = pd.concat([half_1, half_2], ignore_index=True)
        combinations.append(balanced_df)
    
    # Concatenar todas las combinaciones y barajar
    return pd.concat(combinations, ignore_index=True).sample(frac=1).reset_index(drop=True)


In [11]:
# Generar combinaciones restringidas por `id` para cada conjunto
train = create_combinations_within_id(train)
val = create_combinations_within_id(val)
test = create_combinations_within_id(test)

In [12]:
# Print the dimensions
print(f"train shape: {train.shape} / Text on comment_text_1 is human-generated: {train['list'].value_counts()[0]} - Text on comment_text_2 is human-generated: {train['list'].value_counts()[1]}")
print(f"val shape: {val.shape} / Text on comment_text_1 is human-generated: {val['list'].value_counts()[0]} - Text on comment_text_2 is human-generated: {val['list'].value_counts()[1]}")
print(f"test shape: {test.shape} / Text on comment_text_1 is human-generated: {test['list'].value_counts()[0]} - Text on comment_text_2 is human-generated: {test['list'].value_counts()[1]}")

train shape: (8827, 5) / Text on comment_text_1 is human-generated: 4074 - Text on comment_text_2 is human-generated: 4753
val shape: (1768, 5) / Text on comment_text_1 is human-generated: 816 - Text on comment_text_2 is human-generated: 952
test shape: (3536, 5) / Text on comment_text_1 is human-generated: 1632 - Text on comment_text_2 is human-generated: 1904


# Model

In [13]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import torch
from transformers import BertTokenizer, BertModel

# Tokenizador y modelo
model_name = "Lau123/distilbert-base-uncased-detect_ai_generated_text"
# tokenizer = DistilBertTokenizer.from_pretrained(model_name)
# individual_model = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = BertTokenizer.from_pretrained(model_name)
individual_model =  BertModel.from_pretrained(model_name).from_pretrained(model_name, num_labels=2)

# Configuración del dispositivo y optimizador
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# individual_model.to(device)

In [14]:
# # Freeze all layers except the classifier layer
# for name, param in individual_model.named_parameters():
#     if name != "classifier.weight" and name != "classifier.bias":
#         param.requires_grad = False

for param in individual_model.parameters():
    param.requires_grad = False
if LAYERS_TO_TRAIN > 0:
    for layer in individual_model.encoder.layer[-LAYERS_TO_TRAIN:]:
        for param in layer.parameters():
            param.requires_grad = True

for param in individual_model.pooler.dense.parameters():
    param.requires_grad = True

# Verify that only the classifier layer is trainable
for name, param in individual_model.named_parameters():
    print(f"{name}: requires_grad = {param.requires_grad}")

embeddings.word_embeddings.weight: requires_grad = False
embeddings.position_embeddings.weight: requires_grad = False
embeddings.token_type_embeddings.weight: requires_grad = False
embeddings.LayerNorm.weight: requires_grad = False
embeddings.LayerNorm.bias: requires_grad = False
encoder.layer.0.attention.self.query.weight: requires_grad = False
encoder.layer.0.attention.self.query.bias: requires_grad = False
encoder.layer.0.attention.self.key.weight: requires_grad = False
encoder.layer.0.attention.self.key.bias: requires_grad = False
encoder.layer.0.attention.self.value.weight: requires_grad = False
encoder.layer.0.attention.self.value.bias: requires_grad = False
encoder.layer.0.attention.output.dense.weight: requires_grad = False
encoder.layer.0.attention.output.dense.bias: requires_grad = False
encoder.layer.0.attention.output.LayerNorm.weight: requires_grad = False
encoder.layer.0.attention.output.LayerNorm.bias: requires_grad = False
encoder.layer.0.intermediate.dense.weight: requ

## Contrastivo

In [15]:
from torch.nn.functional import cosine_similarity

def contrastive_loss(embeddings1, embeddings2, labels, margin=0.5):
    sim = cosine_similarity(embeddings1, embeddings2)
    loss = torch.mean(labels * (1 - sim) + (1 - labels) * torch.clamp(sim - margin, min=0))
    return loss

In [16]:
from transformers import BertModel
import torch
import torch.nn.functional as F

class TransformerContrastive(torch.nn.Module):
    def __init__(self, bert_model):
        super(TransformerContrastive, self).__init__()
        self.l1 = bert_model  # Modelo BERT o similar
        self.l2 = torch.nn.Linear(768, 768)  # Proyección del embedding
        self.l3 = torch.nn.Dropout(0.1)

    def forward(self, ids_0, mask_0, token_type_ids_0, ids_1, mask_1, token_type_ids_1):
        # Generar embeddings para ambos textos
        embed_a = self.l1(ids_0, attention_mask=mask_0, token_type_ids=token_type_ids_0).last_hidden_state[:, 0]
        embed_b = self.l1(ids_1, attention_mask=mask_1, token_type_ids=token_type_ids_1).last_hidden_state[:, 0]

        # Proyección a un espacio latente (opcional, pero puede mejorar resultados)
        embed_a = F.gelu(self.l3(self.l2(embed_a)))
        embed_b = F.gelu(self.l3(self.l2(embed_b)))

        return embed_a, embed_b  # Devuelve los embeddings de ambos textos

# Inicializar modelo
model = TransformerContrastive(individual_model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

TransformerContrastive(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affi

In [17]:
from transformers import AdamW

loss_fn = torch.nn.BCELoss()
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)



In [18]:
from transformers import AdamW

class ContrastiveLoss(torch.nn.Module):
    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, embed_a, embed_b, label):
        # Distancia euclidiana entre embeddings
        distance = torch.norm(embed_a - embed_b, p=2, dim=1)

        # Pérdida contrastiva
        loss = label * distance.pow(2) + (1 - label) * F.relu(self.margin - distance).pow(2)
        return loss.mean()

# loss_fn = torch.nn.BCELoss()
loss_fn = ContrastiveLoss()
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

# Data Generators

In [19]:
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.comment_text_1 = dataframe.comment_text_1
        self.comment_text_2 = dataframe.comment_text_2
        self.targets = self.data.list
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        comment_text_1 = str(self.comment_text_1[index])
        comment_text_1 = " ".join(comment_text_1.split())
        comment_text_2 = str(self.comment_text_2[index])
        comment_text_2 = " ".join(comment_text_2.split())
        inputs0 = self.tokenizer(comment_text_1, 
                                comment_text_2, 
                                max_length=self.max_len,
                                padding="max_length",
                                truncation=True,
                                # truncation="only_second",
                                # truncation="only_first",
                                # truncation="longest_first",
                                return_overflowing_tokens=False,
                                return_token_type_ids=True,)
                                # return_overflowing_tokens=True)
                                # return_overflowing_tokens=False)
        inputs1 = self.tokenizer(comment_text_1,
                                max_length=self.max_len,
                                padding="max_length",
                                truncation=True,
                                return_overflowing_tokens=False,
                                return_token_type_ids=True,)
        return {
            'ids_0': torch.tensor(inputs0.input_ids, dtype=torch.long),
            'mask_0': torch.tensor(inputs0.attention_mask, dtype=torch.long),
            'token_type_ids_0': torch.tensor(inputs0.token_type_ids, dtype=torch.long),
            'ids_1': torch.tensor(inputs1.input_ids, dtype=torch.long),
            'mask_1': torch.tensor(inputs1.attention_mask, dtype=torch.long),
            'token_type_ids_1': torch.tensor(inputs1.token_type_ids, dtype=torch.long),
            'labels': torch.tensor(self.targets[index], dtype=torch.long)
          }


In [20]:
# Instancia el dataset
train_dataset = CustomDataset(dataframe=train, tokenizer=tokenizer, max_len=512)
val_dataset = CustomDataset(dataframe=val, tokenizer=tokenizer, max_len=512)
test_dataset = CustomDataset(dataframe=test, tokenizer=tokenizer, max_len=512)

In [21]:
from torch.utils.data import DataLoader

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

# Entrenamiento y validación

In [22]:
def c_at_1(targets, preds):
    """
    Calculates the C@1 metric:
    - Non-answers (predictions marked as -1) are given a score of 0.5.
    - Remaining cases are scored based on accuracy.
    
    Parameters:
        targets (np.array): Ground truth labels.
        preds (np.array): Predictions, where -1 indicates a non-answer.
    
    Returns:
        float: C@1 metric.
    """
    correct = (targets == preds)  # Boolean array for correct predictions
    unanswered = preds == -1     # Boolean array for non-answers
    
    num_correct = correct.sum()
    num_total = len(targets)
    num_unanswered = unanswered.sum()
    
    return (num_correct + num_unanswered * 0.5) / num_total

In [23]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, brier_score_loss, fbeta_score
import numpy as np

# Función de entrenamiento
def train_epoch(model, loader, optimizer, loss_fn, device):
    model.train()
    total_loss = 0
    for i, batch in enumerate(loader):
        print(f"Batch {i+1}/{len(loader)}")
        labels = batch['labels'].unsqueeze(1).to(device).float()
        batch = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
        embed_a, embed_b = model(**batch)

        loss = loss_fn(embed_a, embed_b, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        if i == 2:
            break

    return total_loss / len(loader)

# Función de evaluación para el modelo contrastivo
def evaluate(model, loader, device):
    model.eval()
    all_distances, all_labels = [], []

    with torch.no_grad():
        for batch in loader:
            # Separar etiquetas y mover datos al dispositivo
            labels = batch['labels'].float().to(device)
            batch = {k: v.to(device) for k, v in batch.items() if k != 'labels'}

            # Generar embeddings
            embed_a, embed_b = model(
                ids_0=batch['ids_0'], 
                mask_0=batch['mask_0'], 
                token_type_ids_0=batch['token_type_ids_0'],
                ids_1=batch['ids_1'], 
                mask_1=batch['mask_1'], 
                token_type_ids_1=batch['token_type_ids_1']
            )

            # Calcular distancias euclidianas
            distances = torch.norm(embed_a - embed_b, p=2, dim=1)

            # Guardar distancias y etiquetas
            all_distances.extend(distances.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Convertir a numpy arrays
    all_distances = np.array(all_distances)
    all_labels = np.array(all_labels)

    # Calcular métricas
    mean_positive_distance = all_distances[all_labels == 1].mean()  # Donde el humano es el segundo texto
    mean_negative_distance = all_distances[all_labels == 0].mean()  # Donde el humano es el primer texto
    distance_ratio = mean_positive_distance / (mean_negative_distance + 1e-6)  # Relación entre las dos

    # ROC AUC para evaluar la separación entre las clases
    roc_auc = roc_auc_score(all_labels, -all_distances)  # Menores distancias => más similares

    metrics = {
        "mean_positive_distance": mean_positive_distance,
        "mean_negative_distance": mean_negative_distance,
        "distance_ratio": distance_ratio,
        "roc_auc": roc_auc,
    }

    return metrics

In [24]:
history = {
    "train_loss": [],
    "train_metrics": [],
    "val_metrics": []
}

save_path = f"models/models_contrastive/fine_tuned_model_{EPOCHS}_epochs_{LEARNING_RATE}_lr_{LAYERS_TO_TRAIN}_layers_{BATCH_SIZE}_batch_size"

for epoch in range(EPOCHS):
    print(f"Starting Epoch {epoch + 1}/{EPOCHS}")
    print("* Training")
    train_loss = train_epoch(model, train_loader, optimizer, loss_fn, device)

    print("* Saving model")
    _epoch_save_path = f"{save_path}_checkoint_{epoch + 1}.pth"
    torch.save(model, _epoch_save_path)

    print("* Calculating metrics for training")
    train_metrics = evaluate(model, train_loader, device)
    print("* Calculating metrics for validation")
    val_metrics = evaluate(model, val_loader, device)

    history["train_loss"].append(train_loss)
    history["train_metrics"].append(train_metrics)
    history["val_metrics"].append(val_metrics)

    print(f"Epoch {epoch + 1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}")
    print("Train Metrics:")
    for metric_name, value in train_metrics.items():
        print(f"  {metric_name}: {value:.4f}")
    print("Validation Metrics:")
    for metric_name, value in val_metrics.items():
        print(f"  {metric_name}: {value:.4f}")

    break

Starting Epoch 1/5
* Training
Batch 1/35
Batch 2/35
Batch 3/35
* Saving model
* Calculating metrics for training
* Calculating metrics for validation
Epoch 1/5
Train Loss: 3.0727
Train Metrics:
  mean_positive_distance: 3.4276
  mean_negative_distance: 3.0667
  distance_ratio: 1.1177
  roc_auc: 0.4225
Validation Metrics:
  mean_positive_distance: 3.4169
  mean_negative_distance: 3.0563
  distance_ratio: 1.1180
  roc_auc: 0.4052


## Final evaluation (Classification)

In [25]:
import gc  # Para recolección de basura

human_text_tuples = [(text, text, 0) for text in train['text_human']]
human_df = pd.DataFrame(human_text_tuples, columns=['comment_text_1', 'comment_text_2', 'list'])
human_dataset = CustomDataset(dataframe=human_df, tokenizer=tokenizer, max_len=512)
human_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE)

human_embeddings = []
for i, batch in enumerate(human_loader):
    print(f"Batch {i+1}/{len(human_loader)}")
    labels = batch['labels'].unsqueeze(1).to(device).float()
    batch = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
    embed_a, _ = model(**batch)
    human_embeddings.extend(embed_a.cpu())

stacked = torch.stack(human_embeddings)
prototype_human = stacked.mean(dim=0).to(device)


del human_embeddings, batch, embed_a, labels
torch.cuda.empty_cache()
gc.collect()

Batch 1/35
Batch 2/35
Batch 3/35
Batch 4/35
Batch 5/35
Batch 6/35
Batch 7/35
Batch 8/35
Batch 9/35
Batch 10/35
Batch 11/35
Batch 12/35
Batch 13/35
Batch 14/35
Batch 15/35
Batch 16/35
Batch 17/35
Batch 18/35
Batch 19/35
Batch 20/35
Batch 21/35
Batch 22/35
Batch 23/35
Batch 24/35
Batch 25/35
Batch 26/35
Batch 27/35
Batch 28/35
Batch 29/35
Batch 30/35
Batch 31/35
Batch 32/35
Batch 33/35
Batch 34/35
Batch 35/35


20

In [None]:
# Función de evaluación para la clasificación final
def evaluate(model, loader, prototype_human, device):
    model.eval()
    preds, targets, probabilities = [], [], []

    with torch.no_grad():
        for i, batch in enumerate(loader):
            print(f"Batch {i+1}/{len(loader)}")
            labels = batch['labels'].unsqueeze(1).float()
            batch = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
            embed_a, embed_b = model(**batch)
            similarity_a_human = torch.cosine_similarity(embed_a, prototype_human.unsqueeze(0), dim=1)
            similarity_b_human = torch.cosine_similarity(embed_b, prototype_human.unsqueeze(0), dim=1)
            predictions = (similarity_a_human <= similarity_b_human).int()  # 1 for sim_b > sim_a, else 0
            prob = similarity_b_human - similarity_a_human
            preds.extend(predictions.cpu().numpy())
            targets.extend(labels.cpu().numpy())
            probabilities.extend(prob.cpu().numpy())
    
    targets = np.array(targets).flatten()
    preds = np.array(preds).flatten()
    probabilities = np.array(probabilities).flatten()


    # Calculate metrics
    print("Calculating roc_auc")
    roc_auc = roc_auc_score(targets, probabilities)
    # print("Calculating brier")
    # brier = brier_score_loss(targets, probabilities)
    print("Calculating f1")
    f1 = f1_score(targets, preds)
    print("Calculating f05u")
    f05u = fbeta_score(targets, preds, beta=0.5)
    print("Calculating c@1")
    c1 = c_at_1(targets, preds)
    print("Calculating mean")
    # mean = np.mean([roc_auc, brier, c1, f1, f05u])
    mean = np.mean([roc_auc, c1, f1, f05u])
    print("Calcualtin accuracy")
    accuracy = accuracy_score(targets, preds)
    
    return {
        "accuracy": accuracy,
        "roc-auc": roc_auc,
        # "brier": brier,
        "c@1": c1,
        "f1": f1,
        "f05u": f05u,
        "mean": mean,
    }

In [None]:
# Evaluación final en el conjunto de prueba
# train_accuracy = evaluate(model, train_loader, prototype_human, device)
# print(f"Train Accuracy: {train_accuracy:.4f}")
# val_accuracy = evaluate(model, val_loader, prototype_human, device)
# print(f"Val Accuracy: {val_accuracy:.4f}")
test_accuracy = evaluate(model, test_loader, prototype_human, device)

Batch 35/14
Batch 35/14
Batch 35/14
Batch 35/14
Batch 35/14
Batch 35/14
Batch 35/14
Batch 35/14
Batch 35/14
Batch 35/14
Batch 35/14
Batch 35/14
Batch 35/14
Batch 35/14
Calculating roc_auc
Calculating f1
Calculating f05u
Calculating c@1
Calculating mean
Calcualtin accuracy


TypeError: unsupported format string passed to dict.__format__

In [31]:
test_accuracy

{'accuracy': 0.46153846153846156,
 'roc-auc': 0.5179647916151755,
 'c@1': 0.46153846153846156,
 'f1': 0.0,
 'f05u': 0.0,
 'mean': 0.24487581328840927}