In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import csv
import os
from transformers import AutoTokenizer, AutoModel
from torch.utils.data import random_split
from sklearn.model_selection import GroupShuffleSplit
from scipy.stats import spearmanr
from tqdm import tqdm 
import geoopt
from datetime import datetime
import json
import numpy as np
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.special import expit  # For sigmoid function

# Assuming you've already defined your model and poincare_distance function as in the original code
# Let's redefine them here to make this script standalone

class HyperbolicMapper(torch.nn.Module):
    """Frozen transformer encoder with a trainable linear head and ball projection.

    The encoder weights are excluded from gradient updates; the trainable
    pieces are a single linear layer plus two scalar parameters
    (``curvature`` and ``temperature``).

    Args:
        sbert_model_name: Hugging Face model id passed to ``AutoModel.from_pretrained``.
        output_dim: Size of the embedding produced by the linear head.
    """

    def __init__(self, sbert_model_name='sentence-transformers/all-MiniLM-L6-v2', output_dim=32):
        super().__init__()

        # The sentence encoder is only a feature extractor: no gradients flow into it.
        encoder = AutoModel.from_pretrained(sbert_model_name)
        encoder.requires_grad_(False)
        self.sbert = encoder

        # Learnable scalars; `curvature` is also read by callers computing distances.
        self.curvature = torch.nn.Parameter(torch.tensor(1.0))
        self.temperature = torch.nn.Parameter(torch.tensor(1.0))

        # Wrapped in nn.Sequential so the parameter names remain "projection.0.*",
        # which is what existing checkpoints were saved with.
        hidden_size = encoder.config.hidden_size
        self.projection = nn.Sequential(nn.Linear(hidden_size, output_dim))
        print("Initialized model")

    def poincare_project(self, x):
        """Rescale ``x`` along its last dimension to a fixed radius.

        After dividing by ``temperature``, every vector is scaled by
        ``(1 - 1e-5) / max(norm * sqrt(curvature), 1e-5)``. Whenever the clamp
        is inactive the result therefore has norm ``(1 - 1e-5) / sqrt(curvature)``
        regardless of the input magnitude; only the direction is retained.
        """
        tempered = x / self.temperature
        radius = torch.norm(tempered, p=2, dim=-1, keepdim=True)
        scaled_radius = torch.clamp(radius * torch.sqrt(self.curvature), min=1e-5)
        return tempered * ((1 - 1e-5) / scaled_radius)

    def forward(self, input_ids, attention_mask):
        """Encode a tokenized batch and return the projected embeddings.

        The encoder runs under ``torch.no_grad()``; the hidden state at
        position 0 of each sequence is fed to the linear head and the result
        is passed through :meth:`poincare_project`.
        """
        with torch.no_grad():
            encoded = self.sbert(input_ids=input_ids, attention_mask=attention_mask)
            first_token = encoded.last_hidden_state[:, 0]

        return self.poincare_project(self.projection(first_token))


def poincare_distance(x, y, curvature=1.0, eps=1e-5):
    """Batched Poincaré-style distance between embeddings.

    Args:
        x: Tensor with 2 dimensions (the last one is reduced by the norm).
        y: Tensor with 2 dimensions (same layout as ``x``), or 3 dimensions,
            in which case ``x`` gets a singleton axis inserted at position 1
            so that it broadcasts against ``y``.
        curvature: Curvature value, either a tensor (e.g. a learnable
            parameter) or a plain Python number.
        eps: Small constant added to the curvature before the square root and
            used as the lower clamp for the denominator and the acosh input.

    Returns:
        Tensor of distances with the last (feature) dimension removed.

    Raises:
        ValueError: If the dimensionalities of ``x`` and ``y`` are not (2, 2)
            or (2, 3).
    """
    # torch.sqrt only accepts tensors, so a plain number (including the
    # default 1.0) must be lifted first; tensor inputs are passed through
    # untouched so gradients and device placement are preserved.
    if not torch.is_tensor(curvature):
        curvature = torch.as_tensor(curvature, dtype=x.dtype, device=x.device)
    sqrt_c = torch.sqrt(curvature + eps)

    # Align batch shapes: one anchor per row of x against several candidates in y.
    if x.dim() == 2 and y.dim() == 3:
        x = x.unsqueeze(1)
    elif not (x.dim() == 2 and y.dim() == 2):
        raise ValueError(f"Incompatible shapes: x {x.shape}, y {y.shape}")

    # Norms are pre-scaled by sqrt(curvature + eps).
    x_norm = torch.norm(x, p=2, dim=-1, keepdim=True) * sqrt_c
    y_norm = torch.norm(y, p=2, dim=-1, keepdim=True) * sqrt_c
    pairwise_norm = torch.norm(x - y, p=2, dim=-1, keepdim=True) * sqrt_c

    # NOTE(review): the norms above already carry a sqrt(c) factor and are
    # multiplied by `curvature` again below, so the curvature enters squared
    # whenever c != 1. This is kept unchanged on purpose: presumably the saved
    # checkpoints were trained against exactly this expression - confirm
    # before altering it.
    denominator = (1 - curvature * x_norm**2) * (1 - curvature * y_norm**2)
    inside = 1 + 2 * curvature * pairwise_norm**2 / (denominator.clamp(min=eps))
    return torch.acosh(torch.clamp(inside, min=1+eps)).squeeze(-1) / (sqrt_c + eps)


# Dataset class for evaluating on pairs with binary labels
class BinaryLabelDataset(Dataset):
    """Sentence-pair dataset with binary labels, read from a tab-separated file.

    Each usable row has exactly three columns in the order
    ``sentence1 <TAB> label <TAB> sentence2``; the first line of the file is
    treated as a header and skipped.

    Args:
        file_path: Path of the tab-separated file.
        tokenizer: Callable tokenizer invoked with the keyword arguments used
            in ``_encode`` (Hugging Face tokenizer style).
        max_length: Length every sentence is padded/truncated to.
    """

    def __init__(self, file_path, tokenizer, max_length=128):
        self.data = self.read_file(file_path)
        self.tokenizer = tokenizer
        self.max_length = max_length
        print(f"Loaded {len(self.data)} sentence pairs")

    def read_file(self, file_path):
        """Parse the file into a list of ``(sentence1, sentence2, label)`` tuples.

        Labels are parsed as ``int(float(text))``; any value other than 0 or 1
        is mapped to 1 when positive and 0 otherwise. Rows that do not have
        exactly three columns, or whose label cannot be parsed as a finite
        number, are skipped and counted in the "problem rows" total that is
        printed at the end.
        """
        data = []
        problem_rows = 0

        with open(file_path, 'r', encoding='utf-8') as file:
            csv_reader = csv.reader(file, delimiter='\t', quotechar=None)
            next(csv_reader, None)  # Skip the header row

            for row in csv_reader:
                if len(row) != 3:
                    problem_rows += 1
                    continue

                # Column order in the file is: sentence1, label, sentence2.
                sentence1, label_str, sentence2 = row
                try:
                    # Accept both "1" and "1.0" style labels.
                    label = int(float(label_str))
                except (ValueError, OverflowError):
                    # Non-numeric, NaN or infinite label: skip the row, but
                    # make it visible in the reported count instead of
                    # dropping it silently.
                    problem_rows += 1
                    continue

                if label not in (0, 1):
                    # Collapse any other integer to binary: positive -> 1, else 0.
                    label = 1 if label > 0 else 0
                data.append((sentence1.strip(), sentence2.strip(), label))

        print("!!!!!!total problem rows = ", problem_rows)
        return data

    def __len__(self):
        return len(self.data)

    def _encode(self, sentence):
        """Tokenize one sentence and drop the batch axis added by ``return_tensors``."""
        encoded = self.tokenizer(
            sentence,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
            return_token_type_ids=False
        )
        return {k: v.squeeze(0) for k, v in encoded.items()}

    def __getitem__(self, idx):
        """Return the tokenized pair and its label as a float32 scalar tensor."""
        sentence1, sentence2, label = self.data[idx]

        return {
            'sent1_input': self._encode(sentence1),
            'sent2_input': self._encode(sentence2),
            'label': torch.tensor(label, dtype=torch.float32)
        }


def collate_fn_eval(batch):
    """Merge per-example dicts into one batch dict of stacked tensors.

    Args:
        batch: Sequence of items, each with ``'sent1_input'`` and
            ``'sent2_input'`` (dicts of equally-shaped tensors) and a
            ``'label'`` tensor.

    Returns:
        Dict with ``'sent1_input'`` and ``'sent2_input'`` (each a dict of
        tensors stacked along a new leading axis) and ``'labels'``.
    """
    def _stack_side(side):
        # The field names are taken from the first example of the batch.
        field_names = batch[0][side]
        return {
            name: torch.stack([example[side][name] for example in batch])
            for name in field_names
        }

    first = _stack_side('sent1_input')
    second = _stack_side('sent2_input')
    targets = torch.stack([example['label'] for example in batch])

    return {'sent1_input': first, 'sent2_input': second, 'labels': targets}


def distance_to_probability(distance, alpha=1.0, beta=0.0):
    """Map distances to scores in [0, 1] with a decreasing sigmoid.

    The score is ``sigmoid(-(distance * alpha - beta))``, so a smaller
    distance yields a larger score.

    Args:
        distance: Distances, either a torch tensor or anything
            ``scipy.special.expit`` accepts (e.g. a NumPy array).
        alpha: Multiplier applied to the distance (steepness).
        beta: Offset subtracted after scaling (shift).

    Returns:
        A detached CPU tensor when ``distance`` is a torch tensor, otherwise
        the result of ``expit``.
    """
    if not isinstance(distance, torch.Tensor):
        # Non-tensor path: evaluated by SciPy.
        return expit(-(distance * alpha - beta))

    # Tensor path: drop the autograd graph and move to host memory first.
    host_distance = distance.detach().cpu()
    negated_logit = -(host_distance * alpha - beta)
    return torch.sigmoid(negated_logit)


def evaluate_model(model, data_loader, device, alpha=1.0, beta=0.0):
    """Score every sentence pair and compute ROC / precision-recall metrics.

    Args:
        model: Embedding model; called with the tokenizer fields of each
            sentence as keyword arguments and read for ``model.curvature``.
        data_loader: Iterable of batches with ``'sent1_input'``,
            ``'sent2_input'`` and ``'labels'``.
        device: Device the batch tensors are moved to.
        alpha: Steepness passed to ``distance_to_probability``.
        beta: Shift passed to ``distance_to_probability``.

    Returns:
        Dict with the scalar metrics ``'roc_auc'`` and ``'pr_auc'``, the curve
        arrays ``'fpr'``, ``'tpr'``, ``'precision'``, ``'recall'``, and the raw
        per-pair ``'distances'``, ``'probs'`` and ``'labels'`` arrays.
    """
    def _on_device(encoded):
        return {name: tensor.to(device) for name, tensor in encoded.items()}

    distance_log, prob_log, label_log = [], [], []

    model.eval()
    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            targets = batch['labels'].to(device)

            # Embed both sides of each pair with the same model.
            left = model(**_on_device(batch['sent1_input']))
            right = model(**_on_device(batch['sent2_input']))

            pair_distance = poincare_distance(left, right, curvature=model.curvature)
            # Higher score means "more likely a positive pair".
            pair_score = distance_to_probability(pair_distance, alpha, beta)

            distance_log.extend(pair_distance.cpu().numpy())
            prob_log.extend(pair_score.cpu().numpy())
            label_log.extend(targets.cpu().numpy())

    distances = np.array(distance_log)
    scores = np.array(prob_log)
    truth = np.array(label_log)

    fpr, tpr, _ = roc_curve(truth, scores)
    precision, recall, _ = precision_recall_curve(truth, scores)

    return {
        'roc_auc': auc(fpr, tpr),
        'pr_auc': average_precision_score(truth, scores),
        'fpr': fpr,
        'tpr': tpr,
        'precision': precision,
        'recall': recall,
        'distances': distances,
        'probs': scores,
        'labels': truth
    }


def plot_curves(results, db_name, model_name, save_dir="./plots"):
    """Write ROC, precision-recall and distance-histogram figures as PNG files.

    Files are named ``{save_dir}/{db_name}_{model_name}_{roc|pr|dist}.png``.

    Args:
        results: Dict with ``'fpr'``, ``'tpr'``, ``'roc_auc'``, ``'recall'``,
            ``'precision'``, ``'pr_auc'``, ``'distances'`` and ``'labels'``.
        db_name: Dataset name; used only in the output file names.
        model_name: Model name; used in the plot titles and file names.
        save_dir: Output directory, created if it does not exist.
    """
    os.makedirs(save_dir, exist_ok=True)

    roc_label = f'ROC curve (AUC = {results["roc_auc"]:.3f})'
    pr_label = f'PR curve (AUC = {results["pr_auc"]:.3f})'

    # --- ROC curve, with the chance diagonal for reference ---
    plt.figure(figsize=(10, 8))
    plt.plot(results['fpr'], results['tpr'], lw=2, label=roc_label)
    plt.plot([0, 1], [0, 1], 'k--', lw=2)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curve - {model_name}')
    plt.legend(loc="lower right")
    plt.savefig(f"{save_dir}/{db_name}_{model_name}_roc.png")
    plt.close()

    # --- Precision-recall curve ---
    plt.figure(figsize=(10, 8))
    plt.plot(results['recall'], results['precision'], lw=2, label=pr_label)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(f'Precision-Recall Curve - {model_name}')
    plt.legend(loc="lower left")
    plt.savefig(f"{save_dir}/{db_name}_{model_name}_pr.png")
    plt.close()

    # --- Overlaid, normalized distance histograms per class ---
    distances = results['distances']
    labels = results['labels']
    plt.figure(figsize=(12, 8))
    for class_value, legend_text in ((1, 'Positive pairs'), (0, 'Negative pairs')):
        plt.hist(distances[labels == class_value], bins=50, alpha=0.5,
                 label=legend_text, density=True)
    plt.xlabel('Hyperbolic Distance')
    plt.ylabel('Density')
    plt.title(f'Distance Distribution - {model_name}')
    plt.legend()
    plt.savefig(f"{save_dir}/{db_name}_{model_name}_dist.png")
    plt.close()


def find_optimal_threshold(labels, probs):
    """Pick the probability cutoff with the highest F1 score.

    One hundred evenly spaced cutoffs in [0, 1] are tried; an example is
    predicted positive when ``probs >= cutoff``. On ties the smallest cutoff
    wins.

    Args:
        labels: Array of ground-truth labels (1 = positive, 0 = negative).
        probs: Array of predicted scores, same length as ``labels``.

    Returns:
        Tuple ``(best_cutoff, best_f1)``.
    """
    is_positive = labels == 1
    is_negative = labels == 0

    def _f1_at(cutoff):
        flagged = (probs >= cutoff).astype(int)

        hits = np.sum((flagged == 1) & is_positive)
        false_alarms = np.sum((flagged == 1) & is_negative)
        misses = np.sum((flagged == 0) & is_positive)

        # Each ratio falls back to 0 when its denominator is empty.
        predicted_total = hits + false_alarms
        actual_total = hits + misses
        precision = hits / predicted_total if predicted_total > 0 else 0
        recall = hits / actual_total if actual_total > 0 else 0

        combined = precision + recall
        return 2 * precision * recall / combined if combined > 0 else 0

    cutoffs = np.linspace(0, 1, 100)
    scores = [_f1_at(cutoff) for cutoff in cutoffs]

    # argmax returns the first maximum, i.e. the lowest cutoff among ties.
    best = np.argmax(scores)
    return cutoffs[best], scores[best]


def optimize_distance_conversion(model, eval_loader, device):
    """Grid-search the sigmoid parameters used to turn distances into scores.

    Distances for all pairs in ``eval_loader`` are computed once; then every
    combination of 20 ``alpha`` values in [0.1, 10] and 20 ``beta`` values in
    [-5, 5] is scored by the best F1 that ``find_optimal_threshold`` reports
    for it. The first combination reaching the highest F1 is kept.

    Args:
        model: Embedding model; also read for ``model.curvature``.
        eval_loader: Iterable of batches with ``'sent1_input'``,
            ``'sent2_input'`` and ``'labels'``.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(best_alpha, best_beta)``; ``(1.0, 0.0)`` if no combination
        achieves an F1 above zero.
    """
    def _on_device(encoded):
        return {name: tensor.to(device) for name, tensor in encoded.items()}

    # Pass 1: run the model once and cache per-pair distances and labels.
    distance_log, label_log = [], []
    model.eval()
    with torch.no_grad():
        for batch in tqdm(eval_loader, desc="Collecting distances"):
            targets = batch['labels'].to(device)

            left = model(**_on_device(batch['sent1_input']))
            right = model(**_on_device(batch['sent2_input']))
            pair_distance = poincare_distance(left, right, curvature=model.curvature)

            distance_log.extend(pair_distance.cpu().numpy())
            label_log.extend(targets.cpu().numpy())

    distances = np.array(distance_log)
    truth = np.array(label_log)

    # Pass 2: exhaustive search over the (alpha, beta) grid.
    alpha_grid = np.linspace(0.1, 10.0, 20)
    beta_grid = np.linspace(-5.0, 5.0, 20)

    best_f1, best_alpha, best_beta = 0, 1.0, 0.0
    for alpha, beta in ((a, b) for a in alpha_grid for b in beta_grid):
        candidate_probs = distance_to_probability(distances, alpha, beta)
        _, candidate_f1 = find_optimal_threshold(truth, candidate_probs)

        # Strict comparison: earlier grid points win ties.
        if candidate_f1 > best_f1:
            best_f1, best_alpha, best_beta = candidate_f1, alpha, beta

    print(f"Optimal parameters: alpha={best_alpha:.3f}, beta={best_beta:.3f}, F1={best_f1:.3f}")
    return best_alpha, best_beta


def _evaluate_checkpoint(model_name, dim, db_name, data_loader, device,
                         model_dir='saved_models3', results_dir='results'):
    """Evaluate one saved checkpoint on one dataset and persist plots + JSON summary."""
    model_path = f'{model_dir}/{model_name}.pt'

    print(f"Evaluating model: {model_name}")

    # Rebuild the architecture with the matching output size, then load weights.
    model = HyperbolicMapper(output_dim=dim).to(device)
    # weights_only=True restricts unpickling to tensors/containers, which is
    # all a state dict needs, and avoids executing arbitrary pickled code.
    model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))

    # Tune the distance -> probability mapping, then score with the tuned values.
    alpha, beta = optimize_distance_conversion(model, data_loader, device)
    results = evaluate_model(model, data_loader, device, alpha, beta)

    print(f"Model: {model_name}")
    print(f"ROC AUC: {results['roc_auc']:.4f}")
    print(f"PR AUC: {results['pr_auc']:.4f}")

    plot_curves(results, db_name, model_name)

    # Build the summary before opening the file so that a failure here does
    # not leave an empty JSON file behind.
    optimal_threshold, _ = find_optimal_threshold(results['labels'], results['probs'])
    json_results = {
        'model': model_name,
        'roc_auc': float(results['roc_auc']),
        'pr_auc': float(results['pr_auc']),
        'alpha': float(alpha),
        'beta': float(beta),
        'optimal_threshold': float(optimal_threshold)
    }

    os.makedirs(results_dir, exist_ok=True)
    with open(f"{results_dir}/{db_name}_{model_name}_eval.json", 'w') as f:
        json.dump(json_results, f, indent=2)


def main():
    """Evaluate every saved checkpoint on every binary-labelled dataset.

    For each dataset, the ``paws_hyp_*`` checkpoints are evaluated first and
    the ``hyp_*`` checkpoints second, each over the full list of embedding
    dimensions. Per (dataset, checkpoint) pair this writes three plots and
    one ``results/{dataset}_{model}_eval.json`` summary.
    """
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')

    # (path to binary labelled dataset, name used in output files) - replace
    # with your own dataset paths.
    datasets = (
        ('AnnotatedMSR/llm_generated.txt', 'llm_generated'),
        ('AnnotatedMSR/msr_test_custom.txt', 'msr_test_custom'),
    )
    model_prefixes = ('paws_hyp', 'hyp')
    model_dims = (384, 192, 96, 64, 48, 32, 24, 16)  # Dimensions you trained with

    for binary_dataset_path, db_name in datasets:
        # Each dataset is loaded only when its turn comes.
        binary_dataset = BinaryLabelDataset(binary_dataset_path, tokenizer)
        binary_loader = DataLoader(
            binary_dataset,
            batch_size=32,
            shuffle=False,
            collate_fn=collate_fn_eval
        )

        for prefix in model_prefixes:
            for dim in model_dims:
                _evaluate_checkpoint(f"{prefix}_{dim}", dim, db_name, binary_loader, device)


# Script entry. There is no `if __name__ == "__main__"` guard here, so the
# statements below also run when this file is imported.
# Both output directories are created up front: main() writes JSON summaries
# into "results" and figures into "plots".
for output_dir in ("results", "plots"):
    os.makedirs(output_dir, exist_ok=True)

main()



Using device: cuda
!!!!!!total problem rows =  0
Loaded 408 sentence pairs
Evaluating model: paws_hyp_384
Initialized model


  model.load_state_dict(torch.load(model_path, map_location=device))
Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:02<00:00,  6.07it/s]


Optimal parameters: alpha=1.142, beta=-1.842, F1=0.806


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 25.00it/s]


Model: paws_hyp_384
ROC AUC: 0.7378
PR AUC: 0.8097
Evaluating model: paws_hyp_192
Initialized model


  model.load_state_dict(torch.load(model_path, map_location=device))
Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 26.92it/s]


Optimal parameters: alpha=1.142, beta=1.316, F1=0.808


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 26.72it/s]


Model: paws_hyp_192
ROC AUC: 0.7363
PR AUC: 0.8008


  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: paws_hyp_96
Initialized model


Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 27.00it/s]


Optimal parameters: alpha=1.663, beta=2.895, F1=0.805


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 25.76it/s]


Model: paws_hyp_96
ROC AUC: 0.7198
PR AUC: 0.7959
Evaluating model: paws_hyp_64


  model.load_state_dict(torch.load(model_path, map_location=device))


Initialized model


Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 24.64it/s]


Optimal parameters: alpha=1.663, beta=2.368, F1=0.800


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 24.83it/s]


Model: paws_hyp_64
ROC AUC: 0.7355
PR AUC: 0.8043


  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: paws_hyp_48
Initialized model


Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 25.43it/s]


Optimal parameters: alpha=0.621, beta=1.842, F1=0.809


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 25.48it/s]


Model: paws_hyp_48
ROC AUC: 0.7263
PR AUC: 0.7907


  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: paws_hyp_32
Initialized model


Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 24.82it/s]


Optimal parameters: alpha=4.268, beta=-0.263, F1=0.790


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 23.75it/s]


Model: paws_hyp_32
ROC AUC: 0.7253
PR AUC: 0.8034
Evaluating model: paws_hyp_24
Initialized model


  model.load_state_dict(torch.load(model_path, map_location=device))
Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 23.81it/s]


Optimal parameters: alpha=0.621, beta=2.895, F1=0.789


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 21.84it/s]


Model: paws_hyp_24
ROC AUC: 0.7200
PR AUC: 0.7964
Evaluating model: paws_hyp_16


  model.load_state_dict(torch.load(model_path, map_location=device))


Initialized model


Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 25.26it/s]


Optimal parameters: alpha=0.100, beta=1.316, F1=0.786


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 28.63it/s]


Model: paws_hyp_16
ROC AUC: 0.7064
PR AUC: 0.7878


  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: hyp_384
Initialized model


Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 28.00it/s]


Optimal parameters: alpha=0.100, beta=1.316, F1=0.798


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 29.28it/s]


Model: hyp_384
ROC AUC: 0.7076
PR AUC: 0.7825


  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: hyp_192
Initialized model


Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 26.97it/s]


Optimal parameters: alpha=0.621, beta=-0.263, F1=0.801


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 23.96it/s]


Model: hyp_192
ROC AUC: 0.7162
PR AUC: 0.7845


  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: hyp_96
Initialized model


Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 24.68it/s]


Optimal parameters: alpha=0.621, beta=2.368, F1=0.799


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 25.64it/s]


Model: hyp_96
ROC AUC: 0.7111
PR AUC: 0.7825
Evaluating model: hyp_64
Initialized model


  model.load_state_dict(torch.load(model_path, map_location=device))
Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 27.15it/s]


Optimal parameters: alpha=2.705, beta=3.947, F1=0.799


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 29.68it/s]


Model: hyp_64
ROC AUC: 0.7193
PR AUC: 0.7823
Evaluating model: hyp_48
Initialized model


  model.load_state_dict(torch.load(model_path, map_location=device))
Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 24.64it/s]


Optimal parameters: alpha=0.621, beta=-0.263, F1=0.799


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 28.86it/s]


Model: hyp_48
ROC AUC: 0.7121
PR AUC: 0.7853


  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: hyp_32
Initialized model


Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 24.50it/s]


Optimal parameters: alpha=1.663, beta=2.895, F1=0.782


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 29.75it/s]


Model: hyp_32
ROC AUC: 0.7039
PR AUC: 0.7790
Evaluating model: hyp_24


  model.load_state_dict(torch.load(model_path, map_location=device))


Initialized model


Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 25.59it/s]


Optimal parameters: alpha=1.142, beta=0.789, F1=0.797


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 27.55it/s]


Model: hyp_24
ROC AUC: 0.7291
PR AUC: 0.8011


  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: hyp_16
Initialized model


Collecting distances: 100%|█████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 24.07it/s]


Optimal parameters: alpha=0.621, beta=-1.316, F1=0.797


Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 25.27it/s]


Model: hyp_16
ROC AUC: 0.6948
PR AUC: 0.7752


  model.load_state_dict(torch.load(model_path, map_location=device))


!!!!!!total problem rows =  5
Loaded 95 sentence pairs
Evaluating model: paws_hyp_384
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 28.78it/s]


Optimal parameters: alpha=0.621, beta=0.263, F1=0.600


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 28.15it/s]

Model: paws_hyp_384
ROC AUC: 0.8953
PR AUC: 0.5249





Evaluating model: paws_hyp_192
Initialized model


  model.load_state_dict(torch.load(model_path, map_location=device))
Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 23.02it/s]


Optimal parameters: alpha=0.621, beta=0.263, F1=0.632


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 26.38it/s]

Model: paws_hyp_192
ROC AUC: 0.9059
PR AUC: 0.5802



  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: paws_hyp_96
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 27.93it/s]


Optimal parameters: alpha=0.100, beta=-0.263, F1=0.588


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 26.37it/s]

Model: paws_hyp_96
ROC AUC: 0.8824
PR AUC: 0.4865



  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: paws_hyp_64
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 28.36it/s]


Optimal parameters: alpha=0.621, beta=-0.789, F1=0.667


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 27.64it/s]

Model: paws_hyp_64
ROC AUC: 0.8776
PR AUC: 0.5985



  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: paws_hyp_48
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 26.96it/s]


Optimal parameters: alpha=1.142, beta=-3.947, F1=0.636


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 29.58it/s]

Model: paws_hyp_48
ROC AUC: 0.8871
PR AUC: 0.6240



  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: paws_hyp_32
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 27.20it/s]


Optimal parameters: alpha=0.100, beta=-1.842, F1=0.600


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 27.47it/s]

Model: paws_hyp_32
ROC AUC: 0.8824
PR AUC: 0.5656



  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: paws_hyp_24
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 25.92it/s]


Optimal parameters: alpha=0.621, beta=2.895, F1=0.545


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 26.96it/s]

Model: paws_hyp_24
ROC AUC: 0.8776
PR AUC: 0.4365



  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: paws_hyp_16
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 24.07it/s]


Optimal parameters: alpha=0.100, beta=-1.842, F1=0.588


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 27.78it/s]

Model: paws_hyp_16
ROC AUC: 0.8788
PR AUC: 0.5081





Evaluating model: hyp_384


  model.load_state_dict(torch.load(model_path, map_location=device))


Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 24.28it/s]


Optimal parameters: alpha=0.100, beta=1.316, F1=0.600


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 22.59it/s]

Model: hyp_384
ROC AUC: 0.9106
PR AUC: 0.5717



  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: hyp_192
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 26.05it/s]


Optimal parameters: alpha=0.100, beta=2.368, F1=0.632


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 25.15it/s]

Model: hyp_192
ROC AUC: 0.9118
PR AUC: 0.5643



  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: hyp_96
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 29.65it/s]


Optimal parameters: alpha=1.663, beta=3.421, F1=0.632


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 23.23it/s]

Model: hyp_96
ROC AUC: 0.9082
PR AUC: 0.5592



  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: hyp_64
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 21.67it/s]


Optimal parameters: alpha=0.100, beta=0.263, F1=0.593


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 19.66it/s]

Model: hyp_64
ROC AUC: 0.9200
PR AUC: 0.5806



  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: hyp_48
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 22.62it/s]


Optimal parameters: alpha=0.621, beta=0.789, F1=0.625


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 26.90it/s]

Model: hyp_48
ROC AUC: 0.9188
PR AUC: 0.6201





Evaluating model: hyp_32
Initialized model


  model.load_state_dict(torch.load(model_path, map_location=device))
Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 24.06it/s]


Optimal parameters: alpha=0.100, beta=-4.474, F1=0.600


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 21.00it/s]

Model: hyp_32
ROC AUC: 0.9094
PR AUC: 0.5479



  model.load_state_dict(torch.load(model_path, map_location=device))


Evaluating model: hyp_24
Initialized model


Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 25.60it/s]


Optimal parameters: alpha=0.100, beta=-0.789, F1=0.632


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 25.20it/s]

Model: hyp_24
ROC AUC: 0.9129
PR AUC: 0.5540





Evaluating model: hyp_16
Initialized model


  model.load_state_dict(torch.load(model_path, map_location=device))
Collecting distances: 100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 20.42it/s]


Optimal parameters: alpha=0.100, beta=-0.263, F1=0.588


Evaluating: 100%|█████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 25.69it/s]

Model: hyp_16
ROC AUC: 0.8753
PR AUC: 0.5136



