In [None]:
import pandas as pd
import random

from simpletransformers.classification import ClassificationModel

In [None]:

def f1(tp, fp, fn):
    """Return the F1 score computed from raw confusion counts.

    The score is ``tp / (tp + 0.5 * (fp + fn))``.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        fn: Number of false negatives.

    Returns:
        The F1 score, or the integer ``0`` when the denominator is zero
        (so the division is never attempted in that case).
    """
    denominator = tp + 0.5 * (fp + fn)
    return tp / denominator if denominator != 0 else 0

def augment_text(row, deletion_prob=0.05, swap_prob=0.3):
    """Build a noisy variant of ``row['text']`` using random swaps and deletions.

    The text is split on whitespace. Each position is then, with
    probability ``swap_prob``, exchanged with a uniformly chosen position.
    Afterwards every token is independently dropped with probability
    ``deletion_prob``. Randomness comes from the module-level ``random``
    generator, so seeding it makes the result reproducible.

    Args:
        row: Mapping-like object whose ``'text'`` entry is a string.
        deletion_prob: Per-token probability of being removed.
        swap_prob: Per-position probability of attempting a swap.

    Returns:
        The surviving tokens joined by single spaces.
    """
    words = row['text'].split()
    word_count = len(words)

    # Swap pass: visit each position once, left to right.
    for position in range(word_count):
        if not (random.random() < swap_prob):
            continue
        partner = random.randint(0, word_count - 1)
        if partner == position:
            # Drawing the same index would be a no-op swap; skip it.
            continue
        words[position], words[partner] = words[partner], words[position]

    # Deletion pass: a token is kept when its draw is at least deletion_prob.
    survivors = []
    for word in words:
        if random.random() >= deletion_prob:
            survivors.append(word)

    return ' '.join(survivors)

def train_deberta(learning_rate, batch_size, num_epochs, weight_decay, dropout, num_layers_unfrozen,
                  train_file='data/train_paraphrase_upsampled.csv', test_file='data/dev_set.csv',
                  save_path=None, test_results_path='dev.txt'):
    """Fine-tune ``microsoft/deberta-base`` for binary classification and write predictions.

    Only the last ``num_layers_unfrozen`` encoder layers and the classifier
    head are trainable; every other parameter is frozen. Training runs as
    ``num_epochs`` separate single-epoch ``train_model`` calls so that the
    training text can be re-augmented before each of the later passes.

    Args:
        learning_rate: Learning rate passed to the model args.
        batch_size: Used for both the train and eval batch sizes.
        num_epochs: Number of single-epoch training passes to run.
        weight_decay: Weight decay passed to the model args.
        dropout: Stored under the ``"dropout"`` model arg (see the note in
            the body about whether it takes effect).
        num_layers_unfrozen: How many of the last encoder layers to train;
            capped at the number of layers the encoder has.
        train_file: CSV with ``text`` and ``label`` columns used for training.
        test_file: CSV with a ``text`` column to predict on.
        save_path: If truthy, directory where the final model, tokenizer
            and config are saved.
        test_results_path: File that receives one predicted label per line.
    """
    train_df = pd.read_csv(train_file)
    test_df = pd.read_csv(test_file)

    # Model configuration with hyperparameters.
    # num_train_epochs stays at 1 because the epoch loop below calls
    # train_model once per epoch.
    model_args = {
        "num_train_epochs": 1,
        "train_batch_size": batch_size,
        "eval_batch_size": batch_size,
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "output_dir": f"outputs_lr_{learning_rate}_batch_size_{batch_size}_unfreeze_{num_layers_unfrozen}",
        "overwrite_output_dir": True,
        "save_best_model": True,
        "save_eval_checkpoints": False,
        "save_model_every_epoch": False,
        "use_early_stopping": False,
        "use_multiprocessing": False,
        "use_multiprocessing_for_evaluation": False,
        "reprocess_input_data": True,
        "save_steps": -1,
        "fp16": False,  # Ensure FP16 is disabled
        # NOTE(review): "dropout" does not look like a documented
        # simpletransformers model arg and may be silently ignored. If it is
        # meant to take effect, it probably has to go through the model
        # config (e.g. hidden_dropout_prob) -- confirm before relying on it.
        "dropout": dropout
    }

    # Initialize DeBERTa model
    model = ClassificationModel(
        "deberta",  # Model type
        "microsoft/deberta-base",
        num_labels=2,
        args=model_args,
    )

    # Unfreeze the last `num_layers_unfrozen` layers + classifier head
    model_layers = list(model.model.deberta.encoder.layer)
    num_total_layers = len(model_layers)
    layers_to_unfreeze = min(num_layers_unfrozen, num_total_layers)

    # Freeze everything first, then selectively re-enable gradients.
    for param in model.model.parameters():
        param.requires_grad = False

    for layer in model_layers[num_total_layers - layers_to_unfreeze:num_total_layers]:
        for param in layer.parameters():
            param.requires_grad = True

    # Ensure classifier head is always trainable
    for name, param in model.model.named_parameters():
        if "classifier" in name:
            param.requires_grad = True

    cols = ["text", "label"]

    # Train for the required number of epochs
    for epoch in range(num_epochs):
        epoch_df = train_df[cols].copy()

        # The first five passes (epoch 0-4) see the unmodified text; later
        # passes get a freshly augmented copy each time.
        if epoch > 4:
            epoch_df["text"] = epoch_df.apply(augment_text, axis=1)

        model.train_model(epoch_df)

    print("Evaluating on dev set")
    # predict() takes a plain list of texts, not a DataFrame, and the label
    # column must not be passed along with the inputs.
    preds, _ = model.predict(test_df["text"].tolist())

    with open(test_results_path, 'w', encoding='utf-8') as f:
        # Predictions are numeric labels, so format them instead of
        # concatenating with a str (which raises TypeError).
        f.writelines(f"{pred}\n" for pred in preds)

    if save_path:
        print("Saving final model to", save_path)
        model.model.save_pretrained(save_path)
        model.tokenizer.save_pretrained(save_path)
        model.config.save_pretrained(save_path)