In [1]:
import os
import csv
import pandas as pd
import torch
import re
import nltk
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, matthews_corrcoef
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback, RobertaTokenizer, RobertaForSequenceClassification
import torch.nn as nn
import wandb

from google.colab import auth
from google.colab import drive
from google.colab import userdata

In [2]:
# =========================
# STEP 0: mount to drive
# =========================
#auth.authenticate_user()
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
# =========================
# STEP 1: Initialize WandB
# =========================
# Retrieve API Key from Colab Secrets
wandb_api_key = userdata.get('WANDB_API_KEY')

if wandb_api_key:
    os.environ["WANDB_API_KEY"] = wandb_api_key
    wandb.login(key=wandb_api_key)
    print("✅ WandB Logged in Securely")
else:
    print("❌ Error: WANDB_API_KEY not found. Set it in Colab Secrets.")


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33maalhaizaey[0m ([33mabdulrahim-alhaizaey[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


✅ WandB Logged in Securely


In [4]:
# =========================
# STEP 2: LOAD DATA & CLEAN TEXT
# =========================
nltk.download('stopwords')
nltk.download('wordnet')

multi_data = pd.read_csv("/content/10006_dataset_Multi.csv", encoding='ISO-8859-1')

def clean_text(text):
    if pd.isnull(text):
        return ""
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text).lower()
    return text

multi_data['cleaned_text'] = multi_data['text'].apply(clean_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [5]:
# =========================
# STEP 3: PREPARE DATASETS & LABEL ENCODING (Train Only)
# =========================
multi_train, multi_val = train_test_split(multi_data, test_size=0.2, stratify=multi_data['label'], random_state=42)

# Fit LabelEncoder only on training data to prevent leakage
multi_le = LabelEncoder()
multi_le.fit(multi_train['label'])
multi_train['encoded_label'] = multi_le.transform(multi_train['label'])
multi_val['encoded_label'] = multi_le.transform(multi_val['label'])


In [6]:
# =========================
# STEP 4: TOKENIZATION (Train Data Only for max_length)
# =========================
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# Calculate max_length using training data
max_length = min(tokenizer.model_max_length, max(multi_train['cleaned_text'].apply(lambda x: len(tokenizer.tokenize(x)))))
print(f"Using max_length from training set only: {max_length}")

def tokenize_data(df, label_col):
    encodings = tokenizer(df['cleaned_text'].tolist(), truncation=True, padding=True, max_length=max_length, return_tensors="pt")
    labels = torch.tensor(df[label_col].tolist())
    return encodings, labels

train_multi_enc, train_multi_labels = tokenize_data(multi_train, 'encoded_label')
val_multi_enc, val_multi_labels = tokenize_data(multi_val, 'encoded_label')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Using max_length from training set only: 116


In [7]:
# =========================
# STEP 5: DATASET CLASS & METRICS
# =========================
class TextDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    accuracy = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted', zero_division=0)
    mcc = matthews_corrcoef(labels, predictions)
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "mcc": mcc
    }


In [8]:
# =========================
# STEP 6: FOCAL LOSS & CUSTOM TRAINER
# =========================
class FocalLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0, reduction='mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = nn.CrossEntropyLoss(reduction='none')(inputs, targets)
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean() if self.reduction == 'mean' else focal_loss.sum()

class CustomTrainer(Trainer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.training_loss = []
        self.validation_loss = []
        self.results = []

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fn = FocalLoss(alpha=0.25, gamma=2.0)
        loss = loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss

    def evaluate(self, *args, **kwargs):
        output = super().evaluate(*args, **kwargs)
        self.validation_loss.append(output['eval_loss'])
        self.results.append(output.copy())
        return output

    def log(self, logs, *args, **kwargs):
        super().log(logs, *args, **kwargs)
        if 'loss' in logs:
            self.training_loss.append(logs['loss'])


In [9]:
# =========================
# STEP 7: ADD PLOTTING (Confusion Matrices & Loss Curves)
# =========================
def plot_confusion_matrix(labels, predictions, label_encoder, output_dir, run_name):
    class_names = label_encoder.classes_
    conf_matrix = confusion_matrix(labels, predictions)
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(f"{output_dir}/{run_name}_confusion_numbers.png")
    plt.close()

def plot_confusion_matrix_percent(labels, predictions, label_encoder, output_dir, run_name):
    class_names = label_encoder.classes_
    conf_matrix = confusion_matrix(labels, predictions, normalize='true')
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='.2f', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(f"{output_dir}/{run_name}_confusion_percent.png")
    plt.close()

def plot_confusion_matrix_class_weighted(labels, predictions, label_encoder, output_dir, run_name):
    conf_matrix = confusion_matrix(labels, predictions, normalize='true')
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='.2f', cmap='coolwarm', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(f"{output_dir}/{run_name}_confusion_weighted.png")
    plt.close()

def plot_loss(training_loss, validation_loss, output_dir, run_name):
    min_length = min(len(training_loss), len(validation_loss))
    training_loss = training_loss[:min_length]
    validation_loss = validation_loss[:min_length]
    epochs = range(1, min_length + 1)
    plt.figure(figsize=(8, 6))
    plt.plot(epochs, training_loss, label='Training Loss', marker='o')
    plt.plot(epochs, validation_loss, label='Validation Loss', marker='s', linestyle='--')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{output_dir}/{run_name}_loss_curves.png")
    plt.close()


In [10]:
# =========================
# STEP 8: TRAIN FUNCTION
# =========================
def train_roberta(train_dataset, eval_dataset, num_labels, output_dir, label_encoder, config=None):
    with wandb.init(config=config):
        config = wandb.config
        run_name = wandb.run.name

        base_dir = "/content/drive/MyDrive/RoBERTa_8_Results"
        run_dir = os.path.join(base_dir, run_name)
        os.makedirs(run_dir, exist_ok=True)

        model = RobertaForSequenceClassification.from_pretrained(
            "roberta-base",
            num_labels=num_labels,
            hidden_dropout_prob=config.dropout,
            attention_probs_dropout_prob=config.dropout
        )

        num_devices = torch.cuda.device_count()  # Will be 1 on Colab
        examples_per_step = config.batch_size * num_devices * config.gradient_accumulation_steps
        total_steps = int(np.ceil(len(train_dataset) / examples_per_step) * config.num_train_epochs)

        warmup_steps = int(0.1 * total_steps)
        print(f"Total Steps: {total_steps}, Warmup Steps (10%): {warmup_steps}")

        training_args = TrainingArguments(
            output_dir=output_dir,
            run_name=run_name,
            num_train_epochs=config.num_train_epochs,
            per_device_train_batch_size=config.batch_size,
            per_device_eval_batch_size=config.batch_size,
            learning_rate=config.learning_rate,
            warmup_steps=warmup_steps,
            weight_decay=config.weight_decay,
            evaluation_strategy="epoch",
            logging_strategy="epoch",
            save_strategy="epoch",
            fp16=True,
            gradient_checkpointing=True,
            save_total_limit=2,
            gradient_accumulation_steps=config.gradient_accumulation_steps,
            max_grad_norm=config.max_grad_norm,
            lr_scheduler_type="cosine_with_restarts",
            load_best_model_at_end=True,
            metric_for_best_model="eval_loss",
            report_to="wandb"
        )

        trainer = CustomTrainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            compute_metrics=compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
        )

        trainer.train()

        # Save Best Model
        best_model_path = os.path.join(run_dir, "best_model")
        trainer.save_model(best_model_path)
        print(f"✅ Best model saved to {best_model_path}")

        # Evaluate Best Model
        results = trainer.evaluate(eval_dataset=eval_dataset)

        predictions_obj = trainer.predict(eval_dataset)
        predictions = np.argmax(predictions_obj.predictions, axis=1)
        labels = predictions_obj.label_ids

        wandb.log({
            "eval_loss": results.get("eval_loss"),
            "eval_accuracy": results.get("eval_accuracy"),
            "eval_precision": results.get("eval_precision"),
            "eval_recall": results.get("eval_recall"),
            "eval_f1": results.get("eval_f1"),
            "eval_mcc": results.get("eval_mcc")
        })

        plot_confusion_matrix(labels, predictions, label_encoder, run_dir, run_name)
        plot_confusion_matrix_percent(labels, predictions, label_encoder, run_dir, run_name)
        plot_confusion_matrix_class_weighted(labels, predictions, label_encoder, run_dir, run_name)
        plot_loss(trainer.training_loss, trainer.validation_loss, run_dir, run_name)

        # Save run parameters and results to a CSV file
        results_csv = os.path.join(run_dir, "results.csv")
        with open(results_csv, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["Parameter", "Value"])
            for key, value in config.items():
                writer.writerow([key, value])
            writer.writerow(["Epoch", "Training Loss", "Validation Loss", "Accuracy", "Precision", "Recall", "F1", "Mcc"])
            for epoch, result in enumerate(trainer.results, start=1):
                writer.writerow([
                    epoch,
                    trainer.training_loss[epoch - 1] if epoch - 1 < len(trainer.training_loss) else None,
                    trainer.validation_loss[epoch - 1] if epoch - 1 < len(trainer.validation_loss) else None,
                    result.get("eval_accuracy"),
                    result.get("eval_precision"),
                    result.get("eval_recall"),
                    result.get("eval_f1"),
                    result.get("eval_mcc")
                ])

        return results

In [11]:
# =========================
# STEP 9: RUN SWEEP CONFIGURATION
# =========================
train_dataset = TextDataset(train_multi_enc, train_multi_labels)
val_dataset = TextDataset(val_multi_enc, val_multi_labels)

sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'eval_loss', 'goal': 'minimize'},
    'parameters': {
        'num_train_epochs': {'values': [12]},
        'learning_rate': {'min': 1e-5, 'max': 5e-5},
        'batch_size': {'values': [8, 16, 32]},
        'weight_decay': {'min': 0.01, 'max': 0.05},
        'dropout': {'min': 0.05, 'max': 0.5},
        'gradient_accumulation_steps': {'values': [2, 4]},
        'max_grad_norm': {'values': [1.0, 2.0]}
    },
}

In [12]:
sweep_id = wandb.sweep(sweep=sweep_config, project='RoBERTa_8')

wandb.agent(sweep_id, function=lambda: train_roberta(
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    num_labels=len(multi_le.classes_),
    output_dir="multi_output",
    label_encoder=multi_le
))


Create sweep with ID: 4tf3f9z8
Sweep URL: https://wandb.ai/abdulrahim-alhaizaey/RoBERTa_8/sweeps/4tf3f9z8


[34m[1mwandb[0m: Agent Starting Run: f2r1tbsy with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4874726431101276
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 1.861988928598048e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.01112759556651764
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.3456,0.264829,0.645355,0.416483,0.645355,0.506253,0.0
2,0.2378,0.224523,0.645355,0.416483,0.645355,0.506253,0.0
3,0.2353,0.221883,0.645355,0.416483,0.645355,0.506253,0.0
4,0.2342,0.222444,0.645355,0.416483,0.645355,0.506253,0.0
5,0.2326,0.220326,0.645355,0.416483,0.645355,0.506253,0.0
6,0.2304,0.220149,0.645355,0.416483,0.645355,0.506253,0.0
7,0.2317,0.222572,0.645355,0.416483,0.645355,0.506253,0.0
8,0.2306,0.219711,0.645355,0.416483,0.645355,0.506253,0.0
9,0.2315,0.219173,0.645355,0.416483,0.645355,0.506253,0.0
10,0.2302,0.219086,0.645355,0.416483,0.645355,0.506253,0.0


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/confused-sweep-1/best_model


0,1
eval/accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f1,▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,█▂▁▂▁▁▂▁▁▁▁▁▁
eval/mcc,▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/precision,▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall,▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/runtime,█▂▅▆▄▃▄▁▂▃▄▅▅
eval/samples_per_second,▁▇▃▃▅▆▅█▇▆▅▄▄
eval/steps_per_second,▁▇▃▃▅▆▅█▇▆▅▄▄
eval_accuracy,▁

0,1
eval/accuracy,0.64535
eval/f1,0.50625
eval/loss,0.21909
eval/mcc,0.0
eval/precision,0.41648
eval/recall,0.64535
eval/runtime,0.8904
eval/samples_per_second,2248.368
eval/steps_per_second,70.753
eval_accuracy,0.64535


[34m[1mwandb[0m: Agent Starting Run: 4r46xzfr with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.21352309758911348
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 3.2356003877876264e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.047554053961837886


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2041,0.057743,0.853646,0.862788,0.853646,0.852123,0.741175
2,0.0577,0.054059,0.817682,0.863459,0.817682,0.829187,0.722522
3,0.0372,0.047118,0.872128,0.887061,0.872128,0.875814,0.789628
4,0.0267,0.046342,0.86963,0.891009,0.86963,0.874829,0.788087
5,0.0198,0.050244,0.872627,0.89213,0.872627,0.878112,0.791509
6,0.0142,0.041325,0.891109,0.903606,0.891109,0.894515,0.819343
7,0.0099,0.04629,0.890609,0.904783,0.890609,0.89425,0.821093
8,0.0074,0.051196,0.861139,0.892171,0.861139,0.869045,0.782993
9,0.0051,0.042876,0.898102,0.911556,0.898102,0.901471,0.83129


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/super-sweep-2/best_model


0,1
eval/accuracy,▄▁▆▆▆▇▇▅█▇
eval/f1,▃▁▆▅▆▇▇▅█▇
eval/loss,█▆▃▃▅▁▃▅▂▁
eval/mcc,▂▁▅▅▅▇▇▅█▇
eval/precision,▁▁▄▅▅▇▇▅█▇
eval/recall,▄▁▆▆▆▇▇▅█▇
eval/runtime,▆▃▁▅▃▄▅█▇█
eval/samples_per_second,▃▆█▄▆▅▄▁▂▁
eval/steps_per_second,▃▆█▄▆▅▄▁▂▁
eval_accuracy,▁

0,1
eval/accuracy,0.89111
eval/f1,0.89451
eval/loss,0.04132
eval/mcc,0.81934
eval/precision,0.90361
eval/recall,0.89111
eval/runtime,1.7437
eval/samples_per_second,1148.142
eval/steps_per_second,72.261
eval_accuracy,0.89111


[34m[1mwandb[0m: Agent Starting Run: 9m6paod8 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.2175502282911708
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 2.905518888539008e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.028971344201183406


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2815,0.169851,0.707792,0.59862,0.707792,0.636623,0.392693
2,0.1119,0.081356,0.768731,0.834308,0.768731,0.784322,0.657441
3,0.0475,0.060236,0.821678,0.860774,0.821678,0.831722,0.725337
4,0.0328,0.042308,0.881119,0.894224,0.881119,0.882804,0.796346
5,0.0259,0.056273,0.837662,0.880188,0.837662,0.848867,0.747108
6,0.0201,0.054509,0.843656,0.883729,0.843656,0.853002,0.763194
7,0.0171,0.040957,0.887612,0.89967,0.887612,0.89103,0.81343
8,0.0136,0.047153,0.868132,0.891862,0.868132,0.874618,0.789697
9,0.0119,0.044368,0.871628,0.890653,0.871628,0.876761,0.792099
10,0.0108,0.043069,0.878621,0.896143,0.878621,0.883565,0.802113


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/hopeful-sweep-3/best_model


0,1
eval/accuracy,▁▃▅█▆▆█▇▇██
eval/f1,▁▅▆█▇▇█████
eval/loss,█▃▂▁▂▂▁▁▁▁▁
eval/mcc,▁▅▇█▇▇█████
eval/precision,▁▆▇████████
eval/recall,▁▃▅█▆▆█▇▇██
eval/runtime,▇▁▆▂█▆▆▅▃▆▆
eval/samples_per_second,▂█▃▇▁▃▃▄▆▃▃
eval/steps_per_second,▂█▃▇▁▃▃▄▆▃▃
eval_accuracy,▁

0,1
eval/accuracy,0.88761
eval/f1,0.89103
eval/loss,0.04096
eval/mcc,0.81343
eval/precision,0.89967
eval/recall,0.88761
eval/runtime,0.8962
eval/samples_per_second,2233.964
eval/steps_per_second,70.3
eval_accuracy,0.88761


[34m[1mwandb[0m: Agent Starting Run: 43dvpkkr with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.4589825587391758
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 2.8643818851441172e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.010424727931810847


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2995,0.222952,0.645355,0.416483,0.645355,0.506253,0.0
2,0.2351,0.259586,0.645355,0.416483,0.645355,0.506253,0.0
3,0.2358,0.254056,0.645355,0.416483,0.645355,0.506253,0.0
4,0.2326,0.262732,0.645355,0.416483,0.645355,0.506253,0.0


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/elated-sweep-4/best_model


0,1
eval/accuracy,▁▁▁▁▁
eval/f1,▁▁▁▁▁
eval/loss,▁▇▆█▁
eval/mcc,▁▁▁▁▁
eval/precision,▁▁▁▁▁
eval/recall,▁▁▁▁▁
eval/runtime,▁█▄▇▅
eval/samples_per_second,█▁▅▂▃
eval/steps_per_second,█▁▅▂▃
eval_accuracy,▁

0,1
eval/accuracy,0.64535
eval/f1,0.50625
eval/loss,0.22295
eval/mcc,0.0
eval/precision,0.41648
eval/recall,0.64535
eval/runtime,3.4464
eval/samples_per_second,580.889
eval/steps_per_second,72.829
eval_accuracy,0.64535


[34m[1mwandb[0m: Agent Starting Run: kad96j3k with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.1607683269385195
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.2438274772048047e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04152759415158302


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2454,0.139774,0.71978,0.786316,0.71978,0.733039,0.56394
2,0.0679,0.046219,0.845654,0.869895,0.845654,0.853214,0.750679
3,0.0346,0.049218,0.859141,0.880639,0.859141,0.865398,0.768748
4,0.0212,0.033884,0.907093,0.912594,0.907093,0.908414,0.839031
5,0.0142,0.043796,0.882118,0.901121,0.882118,0.887154,0.808283
6,0.0093,0.042776,0.885115,0.901253,0.885115,0.889491,0.812908
7,0.007,0.03921,0.898601,0.905884,0.898601,0.900621,0.828308


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/glowing-sweep-5/best_model


0,1
eval/accuracy,▁▆▆█▇▇██
eval/f1,▁▆▆█▇▇██
eval/loss,█▂▂▁▂▂▁▁
eval/mcc,▁▆▆█▇▇██
eval/precision,▁▆▆█▇▇██
eval/recall,▁▆▆█▇▇██
eval/runtime,▅▄▁█▁▅▆▇
eval/samples_per_second,▄▅█▁█▄▂▂
eval/steps_per_second,▄▅█▁█▄▂▂
eval_accuracy,▁

0,1
eval/accuracy,0.90709
eval/f1,0.90841
eval/loss,0.03388
eval/mcc,0.83903
eval/precision,0.91259
eval/recall,0.90709
eval/runtime,0.9302
eval/samples_per_second,2152.117
eval/steps_per_second,67.724
eval_accuracy,0.90709


[34m[1mwandb[0m: Agent Starting Run: p8goepgm with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.13159836812757766
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 2.992669825033802e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04751580312720266


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2781,0.190817,0.644356,0.640784,0.644356,0.625954,0.420166
2,0.0921,0.048006,0.861139,0.872119,0.861139,0.863889,0.761301
3,0.036,0.03461,0.893606,0.895789,0.893606,0.894183,0.813278
4,0.0219,0.034339,0.902098,0.904846,0.902098,0.901423,0.826107
5,0.0131,0.056012,0.861638,0.888439,0.861638,0.86913,0.776332
6,0.0103,0.035778,0.896603,0.907457,0.896603,0.899893,0.826769
7,0.0062,0.034855,0.905594,0.911563,0.905594,0.907424,0.83794


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/decent-sweep-6/best_model


0,1
eval/accuracy,▁▇██▇███
eval/f1,▁▇██▇███
eval/loss,█▂▁▁▂▁▁▁
eval/mcc,▁▇██▇███
eval/precision,▁▇██▇███
eval/recall,▁▇██▇███
eval/runtime,▁▂▄█▂▆▁▅
eval/samples_per_second,█▇▅▁▇▃█▄
eval/steps_per_second,█▇▅▁▇▃█▄
eval_accuracy,▁

0,1
eval/accuracy,0.9021
eval/f1,0.90142
eval/loss,0.03434
eval/mcc,0.82611
eval/precision,0.90485
eval/recall,0.9021
eval/runtime,0.9226
eval/samples_per_second,2170.011
eval/steps_per_second,68.287
eval_accuracy,0.9021


[34m[1mwandb[0m: Agent Starting Run: 4izdxmdj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.07801351074950956
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 3.4067257383710574e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.037063915608556784


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2656,0.142829,0.740759,0.798074,0.740759,0.73417,0.556368
2,0.0714,0.042706,0.868132,0.882175,0.868132,0.87251,0.778233
3,0.0274,0.04343,0.874126,0.888273,0.874126,0.877689,0.788704
4,0.0154,0.036203,0.908092,0.911202,0.908092,0.906924,0.834757
5,0.0077,0.03351,0.914086,0.918381,0.914086,0.915072,0.849831
6,0.0045,0.031383,0.91958,0.921643,0.91958,0.920131,0.858259
7,0.0025,0.033789,0.912587,0.919221,0.912587,0.914738,0.848068
8,0.0016,0.032886,0.918581,0.920765,0.918581,0.919361,0.857009
9,0.0012,0.032829,0.923576,0.925023,0.923576,0.924142,0.865553


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/fancy-sweep-7/best_model


0,1
eval/accuracy,▁▆▆▇██████
eval/f1,▁▆▆▇██████
eval/loss,█▂▂▁▁▁▁▁▁▁
eval/mcc,▁▆▆▇██████
eval/precision,▁▆▆▇██████
eval/recall,▁▆▆▇██████
eval/runtime,▁▃▁▃▁▄▂█▁▇
eval/samples_per_second,█▆█▆█▅▇▁█▂
eval/steps_per_second,█▆█▆█▅▇▁█▂
eval_accuracy,▁

0,1
eval/accuracy,0.91958
eval/f1,0.92013
eval/loss,0.03138
eval/mcc,0.85826
eval/precision,0.92164
eval/recall,0.91958
eval/runtime,0.9484
eval/samples_per_second,2110.937
eval/steps_per_second,66.428
eval_accuracy,0.91958


[34m[1mwandb[0m: Agent Starting Run: ozk0n8i3 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.14018576004372665
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.5250407918001944e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04882860599112413


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.1959,0.058872,0.853147,0.862243,0.853147,0.847164,0.733501
2,0.046,0.050364,0.852148,0.885001,0.852148,0.861201,0.766179
3,0.0259,0.044663,0.877622,0.894005,0.877622,0.881612,0.797937
4,0.0153,0.034008,0.916583,0.916451,0.916583,0.915398,0.850841
5,0.0094,0.041547,0.905095,0.912489,0.905095,0.907346,0.838358
6,0.0055,0.036754,0.914086,0.916748,0.914086,0.915109,0.85011
7,0.0039,0.040848,0.903596,0.909867,0.903596,0.905355,0.835158


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/magic-sweep-8/best_model


0,1
eval/accuracy,▁▁▄█▇█▇█
eval/f1,▁▂▅█▇█▇█
eval/loss,█▆▄▁▃▂▃▁
eval/mcc,▁▃▅█▇█▇█
eval/precision,▁▄▅█▇█▇█
eval/recall,▁▁▄█▇█▇█
eval/runtime,▁▃▁▇▄▂█▃
eval/samples_per_second,█▆█▂▅▇▁▆
eval/steps_per_second,█▆█▂▅▇▁▆
eval_accuracy,▁

0,1
eval/accuracy,0.91658
eval/f1,0.9154
eval/loss,0.03401
eval/mcc,0.85084
eval/precision,0.91645
eval/recall,0.91658
eval/runtime,0.9139
eval/samples_per_second,2190.562
eval/steps_per_second,68.934
eval_accuracy,0.91658


[34m[1mwandb[0m: Agent Starting Run: 78j24vdx with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.053896416231812766
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 3.423775487589857e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.048628282054960344


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2035,0.059312,0.850649,0.865122,0.850649,0.853513,0.745546
2,0.0401,0.035315,0.894106,0.897921,0.894106,0.895167,0.816532
3,0.0201,0.039594,0.9001,0.908221,0.9001,0.901685,0.827285
4,0.0098,0.034273,0.924575,0.926748,0.924575,0.922729,0.864173
5,0.0034,0.037141,0.912587,0.921123,0.912587,0.915012,0.850156
6,0.0019,0.044717,0.913586,0.915424,0.913586,0.910518,0.844458
7,0.0012,0.038224,0.927073,0.927066,0.927073,0.926191,0.86936


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/faithful-sweep-9/best_model


0,1
eval/accuracy,▁▅▆█▇▇██
eval/f1,▁▅▆█▇▆██
eval/loss,█▁▂▁▂▄▂▁
eval/mcc,▁▅▆█▇▇██
eval/precision,▁▅▆█▇▇██
eval/recall,▁▅▆█▇▇██
eval/runtime,▅▄▃▆█▁▁▄
eval/samples_per_second,▄▅▆▃▁██▅
eval/steps_per_second,▄▅▆▃▁██▅
eval_accuracy,▁

0,1
eval/accuracy,0.92458
eval/f1,0.92273
eval/loss,0.03427
eval/mcc,0.86417
eval/precision,0.92675
eval/recall,0.92458
eval/runtime,0.9233
eval/samples_per_second,2168.374
eval/steps_per_second,68.236
eval_accuracy,0.92458


[34m[1mwandb[0m: Agent Starting Run: fw438e87 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.10017176068297476
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 3.606612923512613e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.0407276415611722


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2648,0.145752,0.724775,0.777635,0.724775,0.735426,0.546233
2,0.0706,0.043484,0.864136,0.882477,0.864136,0.868745,0.770091
3,0.0303,0.032824,0.896603,0.902484,0.896603,0.898047,0.820676
4,0.0159,0.033127,0.914086,0.918328,0.914086,0.913209,0.847068
5,0.0094,0.032631,0.906593,0.913482,0.906593,0.908133,0.838208
6,0.0056,0.029538,0.917582,0.921297,0.917582,0.918503,0.856601
7,0.0034,0.031846,0.915584,0.921434,0.915584,0.917457,0.854763
8,0.002,0.03237,0.923576,0.927417,0.923576,0.92481,0.866697
9,0.0016,0.033691,0.920579,0.924902,0.920579,0.922001,0.862727


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/treasured-sweep-10/best_model


0,1
eval/accuracy,▁▆▇█▇█████
eval/f1,▁▆▇█▇█████
eval/loss,█▂▁▁▁▁▁▁▁▁
eval/mcc,▁▆▇█▇█████
eval/precision,▁▆▇█▇█████
eval/recall,▁▆▇█▇█████
eval/runtime,▂▂▄▁█▁▁▁▂▄
eval/samples_per_second,▇▇▅█▁███▇▅
eval/steps_per_second,▇▇▅█▁███▇▅
eval_accuracy,▁

0,1
eval/accuracy,0.91758
eval/f1,0.9185
eval/loss,0.02954
eval/mcc,0.8566
eval/precision,0.9213
eval/recall,0.91758
eval/runtime,0.9232
eval/samples_per_second,2168.565
eval/steps_per_second,68.242
eval_accuracy,0.91758


[34m[1mwandb[0m: Agent Starting Run: d1t95e36 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.058529503694848654
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.300495514427046e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04230420352896582


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2399,0.10419,0.777722,0.816962,0.777722,0.779972,0.633577
2,0.0481,0.0394,0.87962,0.901855,0.87962,0.885491,0.803647
3,0.02,0.035184,0.896603,0.911265,0.896603,0.900702,0.82268
4,0.0101,0.034209,0.914086,0.917625,0.914086,0.913728,0.848337
5,0.005,0.03109,0.929071,0.930645,0.929071,0.929465,0.874437
6,0.002,0.03317,0.921079,0.921507,0.921079,0.919733,0.859064
7,0.0012,0.033157,0.926074,0.926308,0.926074,0.925569,0.868254
8,0.0007,0.032365,0.926573,0.928196,0.926573,0.927129,0.870622


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/comic-sweep-11/best_model


0,1
eval/accuracy,▁▆▆▇█████
eval/f1,▁▆▇▇█████
eval/loss,█▂▁▁▁▁▁▁▁
eval/mcc,▁▆▆▇█████
eval/precision,▁▆▇▇█▇███
eval/recall,▁▆▆▇█████
eval/runtime,▁▂▅▂▄▄▂▁█
eval/samples_per_second,█▆▄▇▅▅▇█▁
eval/steps_per_second,█▆▄▇▅▅▇█▁
eval_accuracy,▁

0,1
eval/accuracy,0.92907
eval/f1,0.92947
eval/loss,0.03109
eval/mcc,0.87444
eval/precision,0.93065
eval/recall,0.92907
eval/runtime,0.9992
eval/samples_per_second,2003.519
eval/steps_per_second,63.048
eval_accuracy,0.92907


[34m[1mwandb[0m: Agent Starting Run: 4q85n5bj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.06980858494928484
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 1.53925407401223e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.047315646905475954


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2509,0.115364,0.756743,0.764152,0.756743,0.746059,0.571296
2,0.0618,0.039941,0.881618,0.890605,0.881618,0.883044,0.795095
3,0.0251,0.045677,0.878621,0.892599,0.878621,0.882214,0.795294
4,0.0138,0.032779,0.918581,0.917901,0.918581,0.916557,0.852635
5,0.0079,0.037782,0.90959,0.915488,0.90959,0.910788,0.842836
6,0.0038,0.033253,0.923576,0.924462,0.923576,0.923345,0.864729
7,0.0027,0.034914,0.922577,0.923851,0.922577,0.922797,0.863883


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/glad-sweep-12/best_model


0,1
eval/accuracy,▁▆▆█▇███
eval/f1,▁▆▆█████
eval/loss,█▂▂▁▁▁▁▁
eval/mcc,▁▆▆█▇███
eval/precision,▁▇▇█████
eval/recall,▁▆▆█▇███
eval/runtime,▁▃▇▃██▅▇
eval/samples_per_second,█▆▂▆▁▁▄▂
eval/steps_per_second,█▆▂▆▁▁▄▂
eval_accuracy,▁

0,1
eval/accuracy,0.91858
eval/f1,0.91656
eval/loss,0.03278
eval/mcc,0.85264
eval/precision,0.9179
eval/recall,0.91858
eval/runtime,0.9357
eval/samples_per_second,2139.564
eval/steps_per_second,67.329
eval_accuracy,0.91858


[34m[1mwandb[0m: Agent Starting Run: ls8yno1l with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.05467332123109417
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 3.346220507668927e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04379376304435993


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2629,0.139075,0.724276,0.792799,0.724276,0.725767,0.542163
2,0.0653,0.042453,0.87013,0.883235,0.87013,0.873458,0.775708
3,0.0262,0.034891,0.898102,0.908435,0.898102,0.900535,0.821759
4,0.0128,0.030577,0.918581,0.92,0.918581,0.917583,0.853861
5,0.0053,0.033752,0.92008,0.922038,0.92008,0.918483,0.857252
6,0.0031,0.031372,0.921578,0.923513,0.921578,0.922318,0.862252
7,0.0015,0.0328,0.925075,0.926245,0.925075,0.925449,0.867827


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/sage-sweep-13/best_model


0,1
eval/accuracy,▁▆▇█████
eval/f1,▁▆▇█████
eval/loss,█▂▁▁▁▁▁▁
eval/mcc,▁▆▇█████
eval/precision,▁▆▇█████
eval/recall,▁▆▇█████
eval/runtime,█▄▇▄▁▂▂▅
eval/samples_per_second,▁▅▁▅█▇▇▄
eval/steps_per_second,▁▅▁▅█▇▇▄
eval_accuracy,▁

0,1
eval/accuracy,0.91858
eval/f1,0.91758
eval/loss,0.03058
eval/mcc,0.85386
eval/precision,0.92
eval/recall,0.91858
eval/runtime,0.9267
eval/samples_per_second,2160.325
eval/steps_per_second,67.982
eval_accuracy,0.91858


[34m[1mwandb[0m: Agent Starting Run: j1bak48d with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.05075967285023596
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 1.8632073013105667e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04717317903623568


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2405,0.086111,0.817682,0.799322,0.817682,0.806744,0.677111
2,0.0512,0.036587,0.897103,0.900964,0.897103,0.897083,0.818791
3,0.0219,0.040578,0.892607,0.90348,0.892607,0.894841,0.817091
4,0.011,0.03461,0.917083,0.916269,0.917083,0.913971,0.84924
5,0.0051,0.034995,0.917582,0.921111,0.917582,0.917193,0.853238
6,0.0023,0.032232,0.926573,0.925594,0.926573,0.925569,0.868239
7,0.0013,0.031771,0.927572,0.927592,0.927572,0.92742,0.871202
8,0.0006,0.033863,0.924076,0.925354,0.924076,0.924463,0.86597
9,0.0003,0.034451,0.925075,0.926282,0.925075,0.925459,0.867739
10,0.0003,0.034113,0.924575,0.926119,0.924575,0.925166,0.867069


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/toasty-sweep-14/best_model


0,1
eval/accuracy,▁▆▆▇▇██████
eval/f1,▁▆▆▇▇██████
eval/loss,█▂▂▁▁▁▁▁▁▁▁
eval/mcc,▁▆▆▇▇██████
eval/precision,▁▇▇▇███████
eval/recall,▁▆▆▇▇██████
eval/runtime,▂▄▁▄▃▁▅▂▆▃█
eval/samples_per_second,▇▅█▅▆█▄▇▃▆▁
eval/steps_per_second,▇▅█▅▆█▄▇▃▆▁
eval_accuracy,▁

0,1
eval/accuracy,0.92757
eval/f1,0.92742
eval/loss,0.03177
eval/mcc,0.8712
eval/precision,0.92759
eval/recall,0.92757
eval/runtime,0.9865
eval/samples_per_second,2029.401
eval/steps_per_second,63.862
eval_accuracy,0.92757


[34m[1mwandb[0m: Agent Starting Run: xvf0rdfk with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.06730348337514822
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 1.4490559351618888e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.0488772621686708


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2596,0.097498,0.807692,0.828412,0.807692,0.811738,0.677129
2,0.0541,0.038527,0.883117,0.892921,0.883117,0.886533,0.801071
3,0.0248,0.040235,0.888611,0.89924,0.888611,0.890447,0.809188
4,0.013,0.032955,0.916084,0.915344,0.916084,0.914068,0.848493
5,0.0073,0.038983,0.902597,0.912105,0.902597,0.905232,0.835916
6,0.0044,0.035247,0.917083,0.918521,0.917083,0.916402,0.852683
7,0.003,0.031723,0.923077,0.923122,0.923077,0.922964,0.86336
8,0.0019,0.034269,0.922577,0.923761,0.922577,0.922933,0.863763
9,0.0013,0.033765,0.923576,0.924171,0.923576,0.923678,0.864805
10,0.0011,0.035042,0.920579,0.92176,0.920579,0.920975,0.86024


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/solar-sweep-15/best_model


0,1
eval/accuracy,▁▆▆█▇██████
eval/f1,▁▆▆▇▇██████
eval/loss,█▂▂▁▂▁▁▁▁▁▁
eval/mcc,▁▆▆▇▇██████
eval/precision,▁▆▆▇▇██████
eval/recall,▁▆▆█▇██████
eval/runtime,▃▁▂▆▂▃▂▄▆█▇
eval/samples_per_second,▆█▇▃▇▆▇▅▃▁▂
eval/steps_per_second,▆█▇▃▇▆▇▅▃▁▂
eval_accuracy,▁

0,1
eval/accuracy,0.92308
eval/f1,0.92296
eval/loss,0.03172
eval/mcc,0.86336
eval/precision,0.92312
eval/recall,0.92308
eval/runtime,0.9565
eval/samples_per_second,2093.103
eval/steps_per_second,65.867
eval_accuracy,0.92308


[34m[1mwandb[0m: Agent Starting Run: ctqgt2sk with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.052695271144165744
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 3.3021336840901344e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04694540244636485


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2047,0.05905,0.845155,0.863072,0.845155,0.848996,0.741026
2,0.0421,0.04327,0.866633,0.887843,0.866633,0.872165,0.778721
3,0.0204,0.033123,0.91009,0.9143,0.91009,0.910477,0.842414
4,0.0084,0.039184,0.915584,0.915903,0.915584,0.91216,0.846529
5,0.0044,0.038695,0.910589,0.917846,0.910589,0.912228,0.845055
6,0.0027,0.043012,0.91958,0.917977,0.91958,0.91673,0.854545


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/driven-sweep-16/best_model


0,1
eval/accuracy,▁▃▇█▇█▇
eval/f1,▁▃▇███▇
eval/loss,█▄▁▃▃▄▁
eval/mcc,▁▃▇█▇█▇
eval/precision,▁▄█████
eval/recall,▁▃▇█▇█▇
eval/runtime,▇▄█▁▆▅▆
eval/samples_per_second,▂▅▁█▃▄▂
eval/steps_per_second,▂▅▁█▃▄▂
eval_accuracy,▁

0,1
eval/accuracy,0.91009
eval/f1,0.91048
eval/loss,0.03312
eval/mcc,0.84241
eval/precision,0.9143
eval/recall,0.91009
eval/runtime,0.9677
eval/samples_per_second,2068.928
eval/steps_per_second,65.106
eval_accuracy,0.91009


[34m[1mwandb[0m: Agent Starting Run: mx24mwmm with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.1035657394236676
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 2.785763476289485e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.048936060271168966


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2147,0.069708,0.824176,0.849721,0.824176,0.825077,0.699382
2,0.0485,0.050172,0.84016,0.877082,0.84016,0.849564,0.751811
3,0.0237,0.046954,0.879121,0.898303,0.879121,0.883578,0.80106
4,0.014,0.034469,0.914086,0.913995,0.914086,0.911179,0.844182
5,0.008,0.039257,0.899101,0.908594,0.899101,0.901163,0.829466
6,0.0043,0.035236,0.915584,0.917445,0.915584,0.914898,0.850582
7,0.003,0.035205,0.91009,0.914863,0.91009,0.911637,0.844833


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/golden-sweep-17/best_model


0,1
eval/accuracy,▁▂▅█▇███
eval/f1,▁▃▆█▇███
eval/loss,█▄▃▁▂▁▁▁
eval/mcc,▁▃▆█▇███
eval/precision,▁▄▆█▇███
eval/recall,▁▂▅█▇███
eval/runtime,▁▁▂▄▂▂▃█
eval/samples_per_second,██▇▅▇▇▆▁
eval/steps_per_second,██▇▅▇▇▆▁
eval_accuracy,▁

0,1
eval/accuracy,0.91409
eval/f1,0.91118
eval/loss,0.03447
eval/mcc,0.84418
eval/precision,0.91399
eval/recall,0.91409
eval/runtime,1.0057
eval/samples_per_second,1990.686
eval/steps_per_second,62.644
eval_accuracy,0.91409


[34m[1mwandb[0m: Agent Starting Run: 8en3qhh4 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.07248160920660092
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.2621055083395145e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.049285287705257544


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.1642,0.054036,0.860639,0.863703,0.860639,0.853951,0.746276
2,0.0416,0.042344,0.877123,0.893199,0.877123,0.880956,0.790007
3,0.0189,0.038819,0.897103,0.903577,0.897103,0.898583,0.823532
4,0.0097,0.034751,0.924076,0.926202,0.924076,0.924133,0.865844
5,0.0052,0.038308,0.916084,0.917465,0.916084,0.915885,0.849763
6,0.0033,0.041603,0.921079,0.919056,0.921079,0.91914,0.857218
7,0.002,0.040609,0.920579,0.921369,0.920579,0.919506,0.857792


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/fiery-sweep-18/best_model


0,1
eval/accuracy,▁▃▅█▇███
eval/f1,▁▄▅█▇███
eval/loss,█▄▂▁▂▃▃▁
eval/mcc,▁▄▆█▇▇██
eval/precision,▁▄▅█▇▇▇█
eval/recall,▁▃▅█▇███
eval/runtime,▁▁▁▃▃▂▂█
eval/samples_per_second,███▆▆▆▇▁
eval/steps_per_second,███▆▆▆▇▁
eval_accuracy,▁

0,1
eval/accuracy,0.92408
eval/f1,0.92413
eval/loss,0.03475
eval/mcc,0.86584
eval/precision,0.9262
eval/recall,0.92408
eval/runtime,2.0028
eval/samples_per_second,999.601
eval/steps_per_second,62.912
eval_accuracy,0.92408


[34m[1mwandb[0m: Agent Starting Run: 0ylcbykd with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.053089155048262233
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 2.0239732672156662e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.040384193068281435


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2358,0.104435,0.771728,0.824052,0.771728,0.781822,0.630775
2,0.053,0.037507,0.892607,0.894923,0.892607,0.892444,0.808905
3,0.0223,0.041818,0.887612,0.901308,0.887612,0.889674,0.8067
4,0.0117,0.033707,0.923576,0.922956,0.923576,0.921376,0.861575
5,0.0056,0.032347,0.923576,0.923883,0.923576,0.92293,0.863187
6,0.0026,0.035663,0.921578,0.921957,0.921578,0.920632,0.859974
7,0.0016,0.033667,0.924076,0.925377,0.924076,0.924309,0.865107
8,0.0008,0.036266,0.915085,0.917889,0.915085,0.916137,0.852034


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/dark-sweep-19/best_model


0,1
eval/accuracy,▁▇▆██████
eval/f1,▁▆▆██████
eval/loss,█▂▂▁▁▁▁▁▁
eval/mcc,▁▆▆██████
eval/precision,▁▆▆████▇█
eval/recall,▁▇▆██████
eval/runtime,▄▁▅▆▇█▄▄▇
eval/samples_per_second,▅█▄▃▂▁▅▅▂
eval/steps_per_second,▅█▄▃▂▁▅▅▂
eval_accuracy,▁

0,1
eval/accuracy,0.92358
eval/f1,0.92293
eval/loss,0.03235
eval/mcc,0.86319
eval/precision,0.92388
eval/recall,0.92358
eval/runtime,0.9288
eval/samples_per_second,2155.379
eval/steps_per_second,67.827
eval_accuracy,0.92358


[34m[1mwandb[0m: Agent Starting Run: xwp4qpap with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.07903314515214156
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.385955264317498e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.049980818986872504


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2036,0.053376,0.855644,0.862195,0.855644,0.849202,0.737499
2,0.0428,0.041871,0.873127,0.887288,0.873127,0.876252,0.78644
3,0.0208,0.036918,0.897103,0.902491,0.897103,0.897939,0.821146
4,0.0101,0.03169,0.922078,0.92202,0.922078,0.921349,0.860885
5,0.0047,0.036751,0.911588,0.912916,0.911588,0.911529,0.843208
6,0.003,0.036961,0.916583,0.917824,0.916583,0.916792,0.852808
7,0.0023,0.037669,0.923077,0.925468,0.923077,0.923618,0.865384


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/stellar-sweep-20/best_model


0,1
eval/accuracy,▁▃▅█▇▇██
eval/f1,▁▄▆█▇▇██
eval/loss,█▄▃▁▃▃▃▁
eval/mcc,▁▄▆█▇▇██
eval/precision,▁▄▅█▇▇██
eval/recall,▁▃▅█▇▇██
eval/runtime,▁▅▆▄█▇▄▆
eval/samples_per_second,█▄▂▅▁▂▄▃
eval/steps_per_second,█▄▂▅▁▂▄▃
eval_accuracy,▁

0,1
eval/accuracy,0.92208
eval/f1,0.92135
eval/loss,0.03169
eval/mcc,0.86089
eval/precision,0.92202
eval/recall,0.92208
eval/runtime,0.9235
eval/samples_per_second,2167.952
eval/steps_per_second,68.222
eval_accuracy,0.92208


[34m[1mwandb[0m: Agent Starting Run: vvhvo5og with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.06492159882463382
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 3.180349651909234e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.046257240290342054


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2123,0.062686,0.844655,0.865794,0.844655,0.846369,0.730053
2,0.0434,0.038683,0.894106,0.900359,0.894106,0.893958,0.81355
3,0.0189,0.039206,0.903596,0.91092,0.903596,0.905304,0.833257
4,0.0094,0.03416,0.928072,0.928155,0.928072,0.926942,0.870485
5,0.0044,0.035702,0.913586,0.917636,0.913586,0.914914,0.849232
6,0.0027,0.033681,0.924575,0.926141,0.924575,0.925062,0.867095
7,0.0016,0.035333,0.927572,0.92977,0.927572,0.928211,0.872211
8,0.0009,0.0359,0.927073,0.929743,0.927073,0.92793,0.872589
9,0.0004,0.03541,0.930569,0.931443,0.930569,0.930827,0.877112


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/genial-sweep-21/best_model


0,1
eval/accuracy,▁▅▆█▇█████
eval/f1,▁▅▆█▇█████
eval/loss,█▂▂▁▁▁▁▂▁▁
eval/mcc,▁▅▆█▇█████
eval/precision,▁▅▆█▇▇███▇
eval/recall,▁▅▆█▇█████
eval/runtime,▂█▁▃▇▃█▂▆▅
eval/samples_per_second,▇▁█▆▂▆▁▇▃▄
eval/steps_per_second,▇▁█▆▂▆▁▇▃▄
eval_accuracy,▁

0,1
eval/accuracy,0.92458
eval/f1,0.92506
eval/loss,0.03368
eval/mcc,0.8671
eval/precision,0.92614
eval/recall,0.92458
eval/runtime,0.9409
eval/samples_per_second,2127.839
eval/steps_per_second,66.96
eval_accuracy,0.92458


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 543s29wo with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.05019595866903384
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 3.380328217986513e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04837406028360008


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2532,0.126698,0.762737,0.796795,0.762737,0.759689,0.587335
2,0.0544,0.041557,0.867632,0.891977,0.867632,0.874109,0.786264
3,0.0222,0.032799,0.906593,0.912122,0.906593,0.907896,0.835319
4,0.0095,0.032923,0.915584,0.918687,0.915584,0.914271,0.849964
5,0.0049,0.031409,0.91958,0.921223,0.91958,0.919859,0.85755
6,0.0023,0.035497,0.919081,0.920994,0.919081,0.918249,0.855999
7,0.0013,0.033384,0.928072,0.929243,0.928072,0.928267,0.873027
8,0.0006,0.033748,0.926074,0.927233,0.926074,0.92639,0.86966


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/deft-sweep-22/best_model


0,1
eval/accuracy,▁▅▇▇█████
eval/f1,▁▆▇▇█████
eval/loss,█▂▁▁▁▁▁▁▁
eval/mcc,▁▆▇▇█████
eval/precision,▁▆▇▇█████
eval/recall,▁▅▇▇█████
eval/runtime,▄▃▁▁▇▃▅▆█
eval/samples_per_second,▅▆██▂▆▄▃▁
eval/steps_per_second,▅▆██▂▆▄▃▁
eval_accuracy,▁

0,1
eval/accuracy,0.91958
eval/f1,0.91986
eval/loss,0.03141
eval/mcc,0.85755
eval/precision,0.92122
eval/recall,0.91958
eval/runtime,0.9431
eval/samples_per_second,2122.834
eval/steps_per_second,66.802
eval_accuracy,0.91958


[34m[1mwandb[0m: Agent Starting Run: ocbm92je with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.062057803733018946
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 3.3986382689365755e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.044399555041597136


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2089,0.066623,0.819181,0.850404,0.819181,0.823565,0.700122
2,0.0435,0.037756,0.888112,0.89698,0.888112,0.889131,0.807066
3,0.0214,0.032624,0.909091,0.911639,0.909091,0.90944,0.840442
4,0.0098,0.036198,0.91958,0.920241,0.91958,0.916816,0.854244
5,0.0041,0.041566,0.905594,0.913288,0.905594,0.907747,0.838447
6,0.0025,0.039099,0.917582,0.919347,0.917582,0.917952,0.855506


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/dulcet-sweep-23/best_model


0,1
eval/accuracy,▁▆▇█▇█▇
eval/f1,▁▆▇█▇█▇
eval/loss,█▂▁▂▃▂▁
eval/mcc,▁▆▇█▇█▇
eval/precision,▁▆▇█▇█▇
eval/recall,▁▆▇█▇█▇
eval/runtime,▁▁▂▁▅▁█
eval/samples_per_second,▇█▇█▄█▁
eval/steps_per_second,▇█▇█▄█▁
eval_accuracy,▁

0,1
eval/accuracy,0.90909
eval/f1,0.90944
eval/loss,0.03262
eval/mcc,0.84044
eval/precision,0.91164
eval/recall,0.90909
eval/runtime,0.9989
eval/samples_per_second,2004.258
eval/steps_per_second,63.071
eval_accuracy,0.90909


[34m[1mwandb[0m: Agent Starting Run: p9s7dh1e with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.09999953255487871
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 3.281492601263243e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04950702958236411


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2052,0.061254,0.851648,0.845625,0.851648,0.83611,0.721863
2,0.043,0.043811,0.86963,0.890504,0.86963,0.874317,0.785996
3,0.0228,0.050441,0.877123,0.897405,0.877123,0.881533,0.796
4,0.0119,0.03587,0.919081,0.920158,0.919081,0.916942,0.854979
5,0.0065,0.038755,0.908092,0.917861,0.908092,0.910958,0.843549
6,0.0043,0.037316,0.923576,0.924113,0.923576,0.922281,0.863678
7,0.0024,0.03802,0.914585,0.919738,0.914585,0.916131,0.85273


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/dainty-sweep-24/best_model


0,1
eval/accuracy,▁▃▃█▆█▇█
eval/f1,▁▄▅█▇███
eval/loss,█▃▅▁▂▁▂▁
eval/mcc,▁▄▅█▇█▇█
eval/precision,▁▅▆█▇███
eval/recall,▁▃▃█▆█▇█
eval/runtime,▆▆▁█▁▂▆▃
eval/samples_per_second,▃▃█▁█▇▃▆
eval/steps_per_second,▃▃█▁█▇▃▆
eval_accuracy,▁

0,1
eval/accuracy,0.91908
eval/f1,0.91694
eval/loss,0.03587
eval/mcc,0.85498
eval/precision,0.92016
eval/recall,0.91908
eval/runtime,0.9119
eval/samples_per_second,2195.357
eval/steps_per_second,69.085
eval_accuracy,0.91908


[34m[1mwandb[0m: Agent Starting Run: 9j0j85v8 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.0958820653295907
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 3.705605959842424e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04617736828235132


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 1512, Warmup Steps (10%): 151




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.21,0.055316,0.855644,0.866623,0.855644,0.851479,0.742174
2,0.0486,0.046864,0.849151,0.884458,0.849151,0.859038,0.759799
3,0.0246,0.043854,0.884615,0.899043,0.884615,0.888122,0.806676
4,0.0125,0.0401,0.920579,0.919975,0.920579,0.917827,0.855736
5,0.007,0.047425,0.8996,0.909283,0.8996,0.902064,0.828647
6,0.0043,0.040555,0.915584,0.918606,0.915584,0.915175,0.850693
7,0.0027,0.037171,0.916583,0.919587,0.916583,0.917346,0.854545
8,0.0013,0.039354,0.918581,0.922687,0.918581,0.91997,0.858483
9,0.0006,0.037799,0.929071,0.92966,0.929071,0.929162,0.874469
10,0.0004,0.039072,0.919081,0.922438,0.919081,0.920228,0.858973


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/happy-sweep-25/best_model


0,1
eval/accuracy,▂▁▄▇▅▇▇▇█▇▇
eval/f1,▁▂▄▇▆▇▇▇█▇▇
eval/loss,█▅▄▂▅▂▁▂▁▂▁
eval/mcc,▁▂▄▇▆▇▇▇█▇▇
eval/precision,▁▃▅▇▆▇▇▇█▇▇
eval/recall,▂▁▄▇▅▇▇▇█▇▇
eval/runtime,█▃▁▅▅▇▅▃▇▇▆
eval/samples_per_second,▁▆█▄▄▂▄▆▂▂▃
eval/steps_per_second,▁▆█▄▄▂▄▆▂▂▃
eval_accuracy,▁

0,1
eval/accuracy,0.91658
eval/f1,0.91735
eval/loss,0.03717
eval/mcc,0.85455
eval/precision,0.91959
eval/recall,0.91658
eval/runtime,0.9161
eval/samples_per_second,2185.254
eval/steps_per_second,68.767
eval_accuracy,0.91658


[34m[1mwandb[0m: Agent Starting Run: 830rrgmy with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.05921821710712821
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.634434939662679e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.047477457773195245


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2457,0.099322,0.753746,0.810229,0.753746,0.757451,0.610964
2,0.0463,0.032833,0.895604,0.90328,0.895604,0.898136,0.821975
3,0.021,0.034628,0.910589,0.918562,0.910589,0.910604,0.841266
4,0.0109,0.032858,0.908591,0.913334,0.908591,0.909184,0.838883
5,0.0046,0.043862,0.901598,0.913337,0.901598,0.904692,0.836187


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/fresh-sweep-26/best_model


0,1
eval/accuracy,▁▇███▇
eval/f1,▁▇███▇
eval/loss,█▁▁▁▂▁
eval/mcc,▁▇███▇
eval/precision,▁▇███▇
eval/recall,▁▇███▇
eval/runtime,▁▃█▁▂▃
eval/samples_per_second,█▆▁█▇▆
eval/steps_per_second,█▆▁█▇▆
eval_accuracy,▁

0,1
eval/accuracy,0.8956
eval/f1,0.89814
eval/loss,0.03283
eval/mcc,0.82198
eval/precision,0.90328
eval/recall,0.8956
eval/runtime,0.9151
eval/samples_per_second,2187.809
eval/steps_per_second,68.847
eval_accuracy,0.8956


[34m[1mwandb[0m: Agent Starting Run: ybioh8ru with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.05932520932050617
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 2.304970853629921e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04824240804706734


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.258,0.115888,0.768232,0.776938,0.768232,0.746235,0.577242
2,0.064,0.040053,0.871129,0.884274,0.871129,0.874404,0.782773
3,0.0255,0.032985,0.904595,0.907406,0.904595,0.904706,0.829987
4,0.0139,0.033293,0.908591,0.911519,0.908591,0.907904,0.836891
5,0.0064,0.034452,0.911089,0.916068,0.911089,0.912438,0.846471
6,0.0037,0.035948,0.915584,0.915882,0.915584,0.915131,0.850499


✅ Best model saved to /content/drive/MyDrive/RoBERTa_8_Results/effortless-sweep-27/best_model


0,1
eval/accuracy,▁▆▇███▇
eval/f1,▁▆█████
eval/loss,█▂▁▁▁▁▁
eval/mcc,▁▆▇███▇
eval/precision,▁▆█████
eval/recall,▁▆▇███▇
eval/runtime,▁▁▂▂▂▂█
eval/samples_per_second,██▇▇▇▇▁
eval/steps_per_second,██▇▇▇▇▁
eval_accuracy,▁

0,1
eval/accuracy,0.9046
eval/f1,0.90471
eval/loss,0.03299
eval/mcc,0.82999
eval/precision,0.90741
eval/recall,0.9046
eval/runtime,1.0183
eval/samples_per_second,1966.026
eval/steps_per_second,61.868
eval_accuracy,0.9046


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.
