In [1]:
import os
import csv
import pandas as pd
import torch
import re
import nltk
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, matthews_corrcoef
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback, XLNetTokenizer, XLNetForSequenceClassification
import torch.nn as nn
import wandb

from google.colab import auth
from google.colab import drive
from google.colab import userdata

In [2]:
# =========================
# STEP 0: mount to drive
# =========================
#auth.authenticate_user()
#drive.mount('/content/drive')
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [3]:
# =========================
# STEP 1: Initialize WandB
# =========================
# Retrieve API Key from Colab Secrets
wandb_api_key = userdata.get('WANDB_API_KEY')

if wandb_api_key:
    os.environ["WANDB_API_KEY"] = wandb_api_key
    wandb.login(key=wandb_api_key)
    print("✅ WandB Logged in Securely")
else:
    print("❌ Error: WANDB_API_KEY not found. Set it in Colab Secrets.")


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33maalhaizaey[0m ([33mabdulrahim-alhaizaey[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


✅ WandB Logged in Securely


In [4]:
# =========================
# STEP 2: LOAD DATA & CLEAN TEXT
# =========================
nltk.download('stopwords')
nltk.download('wordnet')

multi_data = pd.read_csv("/content/10006_dataset_Multi.csv", encoding='ISO-8859-1')

def clean_text(text):
    if pd.isnull(text):
        return ""
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text).lower()
    return text

multi_data['cleaned_text'] = multi_data['text'].apply(clean_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [5]:
# =========================
# STEP 3: PREPARE DATASETS & LABEL ENCODING (Train Only)
# =========================
multi_train, multi_val = train_test_split(multi_data, test_size=0.2, stratify=multi_data['label'], random_state=42)

# Fit LabelEncoder only on training data to prevent leakage
multi_le = LabelEncoder()
multi_le.fit(multi_train['label'])
multi_train['encoded_label'] = multi_le.transform(multi_train['label'])
multi_val['encoded_label'] = multi_le.transform(multi_val['label'])


In [6]:
# =========================
# STEP 4: TOKENIZATION (Train Data Only for max_length)
# =========================
tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")

# Calculate max_length using training data
max_length = min(tokenizer.model_max_length, max(multi_train['cleaned_text'].apply(lambda x: len(tokenizer.tokenize(x)))))
print(f"Using max_length from training set only: {max_length}")

def tokenize_data(df, label_col):
    encodings = tokenizer(df['cleaned_text'].tolist(), truncation=True, padding=True, max_length=max_length, return_tensors="pt")
    labels = torch.tensor(df[label_col].tolist())
    return encodings, labels

train_multi_enc, train_multi_labels = tokenize_data(multi_train, 'encoded_label')
val_multi_enc, val_multi_labels = tokenize_data(multi_val, 'encoded_label')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

Using max_length from training set only: 125


In [7]:
# =========================
# STEP 5: DATASET CLASS & METRICS
# =========================
class TextDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    accuracy = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted', zero_division=0)
    mcc = matthews_corrcoef(labels, predictions)
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "mcc": mcc
    }


In [8]:
# =========================
# STEP 6: FOCAL LOSS & CUSTOM TRAINER
# =========================
class FocalLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0, reduction='mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = nn.CrossEntropyLoss(reduction='none')(inputs, targets)
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean() if self.reduction == 'mean' else focal_loss.sum()

class CustomTrainer(Trainer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.training_loss = []
        self.validation_loss = []
        self.results = []

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fn = FocalLoss(alpha=0.25, gamma=2.0)
        loss = loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss

    def evaluate(self, *args, **kwargs):
        output = super().evaluate(*args, **kwargs)
        self.validation_loss.append(output['eval_loss'])
        self.results.append(output.copy())
        return output

    def log(self, logs, *args, **kwargs):
        super().log(logs, *args, **kwargs)
        if 'loss' in logs:
            self.training_loss.append(logs['loss'])


In [9]:
# =========================
# STEP 7: ADD PLOTTING (Confusion Matrices & Loss Curves)
# =========================
def plot_confusion_matrix(labels, predictions, label_encoder, output_dir, run_name):
    class_names = label_encoder.classes_
    conf_matrix = confusion_matrix(labels, predictions)
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(f"{output_dir}/{run_name}_confusion_numbers.png")
    plt.close()

def plot_confusion_matrix_percent(labels, predictions, label_encoder, output_dir, run_name):
    class_names = label_encoder.classes_
    conf_matrix = confusion_matrix(labels, predictions, normalize='true')
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='.2f', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(f"{output_dir}/{run_name}_confusion_percent.png")
    plt.close()

def plot_confusion_matrix_class_weighted(labels, predictions, label_encoder, output_dir, run_name):
    conf_matrix = confusion_matrix(labels, predictions, normalize='true')
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='.2f', cmap='coolwarm', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(f"{output_dir}/{run_name}_confusion_weighted.png")
    plt.close()

def plot_loss(training_loss, validation_loss, output_dir, run_name):
    min_length = min(len(training_loss), len(validation_loss))
    training_loss = training_loss[:min_length]
    validation_loss = validation_loss[:min_length]
    epochs = range(1, min_length + 1)
    plt.figure(figsize=(8, 6))
    plt.plot(epochs, training_loss, label='Training Loss', marker='o')
    plt.plot(epochs, validation_loss, label='Validation Loss', marker='s', linestyle='--')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{output_dir}/{run_name}_loss_curves.png")
    plt.close()


In [10]:
# =========================
# STEP 8: TRAIN FUNCTION
# =========================
def train_xlnet(train_dataset, eval_dataset, num_labels, output_dir, label_encoder, config=None):
    with wandb.init(config=config):
        config = wandb.config
        run_name = wandb.run.name

        base_dir = "/content/drive/MyDrive/XLNet_8_Results"
        run_dir = os.path.join(base_dir, run_name)
        os.makedirs(run_dir, exist_ok=True)

        model = XLNetForSequenceClassification.from_pretrained(
          "xlnet-base-cased",
           num_labels=num_labels,
           dropout=config.dropout
        )


        #num_devices = torch.cuda.device_count()  # Will be 1 on Colab
        #examples_per_step = config.batch_size * num_devices * config.gradient_accumulation_steps
        #total_steps = int(np.ceil(len(train_dataset) / examples_per_step) * config.num_train_epochs)

        num_devices = torch.cuda.device_count()  # Will be 1 on Colab
        gradient_accumulation_steps = config.get('gradient_accumulation_steps', 1)  # Provide a default value
        examples_per_step = config.batch_size * num_devices * gradient_accumulation_steps
        total_steps = int(np.ceil(len(train_dataset) / examples_per_step) * config.num_train_epochs)


        warmup_steps = int(0.1 * total_steps)
        print(f"Total Steps: {total_steps}, Warmup Steps (10%): {warmup_steps}")

        training_args = TrainingArguments(
            output_dir=output_dir,
            run_name=run_name,
            num_train_epochs=config.num_train_epochs,
            per_device_train_batch_size=config.batch_size,
            per_device_eval_batch_size=config.batch_size,
            learning_rate=config.learning_rate,
            warmup_steps=warmup_steps,
            weight_decay=config.weight_decay,
            evaluation_strategy="epoch",
            logging_strategy="epoch",
            save_strategy="epoch",
            fp16=True,
            #gradient_checkpointing=True,
            save_total_limit=2,
            gradient_accumulation_steps = config.get('gradient_accumulation_steps', 1),  # Provide a default value
            #gradient_accumulation_steps=config.gradient_accumulation_steps,
            max_grad_norm=config.max_grad_norm,
            lr_scheduler_type="cosine_with_restarts",
            load_best_model_at_end=True,
            metric_for_best_model="eval_loss",
            report_to="wandb"
        )

        trainer = CustomTrainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            compute_metrics=compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
        )

        trainer.train()

        # Save Best Model
        best_model_path = os.path.join(run_dir, "best_model")
        trainer.save_model(best_model_path)
        print(f"✅ Best model saved to {best_model_path}")

        # Evaluate Best Model
        results = trainer.evaluate(eval_dataset=eval_dataset)

        predictions_obj = trainer.predict(eval_dataset)
        predictions = np.argmax(predictions_obj.predictions, axis=1)
        labels = predictions_obj.label_ids

        wandb.log({
            "eval_loss": results.get("eval_loss"),
            "eval_accuracy": results.get("eval_accuracy"),
            "eval_precision": results.get("eval_precision"),
            "eval_recall": results.get("eval_recall"),
            "eval_f1": results.get("eval_f1"),
            "eval_mcc": results.get("eval_mcc")
        })

        plot_confusion_matrix(labels, predictions, label_encoder, run_dir, run_name)
        plot_confusion_matrix_percent(labels, predictions, label_encoder, run_dir, run_name)
        plot_confusion_matrix_class_weighted(labels, predictions, label_encoder, run_dir, run_name)
        plot_loss(trainer.training_loss, trainer.validation_loss, run_dir, run_name)

        # Save run parameters and results to a CSV file
        results_csv = os.path.join(run_dir, "results.csv")
        with open(results_csv, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["Parameter", "Value"])
            for key, value in config.items():
                writer.writerow([key, value])
            writer.writerow(["Epoch", "Training Loss", "Validation Loss", "Accuracy", "Precision", "Recall", "F1", "Mcc"])
            for epoch, result in enumerate(trainer.results, start=1):
                writer.writerow([
                    epoch,
                    trainer.training_loss[epoch - 1] if epoch - 1 < len(trainer.training_loss) else None,
                    trainer.validation_loss[epoch - 1] if epoch - 1 < len(trainer.validation_loss) else None,
                    result.get("eval_accuracy"),
                    result.get("eval_precision"),
                    result.get("eval_recall"),
                    result.get("eval_f1"),
                    result.get("eval_mcc")
                ])

        return results


In [11]:
# =========================
# STEP 9: RUN SWEEP CONFIGURATION
# =========================
train_dataset = TextDataset(train_multi_enc, train_multi_labels)
val_dataset = TextDataset(val_multi_enc, val_multi_labels)

sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'eval_loss', 'goal': 'minimize'},
    'parameters': {
        'num_train_epochs': {'values': [12]},
        'learning_rate': {'min': 1e-5, 'max': 5e-5},
        'batch_size': {'values': [8, 16, 32]},
        'weight_decay': {'min': 0.01, 'max': 0.05},
        'dropout': {'min': 0.05, 'max': 0.5},
        #'gradient_accumulation_steps': {'values': [2, 4]},
        'gradient_accumulation_steps': {'values': [2, 4]},  # Ensure this line is present
        'max_grad_norm': {'values': [1.0, 2.0]}
    },
}

In [12]:


sweep_id = wandb.sweep(sweep=sweep_config, project='XLNet_8')

wandb.agent(sweep_id, function=lambda: train_xlnet(
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    num_labels=len(multi_le.classes_),
    output_dir="multi_output",
    label_encoder=multi_le
))


Create sweep with ID: 91ggabyj
Sweep URL: https://wandb.ai/abdulrahim-alhaizaey/XLNet_8/sweeps/91ggabyj


[34m[1mwandb[0m: Agent Starting Run: 2mlv5ueb with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.39393678929202386
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 2.2571946025271295e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.02214346734807081


pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 756, Warmup Steps (10%): 75




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,1.6099,0.354134,0.645355,0.416483,0.645355,0.506253,0.0
2,0.9565,0.295536,0.645355,0.416483,0.645355,0.506253,0.0
3,0.9323,0.281715,0.645355,0.416483,0.645355,0.506253,0.0
4,0.9127,0.280737,0.645355,0.416483,0.645355,0.506253,0.0
5,0.9141,0.265272,0.645355,0.416483,0.645355,0.506253,0.0
6,0.9079,0.264566,0.645355,0.416483,0.645355,0.506253,0.0
7,0.9083,0.266694,0.645355,0.416483,0.645355,0.506253,0.0
8,0.9082,0.260107,0.645355,0.416483,0.645355,0.506253,0.0
9,0.904,0.268999,0.645355,0.416483,0.645355,0.506253,0.0
10,0.9051,0.26739,0.645355,0.416483,0.645355,0.506253,0.0


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/peachy-sweep-1/best_model


0,1
eval/accuracy,▁▁▁▁▁▁▁▁▁▁▁▁
eval/f1,▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,█▄▃▃▁▁▁▁▂▂▂▁
eval/mcc,▁▁▁▁▁▁▁▁▁▁▁▁
eval/precision,▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall,▁▁▁▁▁▁▁▁▁▁▁▁
eval/runtime,▅▄▃▁▁▃▇▃▆▇▆█
eval/samples_per_second,▃▅▆██▆▂▆▃▂▃▁
eval/steps_per_second,▃▅▆██▆▂▆▃▂▃▁
eval_accuracy,▁

0,1
eval/accuracy,0.64535
eval/f1,0.50625
eval/loss,0.26011
eval/mcc,0.0
eval/precision,0.41648
eval/recall,0.64535
eval/runtime,2.0437
eval/samples_per_second,979.612
eval/steps_per_second,30.827
eval_accuracy,0.64535


[34m[1mwandb[0m: Agent Starting Run: 9utbiaxk with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.1894602743519923
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.8380652766972834e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.024002535272155782


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.8198,0.085993,0.813187,0.808801,0.813187,0.790585,0.641643
2,0.2514,0.056409,0.838661,0.879885,0.838661,0.849271,0.742951
3,0.1501,0.045611,0.881618,0.893899,0.881618,0.884736,0.800245
4,0.1016,0.047526,0.85964,0.884611,0.85964,0.86588,0.776576
5,0.0705,0.047261,0.876124,0.898154,0.876124,0.882217,0.795515
6,0.0444,0.042527,0.903097,0.907708,0.903097,0.90443,0.832822
7,0.03,0.047621,0.891608,0.906292,0.891608,0.895337,0.822305
8,0.0162,0.044095,0.905594,0.913553,0.905594,0.908083,0.840521
9,0.0116,0.049403,0.900599,0.913231,0.900599,0.904136,0.835469


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/hardy-sweep-2/best_model


0,1
eval/accuracy,▁▃▆▅▆█▇███
eval/f1,▁▄▇▅▆█▇███
eval/loss,█▃▁▂▂▁▂▁▂▁
eval/mcc,▁▅▇▆▆█▇███
eval/precision,▁▆▇▆▇█████
eval/recall,▁▃▆▅▆█▇███
eval/runtime,▂▁▁▂▃▂▂▂▃█
eval/samples_per_second,▇██▇▆▇▇▆▆▁
eval/steps_per_second,▇██▇▆▇▇▆▆▁
eval_accuracy,▁

0,1
eval/accuracy,0.9031
eval/f1,0.90443
eval/loss,0.04253
eval/mcc,0.83282
eval/precision,0.90771
eval/recall,0.9031
eval/runtime,7.4841
eval/samples_per_second,267.499
eval/steps_per_second,33.538
eval_accuracy,0.9031


[34m[1mwandb[0m: Agent Starting Run: gjpmjtta with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.328426028259407
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 1.7345169513541595e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.02755589824334815


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,1.14,0.251594,0.645355,0.416483,0.645355,0.506253,0.0
2,0.9099,0.215257,0.645355,0.416483,0.645355,0.506253,0.0
3,0.8986,0.200411,0.645355,0.416483,0.645355,0.506253,0.0
4,0.8572,0.22583,0.664835,0.617859,0.664835,0.62319,0.384264
5,0.7378,0.231726,0.512488,0.661334,0.512488,0.542958,0.332564
6,0.5932,0.217591,0.584416,0.72308,0.584416,0.607717,0.42526


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/rural-sweep-3/best_model


0,1
eval/accuracy,▇▇▇█▁▄▇
eval/f1,▁▁▁█▃▇▁
eval/loss,█▃▁▄▅▃▁
eval/mcc,▁▁▁▇▆█▁
eval/precision,▁▁▁▆▇█▁
eval/recall,▇▇▇█▁▄▇
eval/runtime,▂▂▁▂█▃▅
eval/samples_per_second,▇▇█▇▁▆▄
eval/steps_per_second,▇▇█▇▁▆▄
eval_accuracy,▁

0,1
eval/accuracy,0.64535
eval/f1,0.50625
eval/loss,0.20041
eval/mcc,0.0
eval/precision,0.41648
eval/recall,0.64535
eval/runtime,6.9581
eval/samples_per_second,287.724
eval/steps_per_second,36.073
eval_accuracy,0.64535


[34m[1mwandb[0m: Agent Starting Run: abmqctjl with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.14419802393225584
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.809567868292578e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.02463256479113502


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.7681,0.05536,0.862138,0.864021,0.862138,0.854428,0.747849
2,0.2244,0.042823,0.863137,0.883365,0.863137,0.868431,0.772613
3,0.1262,0.038733,0.888611,0.899351,0.888611,0.890983,0.810111
4,0.0846,0.04706,0.863137,0.888705,0.863137,0.869069,0.77731
5,0.0481,0.043926,0.908591,0.914743,0.908591,0.910039,0.840416
6,0.0284,0.04172,0.917083,0.919681,0.917083,0.915003,0.851899


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/winter-sweep-4/best_model


0,1
eval/accuracy,▁▁▄▁▇█▄
eval/f1,▁▃▅▃▇█▅
eval/loss,█▃▁▅▃▂▁
eval/mcc,▁▃▅▃▇█▅
eval/precision,▁▃▅▄▇█▅
eval/recall,▁▁▄▁▇█▄
eval/runtime,▆▂▁█▅▄▇
eval/samples_per_second,▃▇█▁▄▅▂
eval/steps_per_second,▃▇█▁▄▅▂
eval_accuracy,▁

0,1
eval/accuracy,0.88861
eval/f1,0.89098
eval/loss,0.03873
eval/mcc,0.81011
eval/precision,0.89935
eval/recall,0.88861
eval/runtime,6.9936
eval/samples_per_second,286.261
eval/steps_per_second,35.89
eval_accuracy,0.88861


[34m[1mwandb[0m: Agent Starting Run: 1h6h79mx with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.1953780263301953
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.9871331234920905e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.03679405302462272


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.872,0.080585,0.816683,0.82593,0.816683,0.799235,0.655296
2,0.2686,0.051415,0.838162,0.873809,0.838162,0.845412,0.740648
3,0.154,0.042876,0.86963,0.886679,0.86963,0.873593,0.782381
4,0.1035,0.04964,0.846154,0.882617,0.846154,0.854806,0.760886
5,0.0685,0.047132,0.881119,0.896533,0.881119,0.884969,0.802899
6,0.048,0.039091,0.911089,0.914331,0.911089,0.911879,0.845077
7,0.0283,0.049685,0.889111,0.912912,0.889111,0.89533,0.822218
8,0.0149,0.048519,0.898601,0.913898,0.898601,0.902758,0.832428
9,0.0111,0.049974,0.912088,0.921373,0.912088,0.914629,0.852095


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/icy-sweep-5/best_model


0,1
eval/accuracy,▁▃▅▃▆█▆▇██
eval/f1,▁▄▆▄▆█▇▇██
eval/loss,█▃▂▃▂▁▃▃▃▁
eval/mcc,▁▄▆▅▆█▇▇██
eval/precision,▁▅▅▅▆▇▇▇█▇
eval/recall,▁▃▅▃▆█▆▇██
eval/runtime,▂▃▂▄▁▄▂▃▅█
eval/samples_per_second,▇▆▇▅█▅▇▆▄▁
eval/steps_per_second,▇▆▇▅█▅▇▆▄▁
eval_accuracy,▁

0,1
eval/accuracy,0.91109
eval/f1,0.91188
eval/loss,0.03909
eval/mcc,0.84508
eval/precision,0.91433
eval/recall,0.91109
eval/runtime,7.1034
eval/samples_per_second,281.836
eval/steps_per_second,35.335
eval_accuracy,0.91109


[34m[1mwandb[0m: Agent Starting Run: 05ckg826 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.1131639162963022
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.6862478872463486e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.023907137703407924


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 6012, Warmup Steps (10%): 601




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2775,0.059268,0.84016,0.868135,0.84016,0.84428,0.725698
2,0.1069,0.057682,0.784715,0.875389,0.784715,0.805754,0.700102
3,0.0542,0.037255,0.911089,0.914542,0.911089,0.912258,0.845995
4,0.0293,0.043888,0.883117,0.903492,0.883117,0.8876,0.80993
5,0.019,0.050404,0.895105,0.904867,0.895105,0.89757,0.82002
6,0.0098,0.054564,0.888112,0.901777,0.888112,0.892055,0.813867


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/solar-sweep-6/best_model


0,1
eval/accuracy,▄▁█▆▇▇█
eval/f1,▄▁█▆▇▇█
eval/loss,█▇▁▃▅▇▁
eval/mcc,▂▁█▆▇▆█
eval/precision,▁▂█▆▇▆█
eval/recall,▄▁█▆▇▇█
eval/runtime,▄▁▁▁▅▄█
eval/samples_per_second,▅███▄▅▁
eval/steps_per_second,▅███▄▅▁
eval_accuracy,▁

0,1
eval/accuracy,0.91109
eval/f1,0.91226
eval/loss,0.03725
eval/mcc,0.846
eval/precision,0.91454
eval/recall,0.91109
eval/runtime,7.0043
eval/samples_per_second,285.826
eval/steps_per_second,35.835
eval_accuracy,0.91109


[34m[1mwandb[0m: Agent Starting Run: 12h1pqb2 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.08912214370519564
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.968677680325894e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.0474526816685298


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 6012, Warmup Steps (10%): 601




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2874,0.070326,0.831169,0.850697,0.831169,0.828859,0.697586
2,0.1006,0.04605,0.84965,0.888276,0.84965,0.858299,0.763874
3,0.049,0.03692,0.892607,0.90259,0.892607,0.895083,0.81676
4,0.0295,0.040211,0.908092,0.907622,0.908092,0.906446,0.833505
5,0.0157,0.051577,0.901099,0.908588,0.901099,0.903159,0.832494
6,0.0111,0.04615,0.918581,0.919581,0.918581,0.916678,0.853408


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/sparkling-sweep-7/best_model


0,1
eval/accuracy,▁▂▆▇▇█▆
eval/f1,▁▃▆▇▇█▆
eval/loss,█▃▁▂▄▃▁
eval/mcc,▁▄▆▇▇█▆
eval/precision,▁▅▆▇▇█▆
eval/recall,▁▂▆▇▇█▆
eval/runtime,▃▁▁▃▂▂█
eval/samples_per_second,▆██▆▇▇▁
eval/steps_per_second,▆██▆▇▇▁
eval_accuracy,▁

0,1
eval/accuracy,0.89261
eval/f1,0.89508
eval/loss,0.03692
eval/mcc,0.81676
eval/precision,0.90259
eval/recall,0.89261
eval/runtime,7.2417
eval/samples_per_second,276.456
eval/steps_per_second,34.661
eval_accuracy,0.89261


[34m[1mwandb[0m: Agent Starting Run: 3rd48oip with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.059935035833801366
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.7151640967028046e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.03790192429117191


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 6012, Warmup Steps (10%): 601




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2793,0.07492,0.82018,0.851377,0.82018,0.812978,0.679413
2,0.0925,0.051097,0.84016,0.894646,0.84016,0.853975,0.752285
3,0.0455,0.036913,0.906593,0.912584,0.906593,0.907971,0.836343
4,0.0234,0.045601,0.898102,0.901948,0.898102,0.898553,0.819117
5,0.0129,0.044392,0.912587,0.91448,0.912587,0.912762,0.845785
6,0.0067,0.046221,0.918082,0.917226,0.918082,0.916045,0.851986


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/flowing-sweep-8/best_model


0,1
eval/accuracy,▁▂▇▇██▇
eval/f1,▁▄▇▇██▇
eval/loss,█▄▁▃▂▃▁
eval/mcc,▁▄▇▇██▇
eval/precision,▁▆█▆███
eval/recall,▁▂▇▇██▇
eval/runtime,▅▃▂▁▄▃█
eval/samples_per_second,▄▆▇█▅▆▁
eval/steps_per_second,▄▆▇█▅▆▁
eval_accuracy,▁

0,1
eval/accuracy,0.90659
eval/f1,0.90797
eval/loss,0.03691
eval/mcc,0.83634
eval/precision,0.91258
eval/recall,0.90659
eval/runtime,7.2428
eval/samples_per_second,276.414
eval/steps_per_second,34.655
eval_accuracy,0.90659


[34m[1mwandb[0m: Agent Starting Run: t1gtjp82 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.09185418565440062
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.989735460287166e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.025608577585260014


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 6012, Warmup Steps (10%): 601




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2956,0.066378,0.833167,0.848619,0.833167,0.833779,0.711161
2,0.1014,0.046748,0.874126,0.891097,0.874126,0.877909,0.786433
3,0.0575,0.040315,0.896603,0.900751,0.896603,0.897754,0.821158
4,0.0318,0.040147,0.918082,0.920782,0.918082,0.918731,0.855991
5,0.0195,0.044971,0.917083,0.919619,0.917083,0.917025,0.853645
6,0.0103,0.053697,0.917582,0.919526,0.917582,0.91635,0.852757
7,0.0053,0.061711,0.905594,0.916761,0.905594,0.908477,0.841354


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/desert-sweep-9/best_model


0,1
eval/accuracy,▁▄▆███▇█
eval/f1,▁▅▆███▇█
eval/loss,█▃▁▁▂▅▇▁
eval/mcc,▁▅▆███▇█
eval/precision,▁▅▆█████
eval/recall,▁▄▆███▇█
eval/runtime,▄▁▃▁█▂▆█
eval/samples_per_second,▅█▆█▁▇▃▁
eval/steps_per_second,▅█▆█▁▇▃▁
eval_accuracy,▁

0,1
eval/accuracy,0.91808
eval/f1,0.91873
eval/loss,0.04015
eval/mcc,0.85599
eval/precision,0.92078
eval/recall,0.91808
eval/runtime,7.2263
eval/samples_per_second,277.044
eval/steps_per_second,34.734
eval_accuracy,0.91808


[34m[1mwandb[0m: Agent Starting Run: vtyel0il with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.0932573799377783
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.935849848077008e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.021858044131989825


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 6012, Warmup Steps (10%): 601




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2736,0.056338,0.856144,0.866139,0.856144,0.849125,0.738732
2,0.1035,0.048786,0.861139,0.895278,0.861139,0.868723,0.770889
3,0.0555,0.037212,0.897103,0.903877,0.897103,0.89877,0.820581
4,0.0272,0.043452,0.908591,0.90996,0.908591,0.905728,0.834539
5,0.0152,0.052185,0.91009,0.915764,0.91009,0.910735,0.840631
6,0.01,0.039377,0.919081,0.922047,0.919081,0.919245,0.858223


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/major-sweep-10/best_model


0,1
eval/accuracy,▁▂▆▇▇█▆
eval/f1,▁▃▆▇▇█▆
eval/loss,█▅▁▃▆▂▁
eval/mcc,▁▃▆▇▇█▆
eval/precision,▁▅▆▆▇█▆
eval/recall,▁▂▆▇▇█▆
eval/runtime,▄▁▆█▆▂▂
eval/samples_per_second,▅█▃▁▃▇▇
eval/steps_per_second,▅█▃▁▃▇▇
eval_accuracy,▁

0,1
eval/accuracy,0.8971
eval/f1,0.89877
eval/loss,0.03721
eval/mcc,0.82058
eval/precision,0.90388
eval/recall,0.8971
eval/runtime,6.9675
eval/samples_per_second,287.333
eval/steps_per_second,36.024
eval_accuracy,0.8971


[34m[1mwandb[0m: Agent Starting Run: v4tr3noc with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.05602902803366877
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.832202994610151e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.03031764933967538


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 6012, Warmup Steps (10%): 601




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.2754,0.067914,0.829171,0.863854,0.829171,0.824691,0.696514
2,0.0953,0.057828,0.796204,0.885971,0.796204,0.818497,0.701373
3,0.0506,0.038352,0.901099,0.901862,0.901099,0.898547,0.822407
4,0.0269,0.044397,0.913087,0.91424,0.913087,0.912364,0.844688
5,0.0151,0.053773,0.908591,0.914625,0.908591,0.909615,0.840262
6,0.0077,0.050689,0.920579,0.921319,0.920579,0.918472,0.856733


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/woven-sweep-11/best_model


0,1
eval/accuracy,▃▁▇█▇█▇
eval/f1,▁▁▇█▇█▇
eval/loss,█▆▁▂▅▄▁
eval/mcc,▁▁▇▇▇█▇
eval/precision,▁▄▆▇▇█▆
eval/recall,▃▁▇█▇█▇
eval/runtime,▁▂▁▁▃▂█
eval/samples_per_second,█▇██▆▇▁
eval/steps_per_second,█▇██▆▇▁
eval_accuracy,▁

0,1
eval/accuracy,0.9011
eval/f1,0.89855
eval/loss,0.03835
eval/mcc,0.82241
eval/precision,0.90186
eval/recall,0.9011
eval/runtime,7.4245
eval/samples_per_second,269.648
eval/steps_per_second,33.807
eval_accuracy,0.9011


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3vrtiuh4 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.09228349246063516
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.539846058721734e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.03057788059236763


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 6012, Warmup Steps (10%): 601




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.3053,0.059935,0.843656,0.857001,0.843656,0.841667,0.719478
2,0.1005,0.047377,0.878621,0.899992,0.878621,0.883371,0.800749
3,0.0471,0.035454,0.906593,0.9114,0.906593,0.907839,0.836504
4,0.027,0.044628,0.912088,0.912935,0.912088,0.911418,0.843261
5,0.0157,0.044279,0.918082,0.922771,0.918082,0.918934,0.85646
6,0.0055,0.04562,0.920579,0.922413,0.920579,0.919022,0.857883


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/clean-sweep-12/best_model


0,1
eval/accuracy,▁▄▇▇██▇
eval/f1,▁▅▇▇██▇
eval/loss,█▄▁▄▄▄▁
eval/mcc,▁▅▇▇██▇
eval/precision,▁▆▇▇██▇
eval/recall,▁▄▇▇██▇
eval/runtime,▄▅▃▁▃█▇
eval/samples_per_second,▅▄▆█▅▁▂
eval/steps_per_second,▅▄▆█▅▁▂
eval_accuracy,▁

0,1
eval/accuracy,0.90659
eval/f1,0.90784
eval/loss,0.03545
eval/mcc,0.8365
eval/precision,0.9114
eval/recall,0.90659
eval/runtime,7.2054
eval/samples_per_second,277.848
eval/steps_per_second,34.835
eval_accuracy,0.90659


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5oqku6wp with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.17479155650849873
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.947144152080488e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.04675487091960991


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.8287,0.072979,0.837662,0.851875,0.837662,0.818774,0.697222
2,0.2424,0.0462,0.859141,0.877489,0.859141,0.858655,0.761803
3,0.1431,0.044809,0.864136,0.884013,0.864136,0.869222,0.77705
4,0.0878,0.054698,0.863636,0.889606,0.863636,0.869623,0.777822
5,0.0662,0.048075,0.873127,0.896467,0.873127,0.879102,0.794603
6,0.0352,0.042207,0.913087,0.916287,0.913087,0.913727,0.848915
7,0.0231,0.047947,0.904096,0.914449,0.904096,0.906796,0.840725
8,0.0118,0.04928,0.906094,0.921259,0.906094,0.909997,0.846052
9,0.0104,0.048671,0.906593,0.9195,0.906593,0.910023,0.845582


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/twilight-sweep-13/best_model


0,1
eval/accuracy,▁▃▃▃▄█▇▇▇█
eval/f1,▁▄▅▅▅█▇███
eval/loss,█▂▂▄▂▁▂▃▂▁
eval/mcc,▁▄▅▅▅█████
eval/precision,▁▄▄▅▅▇▇██▇
eval/recall,▁▃▃▃▄█▇▇▇█
eval/runtime,▇▄▁▃▄▂▂▅▁█
eval/samples_per_second,▂▅█▆▅▇▇▄█▁
eval/steps_per_second,▂▅█▆▅▇▇▄█▁
eval_accuracy,▁

0,1
eval/accuracy,0.91309
eval/f1,0.91373
eval/loss,0.04221
eval/mcc,0.84892
eval/precision,0.91629
eval/recall,0.91309
eval/runtime,7.1681
eval/samples_per_second,279.293
eval/steps_per_second,35.016
eval_accuracy,0.91309


[34m[1mwandb[0m: Agent Starting Run: yvyavc2p with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.15494384324920077
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.8285539099129035e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.02377949948176715


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 6012, Warmup Steps (10%): 601




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.316,0.059674,0.840659,0.853627,0.840659,0.84068,0.719767
2,0.1132,0.076495,0.798701,0.867937,0.798701,0.815149,0.70316
3,0.0654,0.040508,0.891109,0.897678,0.891109,0.892332,0.808261
4,0.0423,0.047718,0.85964,0.889625,0.85964,0.867608,0.779593
5,0.0238,0.041845,0.903596,0.908226,0.903596,0.904519,0.83254
6,0.0136,0.043346,0.911588,0.915445,0.911588,0.910876,0.84436


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/legendary-sweep-14/best_model


0,1
eval/accuracy,▄▁▇▅██▇
eval/f1,▃▁▇▅██▇
eval/loss,▅█▁▂▁▂▁
eval/mcc,▂▁▆▅▇█▆
eval/precision,▁▃▆▅▇█▆
eval/recall,▄▁▇▅██▇
eval/runtime,▃▃▂▁▂▂█
eval/samples_per_second,▆▆▇█▇▇▁
eval/steps_per_second,▆▆▇█▇▇▁
eval_accuracy,▁

0,1
eval/accuracy,0.89111
eval/f1,0.89233
eval/loss,0.04051
eval/mcc,0.80826
eval/precision,0.89768
eval/recall,0.89111
eval/runtime,7.3304
eval/samples_per_second,273.107
eval/steps_per_second,34.241
eval_accuracy,0.89111


[34m[1mwandb[0m: Agent Starting Run: l8q1yd0w with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.12912317829063477
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.998412180504118e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.035165053555579265


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 6012, Warmup Steps (10%): 601




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.3264,0.052923,0.854645,0.863053,0.854645,0.849325,0.734514
2,0.1116,0.063049,0.828172,0.878045,0.828172,0.839058,0.734439
3,0.0661,0.044703,0.896104,0.901055,0.896104,0.897689,0.821127
4,0.0363,0.044866,0.904595,0.910411,0.904595,0.905869,0.83474
5,0.0244,0.054927,0.883616,0.902816,0.883616,0.888847,0.808596
6,0.0127,0.048556,0.925574,0.925375,0.925574,0.924405,0.866793


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/wandering-sweep-15/best_model


0,1
eval/accuracy,▃▁▆▆▅█▆
eval/f1,▂▁▆▆▅█▆
eval/loss,▄█▁▁▅▂▁
eval/mcc,▁▁▆▆▅█▆
eval/precision,▁▃▅▆▅█▅
eval/recall,▃▁▆▆▅█▆
eval/runtime,▁▂▂▃▄▇█
eval/samples_per_second,█▇▇▆▅▂▁
eval/steps_per_second,█▇▇▆▅▂▁
eval_accuracy,▁

0,1
eval/accuracy,0.8961
eval/f1,0.89769
eval/loss,0.0447
eval/mcc,0.82113
eval/precision,0.90105
eval/recall,0.8961
eval/runtime,7.1581
eval/samples_per_second,279.681
eval/steps_per_second,35.065
eval_accuracy,0.8961


[34m[1mwandb[0m: Agent Starting Run: klh2fgbb with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.054869341664909144
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.707021947057352e-05
[34m[1mwandb[0m: 	max_grad_norm: 1
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.01962869221932527


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.6198,0.04472,0.881618,0.885015,0.881618,0.875293,0.784393
2,0.1751,0.037479,0.882617,0.89441,0.882617,0.885503,0.796721
3,0.0853,0.038267,0.911588,0.913675,0.911588,0.910939,0.843884
4,0.0499,0.045647,0.911089,0.910075,0.911089,0.907044,0.838466
5,0.0311,0.043579,0.90959,0.914767,0.90959,0.91117,0.843033


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/visionary-sweep-16/best_model


0,1
eval/accuracy,▁▁███▁
eval/f1,▁▃█▇█▃
eval/loss,▇▁▂█▆▁
eval/mcc,▁▂█▇█▂
eval/precision,▁▃█▇█▃
eval/recall,▁▁███▁
eval/runtime,▆█▄▁▁▅
eval/samples_per_second,▃▁▅██▄
eval/steps_per_second,▃▁▅██▄
eval_accuracy,▁

0,1
eval/accuracy,0.88262
eval/f1,0.8855
eval/loss,0.03748
eval/mcc,0.79672
eval/precision,0.89441
eval/recall,0.88262
eval/runtime,7.0148
eval/samples_per_second,285.395
eval/steps_per_second,35.781
eval_accuracy,0.88262


[34m[1mwandb[0m: Agent Starting Run: ljl42xd6 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.09616786785046327
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.5068012486482445e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.038132825200275104


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.6326,0.053393,0.865634,0.865455,0.865634,0.853468,0.752707
2,0.1967,0.042156,0.87962,0.895652,0.87962,0.883005,0.798618
3,0.1014,0.035489,0.903097,0.906426,0.903097,0.902326,0.829992
4,0.0519,0.032546,0.924076,0.92431,0.924076,0.923857,0.865167
5,0.0299,0.049756,0.892108,0.904099,0.892108,0.895688,0.819403
6,0.0144,0.042686,0.927073,0.926812,0.927073,0.926034,0.869373
7,0.0114,0.043065,0.925075,0.925596,0.925075,0.924874,0.867455


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/icy-sweep-17/best_model


0,1
eval/accuracy,▁▃▅█▄███
eval/f1,▁▄▆█▅███
eval/loss,█▄▂▁▇▄▅▁
eval/mcc,▁▄▆█▅███
eval/precision,▁▄▆█▅███
eval/recall,▁▃▅█▄███
eval/runtime,▅▆█▂▁▁▁▇
eval/samples_per_second,▄▃▁▇███▂
eval/steps_per_second,▄▃▁▇███▂
eval_accuracy,▁

0,1
eval/accuracy,0.92408
eval/f1,0.92386
eval/loss,0.03255
eval/mcc,0.86517
eval/precision,0.92431
eval/recall,0.92408
eval/runtime,7.0486
eval/samples_per_second,284.029
eval/steps_per_second,35.61
eval_accuracy,0.92408


[34m[1mwandb[0m: Agent Starting Run: llheh9r7 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.10136332211866075
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.3342562706213256e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.028765546318502012


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.6727,0.050594,0.864635,0.869966,0.864635,0.859125,0.756172
2,0.1999,0.038971,0.884615,0.894001,0.884615,0.887701,0.802315
3,0.1012,0.03348,0.898601,0.900927,0.898601,0.896725,0.819765
4,0.0606,0.03679,0.901598,0.91047,0.901598,0.903429,0.831706
5,0.0333,0.041752,0.906593,0.915518,0.906593,0.909017,0.840559
6,0.0176,0.044311,0.914086,0.918904,0.914086,0.91557,0.851516


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/woven-sweep-18/best_model


0,1
eval/accuracy,▁▄▆▆▇█▆
eval/f1,▁▅▆▆▇█▆
eval/loss,█▃▁▂▄▅▁
eval/mcc,▁▄▆▇▇█▆
eval/precision,▁▄▅▇██▅
eval/recall,▁▄▆▆▇█▆
eval/runtime,██▂▂▄▁▆
eval/samples_per_second,▁▁▇▇▅█▃
eval/steps_per_second,▁▁▇▇▅█▃
eval_accuracy,▁

0,1
eval/accuracy,0.8986
eval/f1,0.89672
eval/loss,0.03348
eval/mcc,0.81976
eval/precision,0.90093
eval/recall,0.8986
eval/runtime,6.8882
eval/samples_per_second,290.641
eval/steps_per_second,36.439
eval_accuracy,0.8986


[34m[1mwandb[0m: Agent Starting Run: yy10n2wy with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.05623715979083386
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.4751052866628954e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.014921137240551803


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.623,0.043088,0.880619,0.881823,0.880619,0.876749,0.782032
2,0.1734,0.043552,0.869131,0.886414,0.869131,0.872917,0.782362
3,0.0906,0.040091,0.895604,0.9028,0.895604,0.894116,0.817927
4,0.0461,0.035821,0.911588,0.911731,0.911588,0.909856,0.842073
5,0.0238,0.043039,0.917582,0.918624,0.917582,0.917238,0.853359
6,0.0121,0.040034,0.925075,0.924513,0.925075,0.92308,0.864344
7,0.0048,0.041616,0.923077,0.924809,0.923077,0.923152,0.863605


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/genial-sweep-19/best_model


0,1
eval/accuracy,▂▁▄▆▇██▆
eval/f1,▂▁▄▆▇██▆
eval/loss,██▅▁█▅▆▁
eval/mcc,▁▁▄▆▇██▆
eval/precision,▁▂▄▆▇██▆
eval/recall,▂▁▄▆▇██▆
eval/runtime,▂▄▇▁▃▃▂█
eval/samples_per_second,▇▅▂█▆▆▇▁
eval/steps_per_second,▇▅▂█▆▆▇▁
eval_accuracy,▁

0,1
eval/accuracy,0.91159
eval/f1,0.90986
eval/loss,0.03582
eval/mcc,0.84207
eval/precision,0.91173
eval/recall,0.91159
eval/runtime,6.9868
eval/samples_per_second,286.542
eval/steps_per_second,35.925
eval_accuracy,0.91159


[34m[1mwandb[0m: Agent Starting Run: 92igi743 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.06252413030321119
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.6950001721149274e-05
[34m[1mwandb[0m: 	max_grad_norm: 2
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	weight_decay: 0.018556522263839786


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total Steps: 3012, Warmup Steps (10%): 301




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Mcc
1,0.555,0.049509,0.859141,0.878115,0.859141,0.862771,0.755823
2,0.1845,0.05122,0.871628,0.888721,0.871628,0.873229,0.781133
3,0.0898,0.042263,0.901099,0.905101,0.901099,0.900138,0.82657
4,0.0469,0.047064,0.903596,0.905577,0.903596,0.902081,0.826479
5,0.0299,0.045765,0.898102,0.91221,0.898102,0.902091,0.828431
6,0.0143,0.036862,0.920579,0.923622,0.920579,0.921129,0.860386
7,0.0069,0.041853,0.927073,0.927057,0.927073,0.926648,0.869788
8,0.0018,0.04157,0.927572,0.92842,0.927572,0.927677,0.871379
9,0.0009,0.041938,0.928571,0.929977,0.928571,0.929054,0.874051


✅ Best model saved to /content/drive/MyDrive/XLNet_8_Results/desert-sweep-20/best_model


0,1
eval/accuracy,▁▂▅▅▅▇███▇
eval/f1,▁▂▅▅▅▇███▇
eval/loss,▇█▄▆▅▁▃▃▃▁
eval/mcc,▁▂▅▅▅▇███▇
eval/precision,▁▂▅▅▆▇███▇
eval/recall,▁▂▅▅▅▇███▇
eval/runtime,▃▇▁▆▂▅▃▃▅█
eval/samples_per_second,▆▂█▃▇▄▆▆▄▁
eval/steps_per_second,▆▂█▃▇▄▆▆▄▁
eval_accuracy,▁

0,1
eval/accuracy,0.92058
eval/f1,0.92113
eval/loss,0.03686
eval/mcc,0.86039
eval/precision,0.92362
eval/recall,0.92058
eval/runtime,6.9392
eval/samples_per_second,288.507
eval/steps_per_second,36.172
eval_accuracy,0.92058


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.
