In [1]:
%pip install transformers datasets peft wandb

Note: you may need to restart the kernel to use updated packages.


In [2]:
import transformers
import datasets 
import peft

In [3]:
from peft import LoraConfig, get_peft_model, PeftModel
from datasets import load_dataset, Dataset, DatasetDict

lora_config = LoraConfig(
    r=4, # number of the parameters to train.
    lora_alpha=1, # magnitude of the weight matrix
    target_modules=["query", "value"],
    lora_dropout=0.05,
    bias="lora_only",
    task_type="SEQ_CLS" # sequence classification
)

In [4]:
from transformers import BertTokenizer, BertModel
model_checkpoint = "google-bert/bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_checkpoint, use_fast=True)
foundation_model = BertModel.from_pretrained(model_checkpoint)

In [5]:
import pandas as pd

DATA_PATH = "/home/jovyan/work/ULM-25-authorship-profiling/data/"

df_train = pd.read_csv(DATA_PATH + "data_train.csv")
df_test = pd.read_csv(DATA_PATH + "data_test.csv")
df_val = pd.read_csv(DATA_PATH + "data_val.csv")

In [6]:
dataset = DatasetDict()
dataset['train'] = Dataset.from_pandas(df_train)
dataset['validation'] = Dataset.from_pandas(df_val)
dataset['test'] = Dataset.from_pandas(df_test)

In [7]:
dataset = dataset.filter(lambda example: 
                         example["text"] is not None and 
                         example["gender"] is not None and
                         example ["age"] is not None)
dataset

Filter:   0%|          | 0/620813 [00:00<?, ? examples/s]

Filter:   0%|          | 0/68980 [00:00<?, ? examples/s]

Filter:   0%|          | 0/37919 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'age', 'gender'],
        num_rows: 620812
    })
    validation: Dataset({
        features: ['text', 'age', 'gender'],
        num_rows: 68980
    })
    test: Dataset({
        features: ['text', 'age', 'gender'],
        num_rows: 37919
    })
})

In [8]:
from transformers import AutoTokenizer, AutoModel, PreTrainedModel, PretrainedConfig
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import numpy as np

age_encoder = LabelEncoder()
age_encoder.fit(df_train["age"].tolist() + df_val["age"].tolist() + df_test["age"].tolist())
num_age_labels = len(age_encoder.classes_)

BINS = [0, 18, 23, 27, 33, 37, 43, 47, 53, 57, 100]
def preprocess_function_dict(examples):
    tokenized = tokenizer(
        examples['text'],
        truncation=True,
        padding='max_length',
        max_length=512
    )
    # age_labels = age_encoder.transform(examples["age"])
    age_labels = [
            int(np.digitize(item, BINS) - 1)
            for item in examples["age"]
        ]
    gender_labels = [
        {"male": 0, "female": 1}[label] for label in examples["gender"]
    ]
    return {
        "input_ids": tokenized["input_ids"],
        "attention_mask": tokenized["attention_mask"],
        "age_labels": age_labels,
        "gender_labels": gender_labels,
        "labels": np.stack([age_labels, gender_labels], axis=1)
    }

In [9]:
tokenized_dataset = dataset.map(
    preprocess_function_dict,
    batched=True,
    remove_columns=['text', 'age', 'gender']
)

Map:   0%|          | 0/620812 [00:00<?, ? examples/s]

Map:   0%|          | 0/68980 [00:00<?, ? examples/s]

Map:   0%|          | 0/37919 [00:00<?, ? examples/s]

In [10]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'age_labels', 'gender_labels', 'labels'],
        num_rows: 620812
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'age_labels', 'gender_labels', 'labels'],
        num_rows: 68980
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'age_labels', 'gender_labels', 'labels'],
        num_rows: 37919
    })
})

In [11]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [12]:
tokenized_dataset.push_to_hub(
    "KonradBRG/ULM-Profiling-tokenized",
    private=False,  # or True if you want it private
    commit_message="Upload tokenized ULM profiling dataset"
)

Uploading the dataset shards:   0%|          | 0/4 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/156 [00:00<?, ?ba/s]

Creating parquet from Arrow format:   0%|          | 0/156 [00:00<?, ?ba/s]

Creating parquet from Arrow format:   0%|          | 0/156 [00:00<?, ?ba/s]

Creating parquet from Arrow format:   0%|          | 0/156 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/69 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/38 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/KonradBRG/ULM-Profiling-tokenized/commit/da3151e8ae81d36c4b16cab46907282cf7fbab93', commit_message='Upload tokenized ULM profiling dataset', commit_description='', oid='da3151e8ae81d36c4b16cab46907282cf7fbab93', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/KonradBRG/ULM-Profiling-tokenized', endpoint='https://huggingface.co', repo_type='dataset', repo_id='KonradBRG/ULM-Profiling-tokenized'), pr_revision=None, pr_num=None)

In [13]:
class JointClassificationConfig(PretrainedConfig):
    def __init__(self, num_age_labels=None, num_gender_labels=None, loss_alpha=0.5, **kwargs):
        super().__init__(**kwargs)
        self.num_age_labels = num_age_labels
        self.num_gender_labels = num_gender_labels

In [14]:
from transformers.modeling_outputs import SequenceClassifierOutput
from torch.nn import CrossEntropyLoss

class BertForJointClassification(PreTrainedModel):
    config_class = JointClassificationConfig
    
    def __init__(self, config, model):
        super().__init__(config)
        self.num_age_labels = config.num_age_labels
        self.num_gender_labels = config.num_gender_labels

        self.bert = model
        self.age_classifier = nn.Linear(config.hidden_size, self.num_age_labels)
        self.gender_classifier = nn.Linear(config.hidden_size, self.num_gender_labels)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.age_loss_fct = CrossEntropyLoss()
        self.gender_loss_fct = CrossEntropyLoss()
        
        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        age_labels=None,
        gender_labels=None,
        **kwargs
    ):
        # get BERT outputs
        x = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids
        )
        # pool outputs to get sequence representation
        x = x.last_hidden_state[:, 0]
        x = self.dropout(x)
        # get logits from task head
        age_logits = self.age_classifier(x)
        gender_logits = self.gender_classifier(x)
        loss_age = self.age_loss_fct(age_logits, age_labels)
        loss_gender = self.gender_loss_fct(gender_logits, gender_labels)
        return SequenceClassifierOutput(loss={"loss_age": loss_age, "loss_gender": loss_gender}, 
                                        logits=torch.cat([age_logits, gender_logits], dim=1))

In [15]:
def setup_model(dataset, model_name="bert-base-uncased", peft=True):        
    config = JointClassificationConfig(
        name_or_path=model_name,
        num_age_labels=num_age_labels,
        num_gender_labels=2,
        hidden_size=768,
        hidden_dropout_prob=0.1,
    )
    bert = BertForJointClassification(config, foundation_model)
    if peft:
        bert = get_peft_model(bert, lora_config)
    return bert, config, dataset

In [None]:
# optional
peft_model.unload()

In [None]:
peft_model, config, data = setup_model(tokenized_dataset, model_checkpoint)

In [16]:
def compute_metrics(eval_pred):
    logits = eval_pred.predictions
    age_labels, gender_labels = eval_pred.label_ids

    age_logits = logits[:, :num_age_labels]
    gender_logits = logits[:, num_age_labels:]

    age_preds = np.argmax(age_logits, axis=-1)
    gender_preds = np.argmax(gender_logits, axis=-1)

    age_acc = (age_preds == age_labels).mean()
    gender_acc = (gender_preds == gender_labels).mean()
    joint_acc = np.mean((age_preds == age_labels) & (gender_preds == gender_labels))

    return {"age_acc": age_acc, "gender_acc": gender_acc, "joint_acc": joint_acc}

In [17]:
from typing import Dict, Union, Any
from transformers import Trainer, TrainingArguments, default_data_collator

class TrainerWithCustomLoss(Trainer):
    
    def __init__(self, age_alpha: float = 0.5, scale_losses = True, **kwargs):
        super().__init__(**kwargs)
        self.age_alpha = age_alpha
        self.gender_alpha = 1 - self.age_alpha
        self._scale_losses = scale_losses
    
    def training_step(
        self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]], num_items_in_batch=None
    ) -> torch.Tensor:
        model.train()
        if hasattr(self.optimizer, "train") and callable(self.optimizer.train):
            self.optimizer.train()
        inputs = self._prepare_inputs(inputs)
        
        with self.compute_loss_context_manager():
            loss = self.compute_loss(model, inputs)
        self.accelerator.backward(loss)
        
        # Finally we need to normalize the loss for reporting
        if num_items_in_batch is None:
            return loss.detach() / self.args.gradient_accumulation_steps
        return loss.detach()

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        outputs = model(**inputs)
        loss_age, loss_gender = outputs.loss["loss_age"], outputs.loss["loss_gender"]
        loss = self.age_alpha * loss_age + self.gender_alpha * loss_gender
        
        if return_outputs:
            return loss, outputs
        return loss

In [None]:
del trainer

In [None]:
from transformers import Trainer, TrainingArguments, default_data_collator

print(peft_model.print_trainable_parameters())
log_frequency = 1000

training_args = TrainingArguments(
    "bert-lora-for-author-profiling",
    per_device_train_batch_size=32,  # Set explicitly for better control
    gradient_accumulation_steps=2,   # Effective batch size = 32 * 2 = 64
    num_train_epochs=3,
    # Speed optimizations
    dataloader_num_workers=4,        # Parallel data loading
    dataloader_pin_memory=True,      # Faster GPU transfer
    bf16=True,                       # Mixed precision (if you have newer GPU)
    # fp16=True,                     # Use this instead if bf16 not supported
    # Reduce overhead
    eval_strategy="steps",           # Less frequent evaluation
    eval_steps=500,                  # Adjust based on dataset size
    logging_strategy="steps",
    logging_steps=log_frequency,
    save_strategy="no",
    learning_rate=5e-5,
    report_to="wandb"
)

trainer = TrainerWithCustomLoss(
    age_alpha=0.7,
    model=peft_model,
    args=training_args,
    train_dataset=data['train'].with_format("torch"),
    eval_dataset=data['validation'].with_format("torch"),
    data_collator=default_data_collator,
    compute_metrics=compute_metrics,
)

In [None]:
trainer.train()

In [None]:
trainer.predict(data["test"])

In [18]:
import wandb
wandb.finish()
sweep_config = {
    "name": "LoRA-For-Author-Profiling",
    "method": "bayes",
    "metric": {
        "name": "Joint Acc",
        "goal": "maximize"
    },
    "parameters": {
        "learning_rate": {
            "min": 0.00005,
            "max": 0.0001
        },
        "age_alpha": {
            "min": 0.4,
            "max": 0.9
        },
        "per_device_train_batch_size": {
            "values": [16, 32, 64]
        },
        "num_train_epochs": {
            "values": [2, 3, 4]
        },
        "peft_r_value": {
            "values": [2, 4, 8]
        },
    }
}

# Create the sweep
sweep_id = wandb.sweep(sweep_config, project="ULM-Author-Profiling")

[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: egrnddjo
Sweep URL: https://wandb.ai/konrad-brg-university-of-t-bingen/ULM-Author-Profiling/sweeps/egrnddjo


In [19]:
def train(config=None):
    # Initialize wandb run with the config from sweep
    with wandb.init(config=config):
        peft_r_value = wandb.config.peft_r_value
        learning_rate = wandb.config.learning_rate
        age_alpha = wandb.config.age_alpha
        batch_size = wandb.config.per_device_train_batch_size
        epochs = wandb.config.num_train_epochs

        lora_config = LoraConfig(
            r=peft_r_value, # number of the parameters to train.
            lora_alpha=1, # magnitude of the weight matrix
            target_modules=["query", "value"],
            lora_dropout=0.05,
            bias="lora_only",
            task_type="SEQ_CLS" # sequence classification
        )
        peft_model, config, data = setup_model(tokenized_dataset, model_checkpoint)

        # Set up training arguments
        training_args = TrainingArguments(
            output_dir="bert-lora-for-author-profiling",
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=2,
            num_train_epochs=epochs,
            # Speed optimizations
            dataloader_num_workers=4,    
            dataloader_pin_memory=True,
            bf16=True,
            eval_strategy="steps",         
            eval_steps=1000,                  
            logging_strategy="steps",
            logging_steps=1000,
            save_strategy="no",
            learning_rate=learning_rate,
            report_to="wandb"
        )
        
        trainer = TrainerWithCustomLoss(
            age_alpha=age_alpha,
            model=peft_model,
            args=training_args,
            train_dataset=data['train'].with_format("torch"),  # Make sure data is defined
            eval_dataset=data['validation'].with_format("torch"),
            data_collator=default_data_collator,
            compute_metrics=compute_metrics,
        )
        try:
            trainer.train()
            eval_results = trainer.evaluate()
            wandb.log(eval_results)
            print(f"Training completed! Final Joint Acc: {eval_results.get('eval_Joint Acc', 'N/A')}")
        finally:
            # Ensure cleanup happens even if training fails
            peft_model.unload()
            del trainer
            del peft_model
            torch.cuda.empty_cache()  # Clear GPU memory

In [None]:
wandb.agent(sweep_id, train, count=10)  # Will run 10 different hyperparameter combinations

[34m[1mwandb[0m: Agent Starting Run: oyjz2f8m with config:
[34m[1mwandb[0m: 	age_alpha: 0.5256867795052806
[34m[1mwandb[0m: 	learning_rate: 8.377865352728644e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	peft_r_value: 4
[34m[1mwandb[0m: 	per_device_train_batch_size: 32
[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkonrad-brg[0m ([33mkonrad-brg-university-of-t-bingen[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss,Validation Loss,Age Acc,Gender Acc,Joint Acc
1000,1.0591,0.962221,0.532183,0.643592,0.342723
2000,0.9517,0.927538,0.553552,0.662163,0.36831
3000,0.9297,0.91095,0.56109,0.66973,0.378095
4000,0.9157,0.902783,0.5657,0.674543,0.383734
5000,0.9122,0.896584,0.568484,0.67737,0.388301
6000,0.9004,0.8906,0.572369,0.681038,0.393563
7000,0.8959,0.887173,0.574543,0.682575,0.397101
8000,0.8944,0.882576,0.576674,0.684836,0.399623
9000,0.8899,0.879686,0.579313,0.684778,0.402363
10000,0.8892,0.877337,0.580168,0.686851,0.403392


Training completed! Final Joint Acc: N/A


0,1
epoch,▁
eval/age_acc,▁▄▄▅▅▆▆▆▇▇▇▇▇▇▇▇█▇████████████
eval/gender_acc,▁▃▄▅▅▆▆▆▆▇▇██▇████████████████
eval/joint_acc,▁▃▄▅▅▆▆▆▇▇▇▇▇▇▇██▇████████████
eval/loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁
eval/runtime,▇▁▂▄▃▄▅▁▇▅█▃▆▅▃▅▁▅▆▂▄▅█▇▆▆▆▅█▂
eval/samples_per_second,▂█▇▄▆▅▄█▂▄▁▆▃▄▆▄█▄▃▇▅▄▁▂▃▃▃▄▁▇
eval/steps_per_second,▂█▇▅▆▅▄█▂▄▁▆▃▄▆▄█▄▃▇▅▄▁▂▃▃▃▄▁▇
eval_age_acc,▁
eval_gender_acc,▁

0,1
epoch,3.0
eval/age_acc,0.59081
eval/gender_acc,0.69616
eval/joint_acc,0.41808
eval/loss,0.85755
eval/runtime,81.9993
eval/samples_per_second,841.226
eval/steps_per_second,105.159
eval_age_acc,0.59081
eval_gender_acc,0.69616


[34m[1mwandb[0m: Agent Starting Run: nlttzuec with config:
[34m[1mwandb[0m: 	age_alpha: 0.480985483564206
[34m[1mwandb[0m: 	learning_rate: 6.903238103779662e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	peft_r_value: 4
[34m[1mwandb[0m: 	per_device_train_batch_size: 32
[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




Step,Training Loss,Validation Loss,Age Acc,Gender Acc,Joint Acc
1000,1.0379,0.945776,0.524964,0.639388,0.336257
2000,0.9324,0.906891,0.552609,0.657335,0.36512
3000,0.9056,0.887276,0.561177,0.671296,0.379443
4000,0.891,0.87794,0.566469,0.677095,0.386808
5000,0.887,0.87181,0.568832,0.678863,0.390794
6000,0.877,0.866639,0.571731,0.681357,0.394622
7000,0.8722,0.863836,0.574195,0.682256,0.397883
8000,0.8704,0.859057,0.576718,0.68559,0.400681
9000,0.8666,0.857076,0.577733,0.684633,0.401508
10000,0.8659,0.853903,0.580255,0.686779,0.404146


Training completed! Final Joint Acc: N/A


0,1
epoch,▁
eval/age_acc,▁▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇█▇███████████████████
eval/gender_acc,▁▃▅▆▆▆▆▇▆▇▇▇▇▇▇██▇█████████████████████
eval/joint_acc,▁▃▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█████████████████████
eval/loss,█▆▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/runtime,█▃▄▅▄▅▅▅▄▇▁▄▃▆▄▅▅▆▆▆▆▄▃▅▂▃▇▆▅▄▆▆▆▃▇▆▇▄▂
eval/samples_per_second,▁▆▅▄▄▄▄▄▅▁█▅▆▃▅▄▄▃▃▃▃▅▆▄▇▆▂▃▄▅▃▃▃▆▂▃▂▅▇
eval/steps_per_second,▁▆▅▄▅▄▄▄▅▁█▅▆▃▅▄▄▃▃▃▃▅▆▄▇▆▂▃▄▅▃▃▃▆▂▃▂▅▇
eval_age_acc,▁
eval_gender_acc,▁

0,1
epoch,4.0
eval/age_acc,0.59142
eval/gender_acc,0.69723
eval/joint_acc,0.42014
eval/loss,0.83017
eval/runtime,81.3075
eval/samples_per_second,848.384
eval/steps_per_second,106.054
eval_age_acc,0.59142
eval_gender_acc,0.69723


[34m[1mwandb[0m: Agent Starting Run: 76jcn6eq with config:
[34m[1mwandb[0m: 	age_alpha: 0.8537993281786413
[34m[1mwandb[0m: 	learning_rate: 9.90563000535912e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	peft_r_value: 4
[34m[1mwandb[0m: 	per_device_train_batch_size: 64
[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




Step,Training Loss,Validation Loss,Age Acc,Gender Acc,Joint Acc
1000,1.2529,1.122801,0.554625,0.650768,0.362424
2000,1.1118,1.088208,0.569991,0.663627,0.381212
3000,1.0924,1.073313,0.5755,0.670281,0.388837
4000,1.0778,1.062691,0.57982,0.671398,0.392983
5000,1.0724,1.055548,0.583097,0.675689,0.398115
6000,1.0647,1.049916,0.584633,0.677312,0.400536
7000,1.0597,1.044973,0.587634,0.67621,0.402566
8000,1.0491,1.0409,0.58904,0.677878,0.404407
9000,1.048,1.040058,0.589113,0.680009,0.406161
10000,1.0427,1.033717,0.592012,0.680632,0.407553


Training completed! Final Joint Acc: N/A


0,1
epoch,▁
eval/age_acc,▁▄▄▅▆▆▆▇▇▇▇█████████
eval/gender_acc,▁▄▅▅▆▇▆▇▇▇▇█████████
eval/joint_acc,▁▄▅▅▆▆▆▇▇▇▇▇████████
eval/loss,█▆▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁
eval/runtime,▄▄▁▅▂▃▄█▅▁▇▂▆▂█▅▇█▇▆
eval/samples_per_second,▅▅█▄▇▆▅▁▄█▂▇▃▇▁▄▂▁▂▃
eval/steps_per_second,▅▅█▄▇▆▅▁▄█▂▇▃▇▁▄▂▁▂▃
eval_age_acc,▁
eval_gender_acc,▁

0,1
epoch,4.0
eval/age_acc,0.59625
eval/gender_acc,0.68304
eval/joint_acc,0.41326
eval/loss,1.0231
eval/runtime,82.8978
eval/samples_per_second,832.109
eval/steps_per_second,104.02
eval_age_acc,0.59625
eval_gender_acc,0.68304


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3vltbdyd with config:
[34m[1mwandb[0m: 	age_alpha: 0.4120563317096145
[34m[1mwandb[0m: 	learning_rate: 8.586771270684372e-05
[34m[1mwandb[0m: 	num_train_epochs: 2
[34m[1mwandb[0m: 	peft_r_value: 4
[34m[1mwandb[0m: 	per_device_train_batch_size: 32
[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




Step,Training Loss,Validation Loss,Age Acc,Gender Acc,Joint Acc
1000,0.9653,0.87734,0.54352,0.655175,0.355842
2000,0.8681,0.848155,0.560235,0.67147,0.378675
3000,0.8511,0.83547,0.567048,0.681255,0.390591
4000,0.8399,0.828326,0.569948,0.68517,0.395173
5000,0.8372,0.824362,0.572166,0.684764,0.397695
6000,0.8292,0.819459,0.574601,0.688315,0.400841
7000,0.8255,0.817959,0.576428,0.687591,0.402247
8000,0.8246,0.814432,0.579356,0.689736,0.405973
9000,0.8213,0.813171,0.580226,0.688939,0.406335
10000,0.8206,0.809783,0.582154,0.69236,0.408742


Training completed! Final Joint Acc: N/A


0,1
epoch,▁
eval/age_acc,▁▄▅▅▆▆▆▇▇▇▇▇████████
eval/gender_acc,▁▄▅▆▆▇▇▇▇▇▇██▇██████
eval/joint_acc,▁▄▅▆▆▆▇▇▇▇▇█████████
eval/loss,█▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁
eval/runtime,▃▄▄▄▄▆▂▄▃▄▅▃▄▃▅▁▂▂▄█
eval/samples_per_second,▆▅▅▅▅▃▇▅▆▄▄▆▅▆▄█▇▇▅▁
eval/steps_per_second,▆▅▅▅▅▃▇▅▆▄▄▆▅▆▄█▇▇▅▁
eval_age_acc,▁
eval_gender_acc,▁

0,1
epoch,2.0
eval/age_acc,0.58543
eval/gender_acc,0.69571
eval/joint_acc,0.41445
eval/loss,0.80203
eval/runtime,82.4807
eval/samples_per_second,836.317
eval/steps_per_second,104.546
eval_age_acc,0.58543
eval_gender_acc,0.69571


[34m[1mwandb[0m: Agent Starting Run: 2724witm with config:
[34m[1mwandb[0m: 	age_alpha: 0.7513140961141846
[34m[1mwandb[0m: 	learning_rate: 7.030190749062255e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	peft_r_value: 2
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




Step,Training Loss,Validation Loss,Age Acc,Gender Acc,Joint Acc
1000,1.2439,1.111436,0.527138,0.646158,0.339664
2000,1.0895,1.067117,0.552334,0.65432,0.362801
3000,1.0596,1.046658,0.560148,0.666454,0.375993
4000,1.0561,1.036267,0.565845,0.670586,0.383836
5000,1.0448,1.028552,0.569136,0.672601,0.388011
6000,1.0345,1.022261,0.572572,0.673456,0.390476
7000,1.0269,1.020252,0.575152,0.668991,0.389475
8000,1.0222,1.01419,0.576109,0.677979,0.396231
9000,1.0334,1.009665,0.579211,0.677486,0.398304
10000,1.0172,1.008479,0.579197,0.678008,0.399377


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Training completed! Final Joint Acc: N/A


0,1
epoch,▁
eval/age_acc,▁▃▅▅▆▆▇▇▇▇▇█████████
eval/gender_acc,▁▄▅▆▆▇▇▆▇▇▇██▇██████
eval/joint_acc,▁▄▅▅▆▆▇▇▇▇▇▇████████
eval/loss,█▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁
eval/runtime,▁█▃▄▃▅▄▇▄▅▄▆▇▅█▄▄▇▆▆
eval/samples_per_second,█▁▆▅▅▄▅▂▅▄▅▃▂▄▁▅▅▂▃▃
eval/steps_per_second,█▁▆▅▅▄▅▂▅▄▅▃▂▄▁▅▅▂▃▃
eval_age_acc,▁
eval_gender_acc,▁

0,1
epoch,2.0
eval/age_acc,0.58974
eval/gender_acc,0.69198
eval/joint_acc,0.41473
eval/loss,0.9125
eval/runtime,82.0834
eval/samples_per_second,840.365
eval/steps_per_second,105.052
eval_age_acc,0.58974
eval_gender_acc,0.69198


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xk922e4s with config:
[34m[1mwandb[0m: 	age_alpha: 0.512584843797316
[34m[1mwandb[0m: 	learning_rate: 5.058408191938331e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	peft_r_value: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 32
[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




Step,Training Loss,Validation Loss,Age Acc,Gender Acc,Joint Acc
1000,1.0595,0.945224,0.535822,0.657524,0.35274
2000,0.9355,0.911705,0.556814,0.669498,0.37608
3000,0.9151,0.89779,0.564424,0.675979,0.386373
4000,0.9018,0.891227,0.566932,0.679864,0.390548
5000,0.9006,0.886016,0.570774,0.681067,0.394303
6000,0.8909,0.88175,0.573239,0.683531,0.397333
7000,0.888,0.879847,0.574688,0.68301,0.399507
8000,0.887,0.875918,0.577298,0.68572,0.40287
9000,0.8843,0.874265,0.578545,0.685648,0.404117
10000,0.8829,0.871132,0.580081,0.687794,0.405828


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter serve

Training completed! Final Joint Acc: N/A


0,1
epoch,▁
eval/age_acc,▁▄▅▅▆▆▆▇▇▇▇▇▇▇▇███████████████
eval/gender_acc,▁▃▅▅▆▆▆▆▆▇▇▇▇▇▇███████████████
eval/joint_acc,▁▄▅▅▆▆▆▇▇▇▇▇▇▇▇███████████████
eval/loss,█▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/runtime,▃▃▅▅▄▂▃▃▃▅█▂▃▆▆▅▃▇▆▇▁▇▅▄▆▄▁▆▄▅
eval/samples_per_second,▆▆▄▄▅▇▆▆▆▄▁▇▆▃▃▄▆▂▃▂█▂▄▅▃▅█▃▅▄
eval/steps_per_second,▆▆▄▄▅▇▆▆▆▄▁▇▆▃▃▄▆▂▃▂█▂▄▅▃▅█▃▅▄
eval_age_acc,▁
eval_gender_acc,▁

0,1
epoch,3.0
eval/age_acc,0.587
eval/gender_acc,0.6928
eval/joint_acc,0.41448
eval/loss,0.85718
eval/runtime,82.0824
eval/samples_per_second,840.375
eval/steps_per_second,105.053
eval_age_acc,0.587
eval_gender_acc,0.6928


[34m[1mwandb[0m: Agent Starting Run: w1asbw4q with config:
[34m[1mwandb[0m: 	age_alpha: 0.8378916491056481
[34m[1mwandb[0m: 	learning_rate: 7.168443274187567e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	peft_r_value: 4
[34m[1mwandb[0m: 	per_device_train_batch_size: 64
[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




Step,Training Loss,Validation Loss,Age Acc,Gender Acc,Joint Acc
1000,1.2355,1.100164,0.562873,0.658626,0.373485
2000,1.0956,1.074118,0.573485,0.672427,0.389823
3000,1.08,1.062751,0.577515,0.676051,0.395506
4000,1.0691,1.054345,0.580415,0.67679,0.398869
5000,1.0657,1.049634,0.582227,0.67969,0.400986
6000,1.0587,1.04339,0.583981,0.681313,0.40403
7000,1.0542,1.040646,0.586155,0.678501,0.403943
8000,1.0448,1.038728,0.586228,0.679139,0.404364


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [None]:
trainer.push_to_hub()