In [1]:
import torch
from copy import copy

In [2]:
from transformers import AutoTokenizer

# Shared tokenizer; it is read as a module-level global by `preprocess_function`
# and passed to every `DataCollatorForMultipleChoice` created below.
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
# Alternative checkpoint kept for reference:
# tokenizer = AutoTokenizer.from_pretrained("roberta-base")

In [3]:
def preprocess_function(examples):
    """Tokenize a batch of two-ending multiple-choice examples.

    Every ``startphrase`` is paired with both of its candidate endings
    (``ending1`` and ``ending2``). The flat list of (context, ending) pairs is
    tokenized in a single call to the module-level ``tokenizer`` and then
    regrouped so that each example carries one encoding per choice.

    Args:
        examples: Batched mapping holding parallel lists under the keys
            ``"startphrase"``, ``"ending1"`` and ``"ending2"``.

    Returns:
        dict: One entry per key produced by the tokenizer. Each value is a
        list with one item per example, and each item is the list of that
        example's two per-choice encodings.
    """
    # Number of candidate endings per example (ending1 / ending2).
    num_choices = 2

    # Repeat each context once per choice, already flattened in example-major
    # order (ctx0, ctx0, ctx1, ctx1, ...).
    first_sentences = [
        context
        for context in examples["startphrase"]
        for _ in range(num_choices)
    ]
    # Interleave the endings in the same order so both lists line up 1:1.
    second_sentences = [
        ending
        for i, ending1 in enumerate(examples["ending1"])
        for ending in (ending1, examples["ending2"][i])
    ]

    # Tokenize all (context, ending) pairs at once.
    tokenized_examples = tokenizer(first_sentences, second_sentences, truncation=True)

    # Un-flatten: regroup consecutive runs of `num_choices` encodings per example.
    return {
        key: [values[i : i + num_choices] for i in range(0, len(values), num_choices)]
        for key, values in tokenized_examples.items()
    }

In [4]:
from dataclasses import dataclass
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
from typing import Optional, Union
import os
from math import inf

@dataclass
class DataCollatorForMultipleChoice:
    """
    Data collator that dynamically pads multiple-choice inputs.

    Each incoming feature is a mapping whose non-label values are lists with
    one entry per choice (e.g. ``input_ids[choice]``). The collator flattens
    all choices of all examples into one list, pads them together with
    ``tokenizer.pad`` and reshapes every padded tensor to
    ``(batch_size, num_choices, -1)``.

    The label is read from the ``"label"`` key when the first feature has it,
    otherwise from ``"labels"``; it is always returned under ``"labels"``.
    The incoming feature dicts are not modified.
    """

    # Tokenizer whose ``pad`` method performs the padding.
    tokenizer: PreTrainedTokenizerBase
    # The three options below are forwarded unchanged to ``tokenizer.pad``.
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    def __call__(self, features):
        """Collate ``features`` into a padded batch.

        Args:
            features: Non-empty list of per-example mappings, each containing
                a label plus per-choice lists such as ``input_ids``.

        Returns:
            dict: Padded tensors of shape ``(batch_size, num_choices, -1)``
            for every tokenizer key, plus an int64 ``"labels"`` tensor.
        """
        label_name = "label" if "label" in features[0].keys() else "labels"
        # Read (rather than pop) the labels so the caller's dicts stay intact.
        labels = [feature[label_name] for feature in features]
        batch_size = len(features)
        num_choices = len(features[0]["input_ids"])

        # One flat entry per (example, choice); the label is left out because
        # it is per-example rather than per-choice.
        flattened_features = [
            {k: v[i] for k, v in feature.items() if k != label_name}
            for feature in features
            for i in range(num_choices)
        ]

        batch = self.tokenizer.pad(
            flattened_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        # Restore the (example, choice) structure that was flattened for padding.
        batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
        batch["labels"] = torch.tensor(labels, dtype=torch.int64)
        return batch

In [5]:
from datasets import load_dataset

In [6]:
# Earlier COPA variant kept for reference:
# ds = load_dataset("super_glue", "copa", cache_dir="/scratch/afz225/.cache")
# Fig-QA dataset; the training loop below uses this `ds` for the "std-dia"
# entry, since it only reloads data for the other dialects.
ds = load_dataset("nightingal3/fig-qa")

In [7]:
# Tokenize every split and drop all raw columns except 'labels'
# (the column list is copied so the dataset's own list is not mutated).
columns = copy(ds['train'].column_names)
# The COPA variant of this notebook kept 'label' here instead:
# columns.remove('label')
columns.remove('labels')
ds = ds.map(preprocess_function, batched=True, remove_columns=columns)

Map:   0%|          | 0/9674 [00:00<?, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

Map:   0%|          | 0/1146 [00:00<?, ? examples/s]

In [8]:
ds['train']

Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 9674
})

In [9]:
# ds = ds.rename_column(original_column_name="label", new_column_name="labels")

In [10]:
# Return the three model inputs as torch tensors when rows of `ds` are accessed.
# NOTE(review): the per-dialect datasets loaded in the training loop do not get
# this call (it is commented out there) - confirm that is intentional.
ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

In [12]:
from transformers import AutoModelForMultipleChoice
from adapters import init,AutoAdapterModel

# Alternative checkpoint kept for reference:
# model = AutoModelForMultipleChoice.from_pretrained("roberta-base", cache_dir="/scratch/afz225/.cache")
# The multiple-choice classifier weights are newly initialized (see the warning
# printed below), so the model must be trained before its predictions mean anything.
model = AutoModelForMultipleChoice.from_pretrained("FacebookAI/xlm-roberta-base")
# Enable adapter support
init(model)

Some weights of XLMRobertaForMultipleChoice were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# model.load_adapter("./col-adapter")

OSError: Unable to load adapter ./col-adapter from any source. Please check the name of the adapter or the source.

In [14]:
model

XLMRobertaForMultipleChoice(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttentionWithAdapters(
              (query): LoRALinearTorch(
                in_features=768, out_features=768, bias=True
                (loras): ModuleDict()
              )
              (key): LoRALinearTorch(
                in_features=768, out_features=768, bias=True
                (loras): ModuleDict()
              )
              (value): LoRALinearTorch(
                in_features=768, out_features=768, 

In [15]:
# Adapter names, one per training run. "std-dia" is trained on the
# "nightingal3/fig-qa" data loaded above; every other entry is used as the
# config name passed to load_dataset("ashabrawy/dia_figqa", ...).
dialects = ["std-dia", "aus", "hon", "nig", "col", "wel"]

In [16]:
import numpy as np
from transformers import TrainingArguments, EvalPrediction
from adapters import AdapterTrainer

2024-04-27 11:49:10.132531: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-27 11:49:10.132895: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-27 11:49:10.211493: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-27 11:49:10.355007: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [17]:
def compute_accuracy(p: EvalPrediction):
    """Compute accuracy for a multiple-choice evaluation.

    Args:
        p: Evaluation output exposing ``predictions`` (scores whose axis 1
            indexes the choices) and ``label_ids`` (the reference choices).

    Returns:
        dict: ``{"acc": <mean of exact matches>}``.
    """
    # The predicted choice is the highest-scoring entry along axis 1.
    predicted_choice = np.argmax(p.predictions, axis=1)
    is_correct = predicted_choice == p.label_ids
    return {"acc": is_correct.mean()}

In [18]:
# NOTE(review): this value is overwritten by the `for dialect in dialects` loop
# in the next cell; it looks like a leftover from running one dialect by hand.
dialect = 'aus'

In [19]:
# Train one "seq_bn" adapter per dialect on the single shared `model`, saving
# each adapter to disk after its run.
# NOTE(review): the training logs recorded below stay at ~0.69 (about ln 2,
# i.e. chance level for two choices) for every dialect - confirm the adapters
# are actually learning before relying on the saved weights.
for dialect in dialects:
    if dialect != "std-dia":
        # "std-dia" reuses the `ds` prepared in the earlier cells; this only
        # holds while "std-dia" is the first entry, because `ds` is rebound here.
        # Earlier COPA variant:
        # ds = load_dataset("ashabrawy/dia_copa", dialect, cache_dir="/scratch/afz225/.cache")
        ds = load_dataset("ashabrawy/dia_figqa", dialect)
        columns = copy(ds['train'].column_names)
        # COPA variant kept 'label' instead:
        # columns.remove('label')
        columns.remove('labels')
        ds = ds.map(preprocess_function, batched=True, remove_columns=columns)
        # Disabled steps from the COPA variant; no torch format is set for the
        # dialect datasets (the collator builds the tensors itself):
        # ds = ds.rename_column(original_column_name="label", new_column_name="labels")
        # ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    # Register a new adapter named after the dialect and select it for training.
    # NOTE(review): all adapters share the same `model` object, so anything
    # trained outside the adapter (presumably the classifier head) carries over
    # between dialects - confirm this is intended.
    model.add_adapter(dialect, config="seq_bn")
    model.train_adapter(dialect)
    training_args = TrainingArguments(
        learning_rate=1e-4,
        num_train_epochs=6,
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        logging_steps=200,
        # NOTE(review): absolute, user-specific path; it also uses
        # "dialectal_gen-{dialect}" while save_adapter below writes under
        # "dialectal_gen/" - confirm both locations are intended.
        output_dir=f"/l/users/abdelrahman.sadallah/dialectal_gen-{dialect}",
        overwrite_output_dir=True,
        # The next line is important to ensure the dataset labels are properly passed to the model
        remove_unused_columns=False,
    )
    trainer = AdapterTrainer(
        model=model,
        args=training_args,
        train_dataset=ds["train"],
        eval_dataset=ds["validation"],
        compute_metrics=compute_accuracy,
        data_collator=DataCollatorForMultipleChoice(tokenizer=tokenizer),
    )
    trainer.train()
    # Persist this dialect's adapter (with_head=True is passed to save_adapter).
    model.save_adapter(f"/l/users/abdelrahman.sadallah/dialectal_gen/{dialect}-adapter-figqa-xlmr", dialect, with_head=True)
    

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mbodasadallah2[0m. Use [1m`wandb login --relogin`[0m to force relogin


You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
200,0.6941
400,0.6956
600,0.6947
800,0.6937
1000,0.6931
1200,0.6943
1400,0.6939
1600,0.6947
1800,0.6933


Map:   0%|          | 0/9674 [00:00<?, ? examples/s]

Map:   0%|          | 0/1146 [00:00<?, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss
200,0.6945
400,0.694
600,0.6944
800,0.6946
1000,0.6941
1200,0.6944
1400,0.6933
1600,0.6946
1800,0.6931


Map:   0%|          | 0/9674 [00:00<?, ? examples/s]

Map:   0%|          | 0/1146 [00:00<?, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss
200,0.6945
400,0.6938
600,0.6943
800,0.6936
1000,0.6939
1200,0.6943
1400,0.6945
1600,0.6931
1800,0.6936


Map:   0%|          | 0/9674 [00:00<?, ? examples/s]

Map:   0%|          | 0/1146 [00:00<?, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss
200,0.6939
400,0.6934
600,0.6941
800,0.6943
1000,0.6935
1200,0.6956
1400,0.694
1600,0.6935
1800,0.6939


Map:   0%|          | 0/9674 [00:00<?, ? examples/s]

Map:   0%|          | 0/1146 [00:00<?, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss
200,0.6936
400,0.6946
600,0.6941
800,0.6937
1000,0.6938
1200,0.6946
1400,0.6937
1600,0.6936
1800,0.6932


Map:   0%|          | 0/9674 [00:00<?, ? examples/s]

Map:   0%|          | 0/1146 [00:00<?, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss
200,0.6949
400,0.695
600,0.6942
800,0.6938
1000,0.6936
1200,0.694
1400,0.6935
1600,0.6937
1800,0.6941


In [20]:
# Create a new adapter named "avg" from the adapters listed in `dialects`.
# NOTE(review): no weights are passed, so this presumably averages them
# equally - confirm against the adapters documentation.
model.average_adapter("avg", dialects)

In [21]:
# Select the averaged adapter for the evaluation in the following cells.
model.active_adapters = "avg"

In [22]:
import pandas as pd

In [23]:
def dialect_eval(model, dialects=None, cache_dir="/scratch/afz225/.cache", split="validation"):
    """Evaluate ``model`` on one split of every dialect's Fig-QA data.

    For "std-dia" the "nightingal3/fig-qa" dataset is used; any other name is
    passed as the config of "ashabrawy/dia_figqa". Each split is tokenized
    with ``preprocess_function`` and scored with ``compute_accuracy`` through
    an ``AdapterTrainer`` that is only used for evaluation.

    Args:
        model: Model to evaluate, with whatever adapter the caller has set
            active; this function does not change the active adapter.
        dialects: Names to evaluate on. Defaults to
            ``["std-dia", "aus", "nig", "wel", "col", "hon"]``.
        cache_dir: Directory passed to ``load_dataset`` as ``cache_dir``.
        split: Name of the dataset split to evaluate on.

    Returns:
        pandas.DataFrame: One row per dialect holding the metrics returned by
        ``trainer.evaluate`` plus a ``test_set`` column with the dialect name.
        The frame is empty when ``dialects`` is empty.
    """
    if dialects is None:
        dialects = ["std-dia", "aus", "nig", "wel", "col", "hon"]

    collator = DataCollatorForMultipleChoice(tokenizer=tokenizer)
    records = []
    for dialect in dialects:
        # Only the dataset identifier differs between standard and dialect data.
        if dialect == "std-dia":
            raw = load_dataset("nightingal3/fig-qa", cache_dir=cache_dir)
        else:
            raw = load_dataset("ashabrawy/dia_figqa", dialect, cache_dir=cache_dir)
        eval_ds = raw[split]

        # Drop every raw column except the labels after tokenization;
        # list.remove raises ValueError early if 'labels' is missing.
        columns = list(eval_ds.column_names)
        columns.remove("labels")
        eval_ds = eval_ds.map(preprocess_function, batched=True, remove_columns=columns)

        # Evaluation only: no training hyper-parameters or train dataset needed.
        eval_args = TrainingArguments(
            per_device_eval_batch_size=32,
            output_dir=f"./training_output-{dialect}",
            # Keep the label column so it reaches the data collator.
            remove_unused_columns=False,
        )
        trainer = AdapterTrainer(
            model=model,
            args=eval_args,
            eval_dataset=eval_ds,
            compute_metrics=compute_accuracy,
            data_collator=collator,
        )
        eval_metrics = trainer.evaluate(eval_ds)
        eval_metrics["test_set"] = dialect
        records.append(eval_metrics)

    # Build the frame once, so the result no longer depends on "std-dia" being
    # the first entry and is not re-copied on every iteration.
    return pd.DataFrame(records)

In [24]:
# Evaluate with the adapter set active in the previous cell ("avg") and write
# the per-dialect metrics to a CSV in the working directory.
dialect_eval(model).to_csv("avg-adapter-figqa-xlmr.csv")

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/155k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/21.8k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/864k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/116k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/120k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/1.22M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/140k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/135k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/991k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/113k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/109k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/1.22M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/140k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/135k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/1.24M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/142k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/137k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/1.19M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/136k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/130k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1094 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [25]:
# Cross-evaluation: activate each dialect adapter in turn, score it on every
# dialect's validation data and save one CSV per adapter.
for dialect in dialects:
    model.active_adapters = dialect
    results = dialect_eval(model)
    # Plain-text dump of the metrics table; the same data is written to the CSV below.
    print(results)
    results.to_csv(f"{dialect}-adapter-figqa-xlmr.csv")

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


   eval_loss  eval_acc  eval_runtime  eval_samples_per_second  \
0   0.693111  0.535649        1.2853                  851.158   
1   0.693117  0.525594        1.3441                  813.906   
2   0.693115  0.517367        1.2429                  880.186   
3   0.693118  0.521024        1.3548                  807.525   
4   0.693142  0.500914        1.3841                  790.389   
5   0.693129  0.492687        1.2700                  861.442   

   eval_steps_per_second test_set  
0                 27.231  std-dia  
1                 26.039      aus  
2                 28.160      nig  
3                 25.835      wel  
4                 25.287      col  
5                 27.560      hon  


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


   eval_loss  eval_acc  eval_runtime  eval_samples_per_second  \
0   0.693111  0.526508        1.2839                  852.109   
1   0.693114  0.523766        1.3326                  820.961   
2   0.693106  0.524680        1.2102                  904.009   
3   0.693105  0.520110        1.3109                  834.572   
4   0.693110  0.534735        1.3865                  789.044   
5   0.693149  0.481718        1.2727                  859.581   

   eval_steps_per_second test_set  
0                 27.261  std-dia  
1                 26.265      aus  
2                 28.922      nig  
3                 26.700      wel  
4                 25.244      col  
5                 27.500      hon  


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


   eval_loss  eval_acc  eval_runtime  eval_samples_per_second  \
0   0.693135  0.521024        1.2920                  846.720   
1   0.693155  0.491773        1.3082                  836.280   
2   0.693140  0.511883        1.2359                  885.198   
3   0.693148  0.489945        1.3502                  810.240   
4   0.693153  0.495430        1.3957                  783.845   
5   0.693143  0.504570        1.2675                  863.121   

   eval_steps_per_second test_set  
0                 27.089  std-dia  
1                 26.755      aus  
2                 28.320      nig  
3                 25.922      wel  
4                 25.077      col  
5                 27.614      hon  


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


   eval_loss  eval_acc  eval_runtime  eval_samples_per_second  \
0   0.693118  0.520110        1.2928                  846.210   
1   0.693109  0.519196        1.3439                  814.062   
2   0.693112  0.531079        1.2531                  873.037   
3   0.693111  0.521938        1.3486                  811.237   
4   0.693112  0.531993        1.3907                  786.638   
5   0.693135  0.508227        1.2720                  860.068   

   eval_steps_per_second test_set  
0                 27.073  std-dia  
1                 26.044      aus  
2                 27.931      nig  
3                 25.954      wel  
4                 25.167      col  
5                 27.516      hon  


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


   eval_loss  eval_acc  eval_runtime  eval_samples_per_second  \
0   0.693108  0.534735        1.2683                  862.541   
1   0.693102  0.537477        1.3436                  814.227   
2   0.693114  0.524680        1.2271                  891.523   
3   0.693105  0.513711        1.3183                  829.828   
4   0.693111  0.525594        1.3972                  783.020   
5   0.693141  0.488117        1.2708                  860.863   

   eval_steps_per_second test_set  
0                 27.595  std-dia  
1                 26.049      aus  
2                 28.522      nig  
3                 26.548      wel  
4                 25.051      col  
5                 27.541      hon  


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.204, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


   eval_loss  eval_acc  eval_runtime  eval_samples_per_second  \
0   0.693128  0.499086        1.2858                  850.846   
1   0.693114  0.507313        1.3573                  805.985   
2   0.693138  0.517367        1.2361                  885.038   
3   0.693100  0.522852        1.3597                  804.572   
4   0.693126  0.528336        1.3816                  791.833   
5   0.693154  0.495430        1.2627                  866.366   

   eval_steps_per_second test_set  
0                 27.221  std-dia  
1                 25.786      aus  
2                 28.315      nig  
3                 25.740      wel  
4                 25.333      col  
5                 27.717      hon  


wandb: Network error (ReadTimeout), entering retry loop.
