In [None]:
!pip install transformers datasets wandb
!pip install accerate -U

Collecting datasets
  Downloading datasets-3.0.1-py3-none-any.whl.metadata (20 kB)
Collecting wandb
  Downloading wandb-0.18.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.16.0-py2.py3-none-any.whl.metadata (9.8 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x

In [None]:
import wandb
# Authenticate with Weights & Biases up front so the training cell further
# down can create runs and upload artifacts without prompting mid-run.
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
import csv
from datasets import Dataset, DatasetDict

def csv_to_dataset(path, do_train_test_split=True):
    """
    Load a sentiment CSV file into a Hugging Face dataset.

    The file is read with ``csv.DictReader``, so its first line must be a
    header. It has to contain a ``Text`` and a ``Sentiment`` column; they are
    renamed to ``text`` and ``label``. Any other column is kept unchanged.

    Labels are encoded as integers: ``1`` when the ``Sentiment`` cell is
    "positive" (compared case-insensitively, ignoring surrounding
    whitespace), ``0`` for every other value.

    :param path: path of the UTF-8 encoded CSV file.
    :param do_train_test_split: when true, split the rows 80/20 (seed 42) and
        return a ``DatasetDict`` with the splits ``"train"`` and ``"val"``;
        otherwise return a single ``Dataset`` with all rows.
    :return: ``DatasetDict`` or ``Dataset``, see ``do_train_test_split``.
    :raises ValueError: if the file contains no data rows.
    """
    with open(path, newline='', encoding='utf-8') as csvfile:
        data = list(csv.DictReader(csvfile))

    # Fail with a clear message instead of an IndexError on data[0] below.
    if not data:
        raise ValueError(f"No data rows found in CSV file: {path}")

    for row in data:
        # Normalise before comparing so 'positive', 'Positive ' and 'POSITIVE'
        # all map to 1. `or ''` covers short rows where DictReader fills the
        # missing cell with None. Everything else maps to 0.
        sentiment = (row['Sentiment'] or '').strip().upper()
        row['Sentiment'] = 1 if sentiment == 'POSITIVE' else 0

    # Convert the list of row dicts to a column-oriented Hugging Face Dataset;
    # the column set is taken from the first row.
    dataset = Dataset.from_dict({key: [d[key] for d in data] for key in data[0]})

    # Rename columns to the names the training code expects
    dataset = dataset.rename_column("Sentiment", "label")
    dataset = dataset.rename_column("Text", "text")

    # Train/validation split
    if do_train_test_split:
        dataset = dataset.train_test_split(test_size=0.2, seed=42)
        # rename splits test -> val
        dataset = DatasetDict({
            "train": dataset["train"],
            "val": dataset["test"]})

    return dataset

In [None]:
from transformers import AutoTokenizer
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorWithPadding
import numpy as np
from transformers import AutoModelForSequenceClassification
import torch
import os


# Report the installed PyTorch build and whether a CUDA device is visible.
print("torch version: ", torch.__version__)
print(
    "CUDA is available. PyTorch is using GPU."
    if torch.cuda.is_available()
    else "CUDA is not available. PyTorch is using CPU."
)

def set_wandb_mode(mode):
    """Set the ``WANDB_MODE`` environment variable of this process to ``mode``."""
    os.environ.update(WANDB_MODE=mode)

def compute_metrics(eval_pred):
    """
    Compute classification accuracy for an evaluation pass.

    :param eval_pred: pair ``(logits, labels)``; the predicted class of each
        example is the argmax of ``logits`` over the last axis.
    :return: dict with the single key ``"accuracy"`` (fraction of predictions
        equal to their label).
    """
    logits, labels = eval_pred
    predicted_classes = np.argmax(logits, axis=-1)
    n_correct = (labels == predicted_classes).sum()
    return {"accuracy": n_correct / predicted_classes.shape[0]}

def train_and_evaluate_models(model_names, train_dataset_path, test_dataset_path, wandb_logging=True, make_val_split=True, wandb_group=None):
    """
    Fine-tune each given checkpoint as a 2-class sentiment classifier and evaluate it.

    For every entry of ``model_names`` the model and tokenizer are loaded, the
    data is tokenized, the model is trained for 5 epochs with the Hugging Face
    ``Trainer`` (best checkpoint by ``eval_accuracy`` is restored at the end),
    and the final model is saved to a directory named after the last path
    component of the model id, in the current working directory.

    :param model_names: models to train, processed one after another. Each
        entry is either a string (used as both model and tokenizer id) or a
        dict with the keys ``"model_name"`` and ``"tokenizer_name"``.
    :type model_names: list
    :param train_dataset_path: path of the training CSV, read with ``csv_to_dataset``.
    :type train_dataset_path: str
    :param test_dataset_path: path of the test CSV, read with ``csv_to_dataset``.
    :type test_dataset_path: str
    :param wandb_logging: when true, one W&B run per model is created in the
        project ``ChungliAoSentiment``; training metrics, test metrics (only
        with ``make_val_split``) and the saved model (as an artifact) are
        logged to it. When false, no W&B reporting or upload is done.
    :type wandb_logging: bool
    :param make_val_split: when true, 20% of the training CSV is held out as
        the validation set and the test CSV is evaluated once after training.
        When false, the whole training CSV is used for training and the test
        CSV itself serves as the evaluation set during training.
    :type make_val_split: bool
    :param wandb_group: optional W&B group name applied to every run.
    :type wandb_group: str or None
    :return: None
    :rtype: None
    """

    # build dataset
    train_dataset = csv_to_dataset(train_dataset_path, do_train_test_split=bool(make_val_split))
    test_dataset = csv_to_dataset(test_dataset_path, do_train_test_split=False)

    # Honour the flag for the Trainer's own reporting as well.
    report_to = "wandb" if wandb_logging else "none"

    for model_spec in model_names:
        if isinstance(model_spec, str):
            tokenizer_name = model_spec
            model_name = model_spec
        else:
            tokenizer_name = model_spec["tokenizer_name"]
            model_name = model_spec["model_name"]
        # Last path component of the model id, used for the W&B run name, the
        # save directory and the artifact name.
        short_name = model_name.split(sep='/')[-1].replace(':', '_')

        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

        def tokenize(examples):
            return tokenizer(examples["text"], truncation=True)

        # Tokenize Dataset
        if make_val_split:
            tokenized_train = train_dataset["train"].map(tokenize, batched=True)
            tokenized_val = train_dataset["val"].map(tokenize, batched=True)
            tokenized_test = test_dataset.map(tokenize, batched=True)
        else:
            # NOTE(review): here the test set doubles as the evaluation set, and
            # load_best_model_at_end picks the checkpoint on it, so the reported
            # accuracy was used for model selection and is likely optimistic.
            tokenized_train = train_dataset.shuffle(seed=42).map(tokenize, batched=True)
            tokenized_val = test_dataset.map(tokenize, batched=True)

        data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
        # Define training arguments
        training_args = TrainingArguments(
            output_dir=f"./results/{model_name}",
            eval_strategy="steps",
            # Fractions: evaluate and save every 5% of the total training steps.
            eval_steps=0.05,
            save_steps=0.05,
            logging_dir=f"./logs/{model_name}",
            logging_steps=10,
            report_to=report_to,
            # NOTE(review): in the recorded run the two base checkpoints stayed
            # at ~0.508 accuracy; this rate may be too high for them - confirm.
            learning_rate=1e-4,
            run_name=model_name,
            save_total_limit=3,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=16,
            num_train_epochs=5,
            weight_decay=0.01,
            load_best_model_at_end=True,
            metric_for_best_model="eval_accuracy",
            greater_is_better=True
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            data_collator=data_collator,
            train_dataset=tokenized_train,
            eval_dataset=tokenized_val,
            tokenizer=tokenizer,
            compute_metrics=compute_metrics
        )

        if wandb_logging:
            wandb.init(project="ChungliAoSentiment", name=short_name, group=wandb_group)

        trainer.train()
        if make_val_split:
            # reset trainer to prevent test results being logged to eval results on wandb
            training_args = TrainingArguments(
                output_dir=f"./results/{short_name}",
                eval_strategy="epoch",
                logging_dir=f"./logs/{short_name}",
                logging_steps=10,
                report_to="none",
                run_name=short_name,
                save_total_limit=3,
                per_device_train_batch_size=16,
                per_device_eval_batch_size=16,
                num_train_epochs=5,
                weight_decay=0.01,
            )

            trainer = Trainer(
                model=model,
                args=training_args,
                data_collator=data_collator,
                train_dataset=tokenized_train,
                eval_dataset=tokenized_val,
                tokenizer=tokenizer,
                compute_metrics=compute_metrics
            )

            test_metrics = trainer.evaluate(eval_dataset=tokenized_test)
            print(test_metrics)
            # replace "eval" with "test" in all dict keys
            test_metrics = {key.replace("eval", "test"): value for key, value in test_metrics.items()}
            # Log test results to W&B summary
            if wandb_logging:
                for key, value in test_metrics.items():
                    wandb.run.summary[key] = value

        save_path = short_name
        trainer.save_model(save_path)

        if wandb_logging:
            # Log the saved model as an artifact named after this model, so the
            # different models do not become versions of one shared artifact.
            artifact = wandb.Artifact(name=f"{short_name}-sentiment", type="model")
            artifact.add_dir(save_path)
            wandb.log_artifact(artifact)
            # Close the run explicitly so the last model's run is finalized too.
            wandb.finish()


torch version:  2.4.1+cu121
CUDA is available. PyTorch is using GPU.


In [None]:
# Checkpoints to fine-tune, one id per line.
model_list = [
    "google-bert/bert-base-multilingual-cased",
    "N1ch0/chungliao-mbert-base-cased",
    "FacebookAI/xlm-roberta-base",
    "N1ch0/chungliao-xlm-roberta-base",
    "robzchhangte/MizBERT",
    "N1ch0/chungliao-MizoBert",
]
# Input CSV files.
train_dataset_path = "/content/Chungli_Ao_train_set.csv"
test_data_path = "/content/Test_data_Chungli_ao.csv"

In [None]:
# Train and evaluate every model in model_list. With make_val_split=False the
# whole training CSV is used for training and the test CSV serves as the
# evaluation set during training (no separate validation split is held out).
train_and_evaluate_models(model_list, train_dataset_path, test_data_path, make_val_split=False)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8579 [00:00<?, ? examples/s]

Map:   0%|          | 0/4095 [00:00<?, ? examples/s]

[34m[1mwandb[0m: Currently logged in as: [33mn1ch0[0m ([33mn11ch00[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss,Accuracy
135,0.6988,0.693044,0.507937
270,0.697,0.693034,0.507937
405,0.7027,0.706018,0.507937
540,0.7012,0.693902,0.492063
675,0.7034,0.707083,0.507937
810,0.6962,0.697324,0.507937
945,0.7003,0.694912,0.507937
1080,0.6827,0.70703,0.507937
1215,0.7069,0.694503,0.507937
1350,0.698,0.693534,0.507937


[34m[1mwandb[0m: Adding directory to artifact (./bert-base-multilingual-cased)... Done. 3.5s


tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.92M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/864 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/712M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at N1ch0/chungliao-mbert-base-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8579 [00:00<?, ? examples/s]

Map:   0%|          | 0/4095 [00:00<?, ? examples/s]

VBox(children=(Label(value='192.276 MB of 678.511 MB uploaded\r'), FloatProgress(value=0.28337915182975276, ma…

0,1
eval/accuracy,███▁███████████████
eval/loss,▁▁▆▁▆▃▂▆▂▁▃▃▁▃▃█▂▁▁
eval/runtime,▁█▆▇▇█▇▇▇▆▇▆█▆▇▆▇▆▅
eval/samples_per_second,█▁▃▂▂▁▂▂▂▃▂▂▁▂▂▂▂▃▄
eval/steps_per_second,█▁▃▂▂▁▂▂▂▃▂▂▁▂▂▂▂▃▄
train/epoch,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇█
train/grad_norm,▃▃▂▃▄▅▂▃▇▄▂▃▇▄▃▄▄▄▁▁▂▁▂█▃▄▅▄▂▃▃▂▁▂▇▁▁▂▅▂
train/learning_rate,█████▇▇▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▁
train/loss,▆█▅▃▃▃▄▄▅▂▂▃▄▇▃▆▃▆▂▇▃▄▁▃▃▄▂▃▄▃▂▃▂▃▄▃▃▃▄▁

0,1
eval/accuracy,0.50794
eval/loss,0.69329
eval/runtime,14.9082
eval/samples_per_second,274.681
eval/steps_per_second,17.172
total_flos,5111360833610520.0
train/epoch,5.0
train/global_step,2685.0
train/grad_norm,4.48045
train/learning_rate,0.0


Step,Training Loss,Validation Loss,Accuracy
135,0.2784,0.558617,0.778755
270,0.1706,0.869049,0.763126
405,0.4913,0.741185,0.765079
540,0.3319,0.558907,0.806838
675,0.1377,0.80366,0.810256
810,0.2105,0.62476,0.817827
945,0.0761,0.788892,0.827595
1080,0.15,0.697879,0.788523
1215,0.2298,0.592459,0.811477
1350,0.2096,0.689382,0.783883


[34m[1mwandb[0m: Adding directory to artifact (./chungliao-mbert-base-cased)... Done. 9.0s


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8579 [00:00<?, ? examples/s]

Map:   0%|          | 0/4095 [00:00<?, ? examples/s]

VBox(children=(Label(value='184.778 MB of 678.513 MB uploaded\r'), FloatProgress(value=0.27232755787917856, ma…

0,1
eval/accuracy,▃▁▁▆▆▇█▄▆▃▆▂▄▄▄▃▆▄▆
eval/loss,▂█▆▂▇▃▆▅▃▅▂▄▃▃▄▅▂▂▁
eval/runtime,▂▅▅▂▄▄▃▁▅▁█▅▅▆▄▁▆▅▄
eval/samples_per_second,▇▄▄▇▅▅▆█▄█▁▄▄▃▅█▃▄▅
eval/steps_per_second,▇▄▄▇▅▅▆█▄█▁▄▄▃▅█▃▄▅
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██████
train/grad_norm,▂▅▁▁▁▁▁▁█▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,████▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,▄▅▅▂▄▃▃▃█▅▄▃▂▃▄▃▂▃▂▃▃▄▄▃▂▁▂▁▂▃▂▂▂▃▃▂▁▂▂▂

0,1
eval/accuracy,0.80855
eval/loss,0.50781
eval/runtime,15.5725
eval/samples_per_second,262.964
eval/steps_per_second,16.439
total_flos,5111360833610520.0
train/epoch,5.0
train/global_step,2685.0
train/grad_norm,0.43154
train/learning_rate,0.0


Step,Training Loss,Validation Loss,Accuracy
135,0.7045,0.6973,0.507937
270,0.7002,0.693136,0.507937
405,0.6919,0.694362,0.507937
540,0.7174,0.693319,0.492063
675,0.6904,0.71138,0.507937
810,0.6825,0.698026,0.507937
945,0.7035,0.700516,0.507937
1080,0.6744,0.698858,0.507937
1215,0.7018,0.695644,0.507937
1350,0.6968,0.693847,0.492063


[34m[1mwandb[0m: Adding directory to artifact (./xlm-roberta-base)... Done. 145.6s


tokenizer_config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/752 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at N1ch0/chungliao-xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8579 [00:00<?, ? examples/s]

Map:   0%|          | 0/4095 [00:00<?, ? examples/s]

VBox(children=(Label(value='176.544 MB of 1081.835 MB uploaded\r'), FloatProgress(value=0.16318955937131793, m…

0,1
eval/accuracy,███▁█████▁█████████
eval/loss,▃▁▂▂█▃▄▃▂▂▂▂▁▂▃▂▃▁▁
eval/runtime,▃▅▅▄▅▂▅▅▆▅▇▃▆▅█▃▅▁▅
eval/samples_per_second,▆▄▄▅▄▇▄▄▃▄▂▆▃▄▁▆▄█▄
eval/steps_per_second,▆▄▄▅▄▇▄▄▃▄▂▆▃▄▁▆▄█▄
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇█████
train/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▂▇▃▁▂▃▁▂▃▃▂▃▄▆▂▂▁▃▁▄▁▆▄▄▁▁▃▃▃▄▃▂▂▁▁▂▃▅▇█
train/learning_rate,████▇▇▇▆▆▆▆▆▆▆▆▅▅▅▄▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁
train/loss,█▅▅▅▆▅▅▄▃▅▅▄▅▄▅▄▄▅▄▄▅▅▅▄▅▅▅▅▄▄▅▅▄▅▅▅▄▄▄▁

0,1
eval/accuracy,0.50794
eval/loss,0.69193
eval/runtime,15.6893
eval/samples_per_second,261.006
eval/steps_per_second,16.317
total_flos,4822723894770360.0
train/epoch,5.0
train/global_step,2685.0
train/grad_norm,6.21049
train/learning_rate,0.0


Step,Training Loss,Validation Loss,Accuracy
135,0.1574,0.906731,0.755556
270,0.2252,0.710031,0.788767
405,0.0968,0.696178,0.769719
540,0.2922,0.705538,0.687668
675,0.205,0.752929,0.755067
810,0.2224,0.62276,0.784615
945,0.0997,0.778299,0.768498
1080,0.0925,0.671995,0.809768
1215,0.2059,0.531886,0.81685
1350,0.2358,0.604844,0.795116


[34m[1mwandb[0m: Adding directory to artifact (./chungliao-xlm-roberta-base)... Done. 131.6s


tokenizer_config.json:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/221k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/701k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/669 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at robzchhangte/MizBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8579 [00:00<?, ? examples/s]

Map:   0%|          | 0/4095 [00:00<?, ? examples/s]

VBox(children=(Label(value='75.277 MB of 1060.709 MB uploaded\r'), FloatProgress(value=0.07096897379535737, ma…

0,1
eval/accuracy,▅▆▅▁▅▆▅██▇▆▆▅▆▇▆█▇▆
eval/loss,█▅▅▅▆▄▆▄▂▃▂▄▄▁▁▃▂▂▁
eval/runtime,▅▁▆▄▅▅▆▅█▄▆▆▅▅▆▃▆▆▂
eval/samples_per_second,▄█▃▅▄▄▃▄▁▅▃▃▄▄▃▆▃▃▇
eval/steps_per_second,▄█▃▅▄▄▃▄▁▅▃▃▄▄▃▆▃▃▇
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇███
train/grad_norm,▁▁▁▁▁▁▁▁▁▁▂▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,█████▇▇▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▁
train/loss,█▄▂▂▃▂▂▂▂▅▂▃▅▂▃▃▂▄▁▂▃▃▃▂▂▄▂▁▂▂▃▁▂▃▂▂▃▂▁▁

0,1
eval/accuracy,0.78901
eval/loss,0.4716
eval/runtime,15.5881
eval/samples_per_second,262.701
eval/steps_per_second,16.423
total_flos,4822723894770360.0
train/epoch,5.0
train/global_step,2685.0
train/grad_norm,0.17913
train/learning_rate,0.0


Step,Training Loss,Validation Loss,Accuracy
135,0.2315,1.010635,0.696703
270,0.1586,0.805572,0.717216
405,0.112,1.087638,0.718926
540,0.3762,0.693042,0.760928
675,0.0853,0.905573,0.762149
810,0.2355,0.783184,0.764103
945,0.1261,0.796271,0.766056
1080,0.1275,0.708241,0.769719
1215,0.2457,0.709334,0.719902
1350,0.2512,0.67293,0.783394


[34m[1mwandb[0m: Adding directory to artifact (./MizBERT)... Done. 2.1s


tokenizer_config.json:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/221k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/701k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at N1ch0/chungliao-MizoBert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8579 [00:00<?, ? examples/s]

Map:   0%|          | 0/4095 [00:00<?, ? examples/s]

VBox(children=(Label(value='89.174 MB of 418.578 MB uploaded\r'), FloatProgress(value=0.2130399086453495, max=…

0,1
eval/accuracy,▁▂▃▅▅▆▆▆▃▇█▅▇▁▆▆▅▅▆
eval/loss,▇▃█▁▅▃▃▂▂▁▁▄▁▆▂▅▄▂▂
eval/runtime,█▅▆▃▅▄▅▆▆▃▂▄▃▃▁▃▁▄▂
eval/samples_per_second,▁▄▃▆▄▅▄▃▃▆▇▅▆▆█▆█▅▇
eval/steps_per_second,▁▄▃▆▄▅▄▃▃▆▇▅▆▆█▆█▅▇
train/epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇█
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇███
train/grad_norm,▁▂▁▂▁▁▃▁▁█▂▁▁▂▁▃▁▁▁▂▁▂▁▁▁▁▁▁▁▁▁▅▁▁▁▁▂▁▁▂
train/learning_rate,█████▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▁
train/loss,▄█▃▅▃▂▂▃▃▂▃▃▁▂▂▁▂▄▄▂▃▂▂▂▃▃▃▂▂▃▂▂▂▁▃▂▂▃▂▄

0,1
eval/accuracy,0.76386
eval/loss,0.74569
eval/runtime,13.9471
eval/samples_per_second,293.609
eval/steps_per_second,18.355
total_flos,4651970986507080.0
train/epoch,5.0
train/global_step,2685.0
train/grad_norm,0.55794
train/learning_rate,0.0


Step,Training Loss,Validation Loss,Accuracy
135,0.1498,0.566932,0.780708
270,0.2263,0.695057,0.767766
405,0.0785,0.627159,0.785348
540,0.3901,0.727084,0.791209
675,0.1103,0.632607,0.813431
810,0.205,0.614096,0.82149
945,0.0619,0.857664,0.774603
1080,0.0892,0.621977,0.807814
1215,0.2177,0.595664,0.797314
1350,0.2014,0.664943,0.796825


[34m[1mwandb[0m: Adding directory to artifact (./chungliao-MizoBert)... Done. 1.8s
