In [2]:
from classification_classes import * 

from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import EarlyStopping
import wandb
import argparse
import re
import os
from transformers import AutoTokenizer
import pytorch_lightning as pl
import pandas as pd
import numpy as np

# Set before any tokenizer is created; presumably this silences the
# HuggingFace tokenizers fork/parallelism warning -- confirm against the library docs.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Earlier single-model / single-dataset settings, kept for reference:
# MODEL_NAME_OR_PATH = "bert-base-german-cased"
# DATASETS = ["RP-Crowd-3", "RP-Crowd-2", "RP-Mod"]
# DATASET_PATH = "/home/dobby/Datasets/resampled/200shap-folds.csv"
# WANDB_PROJECT_NAME = f"{MODEL_NAME_OR_PATH}-all-datasets"

# CSV files the sweep iterates over, one training job per (model, file) pair.
DATASET_PATHS = [
    "./Datasets/RP-Crowd-3-folds.csv",
    "./Datasets/RP-Crowd-2-folds.csv",
    "./Datasets/RP-Mod-folds.csv",
    "./Datasets/resampled/200shap-folds.csv",
]

# Passed to the models as `output_dir` through args_dict.
OUTPUT_DIR = "./german-bert-results/"

TUNING_LEARNING_RATE = False

# One entry per model to train. Every key of an entry is copied into the
# argparse.Namespace handed to the model constructor, and additionally:
#   model_class        -- class instantiated as model_class(args)
#   learning_rate      -- hyperparameter forwarded to the model via args
#   weight_decay       -- hyperparameter forwarded to the model via args
#   model_name_or_path -- identifier given to AutoTokenizer.from_pretrained
#   model_name         -- short label used for the output directory and the
#                         wandb project name; REQUIRED on every entry
CONFIGS = [

    {
        "model_class": T5FineTuner, 
        "learning_rate": 0.0001,
        "weight_decay": 0.1,
        "model_name_or_path": "GermanT5/t5-efficient-oscar-german-small-el32",
        "model_name": "t5-efficient-oscar-german-small-el32"
    }, 
    # {
    #     "model_class": T5FineTuner, 
    #     "learning_rate": 0.0001,
    #     "weight_decay": 0.1,
    #     "model_name_or_path": "google/mt5-small",
    #     "model_name": "mt5-small"
    # },
    # {
    #     "model_class": T5FineTuner, 
    #     "learning_rate": 0.0001,
    #     "weight_decay": 0.1,
    #     "model_name_or_path": "google/mt5-base",
    #     "model_name": "mt5-base"
    # },
    # {
    #     "model_class": T5FineTuner, 
    #     "learning_rate": 5.6e-5,
    #     "weight_decay": 0.1,
    #     "model_name_or_path": "GermanT5/german-t5-oscar-ep1-prompted-germanquad",
    #     "model_name": "german-t5-oscar-ep1-prompted-germanquad"
    # },
    # {
    #     "model_class": Enc1T5,
    #     "learning_rate": 0.0001,
    #     "weight_decay": 0.1, 
    #     "model_name_or_path": "GermanT5/t5-efficient-oscar-german-small-el32",
    #     "model_name": "Enc1T5-t5-efficient-oscar-german-small-el32"
    # }, 
    {
        "model_class": BertFineTuner,
        "learning_rate": 0.0001, 
        "weight_decay": 0.1, 
        "model_name_or_path": "bert-base-german-cased",
        "model_name": "german-bert"
    }, 
    # {
    #     "model_class": BertFineTuner,
    #     #TODO: change these
    #     "learning_rate": 5.6e-5, 
    #     "weight_decay": 0.01,
    #     "model_name_or_path": "xlm-roberta-base",
    #     "model_name": "xlm-roberta"
    # }, 
    {
        "model_class": ElectraFineTuner,
        "learning_rate": 0.0001,
        "weight_decay": 0.1,
        "model_name_or_path": "deepset/gelectra-base",
        # This key was missing, which made the sweep fail with KeyError on
        # config["model_name"] as soon as it reached this entry.
        "model_name": "gelectra-base"
    }
]

# Root folder for everything the sweep writes (checkpoints, results.csv).
PARENT_DIR = "./repeated-reruns/"
# How many times each (model, dataset) pair is trained.
NUM_RUNS = 2
# Epoch budget of a single training run.
EPOCHS = 3

# Validation metrics tracked per run; logged to wandb under "val_<metric>".
metrics = ["accuracy", "f1", "recall", "precision"]
# Empty results table: a model identifier followed by one column per metric.
results_df = pd.DataFrame(columns=["model_name", *metrics])

# Base hyperparameters shared by every run. The sweep converts this mapping
# (plus the per-model config) into an argparse.Namespace for the model class.
args_dict = {
    # other trainer params
    "adam_epsilon": 1e-8,
    "adam_betas": (0.9, 0.999),
    "warmup_steps": 0,
    "n_gpu": 1,
    "gradient_accumulation_steps": 16,
    "num_train_epochs": EPOCHS,
    # other params
    "train_batch_size": 8,
    "eval_batch_size": 8,
    "data_dir": "",  # path for data files
    "output_dir": OUTPUT_DIR,  # path to save the checkpoints
    "max_seq_length": 512,
    "early_stop_callback": True,
    # if you want to enable 16-bit training then install apex and set this to true
    "fp_16": False,
    # you can find out more on optimisation levels here
    # https://nvidia.github.io/apex/amp.html#opt-levels-and-properties
    "opt_level": 'O1',
    # if you enable 16-bit training then set this to a sensible value, 0.5 is a good default
    "max_grad_norm": 0.5,
    "seed": 42,
}

# Keyword arguments shared by every pl.Trainer the sweep creates; the
# per-run logger and callbacks are added inside the training loop.
train_params = dict(
    accumulate_grad_batches = args_dict["gradient_accumulation_steps"],
    gpus = args_dict["n_gpu"],
    max_epochs = EPOCHS, 
    precision = 16 if args_dict["fp_16"] else 32,
    # Apex optimisation levels are spelled with the LETTER O ("O0".."O3").
    # The previous literal '01' (digit zero) is not a valid level and would
    # only surface once fp_16 is switched on; reuse the value from args_dict.
    amp_level = args_dict["opt_level"],
    amp_backend = "apex",
    gradient_clip_val = args_dict["max_grad_norm"]
)

for config in CONFIGS:
    !nvidia-smi
    # pick output_dir
    #TODO: generalize this to other os's besides Linux
    model_name = config["model_name"]
    print(model_name)
    output_dir = PARENT_DIR + model_name

    if config["model_class"] == T5FineTuner:
        dataset_fetcher = GetDatasets(False)
    else:
        dataset_fetcher = GetDatasets(True)

    for source in DATASET_PATHS:
        tokenizer = AutoTokenizer.from_pretrained(config["model_name_or_path"])
        # args_dict["train_dataset"], args_dict["valid_dataset"] = dataset_fetcher.load_dataset(source, tokenizer)

        dataset_name = os.path.basename(source).rstrip(".csv")
        # print(dataset_name)
        wandb_project_name = f"repeated-reruns-{model_name}-{dataset_name}"

        for key in config.keys():
            args_dict[key] = config[key]
        
        args_dict["dataset_path"] = source

        run_metrics = {}
        
        for i in range(NUM_RUNS):
            wandb.finish()
            run_name = f"run-{i}"

            checkpoint_callback = pl.callbacks.ModelCheckpoint(
                                                        dirpath=output_dir + "/checkpoints/", 
                                                        filename="{epoch}-{val_accuracy:.2f}-{val_loss:.2f}", 
                                                        monitor="val_accuracy", mode="max", save_top_k=1
                                                        )
            
            wandb_logger = WandbLogger(project=wandb_project_name, name=run_name)
            for metric in metrics:
                print(f"val_{metric}")
                wandb.define_metric(f"val_{metric}", summary="max")
            
            early_stop_callback = EarlyStopping(monitor="val_accuracy", patience=3, mode="max")
            train_params["logger"] = wandb_logger
            train_params["callbacks"] = [early_stop_callback, checkpoint_callback]

            args_dict["train_dataset"], args_dict["valid_dataset"] = dataset_fetcher.load_dataset(source, tokenizer)
            args = argparse.Namespace(**args_dict)

            model = config["model_class"](args)
            trainer = pl.Trainer(**train_params)
            trainer.fit(model)

            for metric in metrics:
                run_metrics[metric] = wandb.run.summary[f"val_{metric}"]["max"]/NUM_RUNS

            
        run_metrics["model_name"] = config["model_name_or_path"]

        results_df.append(run_metrics, ignore_index=True)

        results_df.to_csv(PARENT_DIR + "results.csv")



            # how do I now get the max scores here??


            

    # set args to the right values
    # initialize wandb project names
    # and wandb logger & checkpoint logger & early stop callback
    # set summary metrics

    # for i in range(num_times):
    # train model
    # keep track of max accuracy, f1, recall, precision, and the number of epochs & minutes it took for each :) 
    # figure out how to get the run time of the model
    # save best ckpt in a specific directory 

#TODO: why are wandb checkpoints 1.73 GB... is it just because there are so many parameters???

#TODO: set up the configs for running on different datasets with different classes
#TODO: how do we get the summaries for each
#TODO: save checkpoints of the models as well in a folder?
#TODO: somehow keep track of the time that each model took to train?

Wed Aug  3 08:10:11 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.103.01   Driver Version: 470.103.01   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro RTX 6000     Off  | 00000000:D8:00.0 Off |                  Off |
| 35%   30C    P8     6W / 260W |   2017MiB / 24220MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▃▃▃▆▆▆███
train/accuracy,▁▁▁▁▁▁▁▁
train/f1,▁▁▁▁▁▁▁▁
train/loss,▄▇▇▅▁█▂█
train/precision,▁▁▁▁▁▁▁▁
train/recall,▁▁▁▁▁▁▁▁
trainer/global_step,▁▂▂▃▄▄▅▆▆▇██
val_accuracy,▁▁▁▁
val_f1,▁▁▁▁
val_loss,▅█▁▃

0,1
epoch,3.0
train/accuracy,1.0
train/f1,0.0
train/loss,0.37712
train/precision,0.0
train/recall,0.0
trainer/global_step,407.0
val_loss,0.31036


val_accuracy
val_f1
val_recall
val_precision


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 142 M 
-----------------------------------------------------
142 M     Trainable params
0         Non-trainable params
142 M     Total params
569.289   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Training: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Validation: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Validation: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Validation: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▅▅██
train/accuracy,█▁
train/f1,▁█
train/loss,█▁
train/precision,▁█
train/recall,▁█
trainer/global_step,▁▂▅▆█
val_accuracy,▁▆█
val_f1,▁▅█
val_loss,█▁▁

0,1
epoch,2.0
train/accuracy,0.375
train/f1,0.54545
train/loss,0.75956
train/precision,0.375
train/recall,1.0
trainer/global_step,119.0
val_loss,0.37755


val_accuracy
val_f1
val_recall
val_precision


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 142 M 
-----------------------------------------------------
142 M     Trainable params
0         Non-trainable params
142 M     Total params
569.289   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Training: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Validation: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Validation: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Validation: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  results_df.append(run_metrics, ignore_index=True)


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▅▅██
train/accuracy,█▁
train/f1,▁▁
train/loss,█▁
train/precision,▁▁
train/recall,▁▁
trainer/global_step,▁▂▅▆█
val_accuracy,▁▆█
val_f1,▁▅█
val_loss,█▁▁

0,1
epoch,2.0
train/accuracy,0.125
train/f1,0.0
train/loss,0.59284
train/precision,0.0
train/recall,0.0
trainer/global_step,119.0
val_loss,0.35808


val_accuracy
val_f1
val_recall
val_precision


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 142 M 
-----------------------------------------------------
142 M     Trainable params
0         Non-trainable params
142 M     Total params
569.289   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Training: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Validation: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Validation: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation: 0it [00:00, ?it/s]

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▅▅▅███
train/accuracy,▆▁▅▅█▅
train/f1,▁▃▇▇█▇
train/loss,█▁▁▂▁▁
train/precision,▁▃████
train/recall,▁▃▆▆█▆
trainer/global_step,▁▂▂▄▅▅▆▇█
val_accuracy,▁▇█
val_f1,▁██
val_loss,█▂▁

0,1
epoch,2.0
train/accuracy,0.625
train/f1,0.72727
train/loss,0.30893
train/precision,0.8
train/recall,0.66667
trainer/global_step,326.0
val_loss,0.27744


val_accuracy
val_f1
val_recall
val_precision


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 142 M 
-----------------------------------------------------
142 M     Trainable params
0         Non-trainable params
142 M     Total params
569.289   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Training: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [None]:
# Write the aggregated sweep results next to the notebook, without the index column.
repeated_results_csv = "results-repeated.csv"
results_df.to_csv(repeated_results_csv, index=False)

In [None]:
import os

# Sets WANDB_START_METHOD for this process.
# NOTE(review): presumably this has to run before the first wandb run is
# started to have any effect -- confirm and move the cell accordingly.
os.environ.update(WANDB_START_METHOD="thread")

In [None]:
def do_stuff():
    """Print the ``nvidia-smi`` report, then a greeting.

    The report is obtained with ``subprocess`` rather than the IPython ``!``
    shell escape, so the function is plain Python and also works outside a
    notebook. The command's output is captured and re-printed so it shows up
    wherever ``print`` output goes. If the command cannot be launched (for
    example on a machine without the NVIDIA tools), a short notice is printed
    instead of raising.
    """
    import subprocess  # local import keeps this scratch cell self-contained

    try:
        report = subprocess.run(["nvidia-smi"], capture_output=True, text=True, check=False)
    except OSError as err:
        print(f"nvidia-smi could not be run: {err}")
    else:
        print(report.stdout, end="")
        if report.stderr:
            print(report.stderr, end="")
    print('Hello world!')
do_stuff()

: 