In [None]:
import torch_geometric.transforms as T
from tqdm.auto import trange, tqdm
import pandas as pd
import ast
import itertools
from transformers import get_scheduler
import torch
import wandb
import evaluate
from itertools import cycle
import numpy as np
import random
import time
from datetime import datetime
import collections

In [None]:
# Display the installed transformers version so the run environment is recorded with the notebook.
import transformers
transformers.__version__

'4.18.0'

In [None]:
# Display the installed PyTorch Geometric version so the run environment is recorded with the notebook.
import torch_geometric as pyg
pyg.__version__

'2.2.0'

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer handed unchanged to ``random.seed``,
            ``np.random.seed`` and ``torch.manual_seed``.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

In [None]:
# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

# load processed files

In [None]:
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from transformers import AutoTokenizer
from transformers.models.bert.modeling_bert import BertModel
import torch

In [None]:
# Retrained POS-MLM checkpoint; both the tokenizer and the encoder are read from local disk only.
checkpoint = '/scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, local_files_only=True)
# Only token-level hidden states are used downstream, so the pooling layer is not built.
# The encoder is put in eval mode because it serves purely as a feature extractor.
bert = BertModel.from_pretrained(
    checkpoint,
    local_files_only=True,
    add_pooling_layer=False,
).eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
from dataclasses import dataclass
@dataclass
class myGNNoutput:
    """Bundle of the three values returned by ``myHeteroGNN.forward``."""
    # NOTE: the ``None`` annotations are placeholders; dataclasses do not enforce field types.
    loss: None   # value of the model's CrossEntropyLoss for the batch
    logit: None  # classifier output, fed to the loss and to argmax for predictions
    emb: None    # representation after readout and dropout, i.e. the classifier input

In [None]:
# English-specific dependency relations: https://universaldependencies.org/en/dep/
# The table is pasted as tab-separated cells; cells starting with '↳' are
# sub-types (e.g. nsubj:pass) of the relation above them.
s = '''nsubj 	csubj
↳nsubj:pass 	↳csubj:pass
↳nsubj:outer 	↳csubj:outer
obj 	ccomp 	xcomp
iobj
obl 	advcl 	advmod
↳obl:npmod 	↳advcl:relcl
↳obl:tmod
vocative 	aux 	mark
discourse 	↳aux:pass
expl 	cop
nummod 	acl 	amod
  	↳acl:relcl
appos 	  	det
  	  	↳det:predet
nmod 	  	 
↳nmod:npmod
↳nmod:tmod
↳nmod:poss
compound 	flat
↳compound:prt 	↳flat:foreign
fixed 	goeswith
conj 	cc
  	↳cc:preconj
list 	parataxis 	orphan
dislocated 		reparandum
root 	punct 	dep'''
s = s.split('\n')

# Collect the top-level relation of every cell. Sub-type cells are skipped
# individually (not the whole row), so a relation such as 'discourse' that
# shares a row with a sub-type is no longer dropped.
relation_names = set()
for line in s:
    for cell in line.split('\t'):
        cell = cell.strip()
        if not cell or cell.startswith('↳'):
            continue
        relation_names.add(cell.split(':')[0].strip())

# The edge labels use an upper-case 'ROOT' instead of the table's 'root'.
relation_names.discard('root')
relation_names.add('ROOT')
# 'case' is not present in the pasted table; 'discourse' is kept as an explicit
# safeguard (adding to a set cannot create a duplicate id).
relation_names.update(['case', 'discourse'])

all_relations = sorted(relation_names)

In [None]:
# Map every dependency relation name to its position in the sorted relation list.
relation2id = {relation: idx for idx, relation in enumerate(all_relations)}

In [None]:
def get_loader(df, add_syllables=False, col='pos_seqs', limit=None, batch_size=32, shuffle=True, max_length=128):
    """Turn the rows of ``df`` into graphs and wrap them in a PyG ``DataLoader``.

    For every row a ``Data`` object is built with:
      * ``edge_index``    - ``homo_edges`` converted to a tensor and transposed,
      * ``edge_type_ids`` - ids (via ``relation2id``) of the ``hetoro_edges``
                            labels, with any ``:subtype`` suffix removed,
      * ``x``             - last hidden state of ``bert`` for the space-joined
                            sequence in column ``col``,
      * ``y``             - the row's ``author`` value,
      * ``num_syllables`` - only when ``add_syllables`` is true.

    Args:
        df: DataFrame with columns ``homo_edges``, ``hetoro_edges``, ``author``,
            ``col`` and, when ``add_syllables`` is set, ``num_syllables``.
        add_syllables: Attach the per-token syllable counts to each graph.
        col: Column holding the token sequence that is fed to the tokenizer.
        limit: When not ``None``, shuffle the rows and keep only the first
            ``limit`` of them.
        batch_size: Batch size of the returned loader.
        shuffle: Whether the returned loader shuffles the graphs.
        max_length: Truncation length for the tokenizer; rows with
            ``max_length - 1`` or more edges are reported and skipped.

    Returns:
        A ``torch_geometric.loader.DataLoader`` over the graphs that were kept.
    """
    data_list = []
    if limit is not None:
        dfnew = df.sample(frac=1).reset_index(drop=True)[:limit]
    else:
        dfnew = df

    # BERT is a frozen feature extractor here, so no autograd graph is needed.
    # no_grad (rather than inference_mode) keeps the features usable as inputs
    # to a model that is trained with autograd afterwards.
    with torch.no_grad():
        for i in range(len(dfnew)):
            curr = dfnew.iloc[i]
            data = Data()
            edge_type = curr['hetoro_edges']
            data.edge_type_ids = torch.tensor([relation2id[t.split(':')[0]] for t in edge_type])
            data.edge_index = torch.tensor(curr['homo_edges']).T
            # Presumably guards against sequences the tokenizer would truncate,
            # which would leave edges pointing at missing nodes - TODO confirm.
            if data.edge_index.shape[1] >= max_length-1:
                print(f"data {i} too long length {data.edge_index.shape[1]}")
                continue
            tokens = tokenizer(' '.join(curr[col]), padding=True, truncation=True, max_length=max_length, return_tensors='pt')
            # Drop the batch dimension of size 1: one feature row per token.
            data.x = bert(**tokens).last_hidden_state.squeeze(0)
            data.y = torch.tensor([curr['author']])
            if add_syllables:
                # 17 is added at both ends; presumably the slots of the special
                # tokens the tokenizer adds - TODO confirm.
                data.num_syllables = torch.tensor([17]+curr['num_syllables']+[17])
            data_list.append(data)

    loader = DataLoader(data_list, batch_size=batch_size, shuffle=shuffle)
    return loader

In [None]:
def preprocess_author_ids(df):
    """Shift the ``author`` column so that its smallest id becomes 0.

    The column is overwritten in place and the same DataFrame is returned.
    Ids are only shifted by the minimum, so gaps between ids are preserved.

    Args:
        df: DataFrame with an integer ``author`` column.

    Returns:
        ``df`` itself, with ``author`` replaced by zero-based ids.
    """
    assert 'author' in df, 'no column named "author" found in df'

    authors = df['author']
    lowest = authors.min()
    span = authors.max() - lowest + 1
    # Lookup table: every integer in [min, max] -> its offset from the minimum.
    mapping = dict(zip(range(lowest, lowest + span), range(span)))
    df['author'] = authors.map(mapping)

    return df

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GATConv, GATv2Conv, TransformerConv, PDNConv, global_mean_pool

# Lookup from the `gnntype` string accepted by myHeteroGNN to the PyG layer class.
GNNtype2layer = dict(
    GATConv=GATConv,
    GATv2Conv=GATv2Conv,
    TransformerConv=TransformerConv,
    PDNConv=PDNConv,
)

class myHeteroGNN(torch.nn.Module):
    """Graph classifier over dependency graphs with typed (embedded) edges.

    Node features pass through ``num_layers`` graph conv layers that receive a
    learned embedding of each edge's dependency type as ``edge_attr``. The
    result is read out per graph and classified with a linear layer.

    Args:
        num_layers: Number of stacked graph conv layers.
        num_classes: Output size of the classifier.
        num_dep_type: Number of distinct dependency relation ids.
        heads: ``heads`` argument of every conv layer; each layer's output
            width is treated as ``hidden_dim * heads``.
        hidden_dim: Per-head output width of every conv layer.
        dep_emb_dim: Width of the dependency-type embedding (``edge_dim``).
        add_self_loops: Forwarded as ``add_self_loops`` to every conv layer.
        gnntype: Key into ``GNNtype2layer`` selecting the conv layer class.
        add_syllables: When truthy, a syllable-count embedding is added to the
            node features in ``forward``.
    """

    def __init__(self, num_layers, num_classes, num_dep_type, heads, hidden_dim, dep_emb_dim=32, add_self_loops=False, gnntype='GATConv', add_syllables=None):
        super().__init__()
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.pos_emb_dim = 32 # this is determined by POS Bert
        self.heads = heads
        self.hidden_dim = hidden_dim
        self.dep_emb_dim = dep_emb_dim
        self.add_syllables = add_syllables

        if add_syllables:
            self.num_syllables = 18 # the longest word has 17 syllables
            self.syllable_emb_layer = nn.Embedding(self.num_syllables, self.pos_emb_dim)

        self.GNNlayer = GNNtype2layer[gnntype]

        self.add_self_loops = add_self_loops
        self.dep_emb_layer = nn.Embedding(num_dep_type, self.dep_emb_dim)

        self.gnns = nn.ModuleList()
        self.gnns.append(self.GNNlayer(self.pos_emb_dim, self.hidden_dim, heads=self.heads, add_self_loops=self.add_self_loops, edge_dim=self.dep_emb_dim))
        for _ in range(self.num_layers-1):
            # Deeper layers get the same add_self_loops setting as the first
            # one; previously they silently used the layer's own default.
            self.gnns.append(self.GNNlayer(self.hidden_dim * self.heads, self.hidden_dim, heads=self.heads, add_self_loops=self.add_self_loops, edge_dim=self.dep_emb_dim))

        self.classifier = nn.Linear(self.hidden_dim * self.heads, self.num_classes)
        self.lossfn = nn.CrossEntropyLoss()

    def forward(self, x, edge_index, edge_type_ids, batch, y, ptr, num_syllable=None, readout='pool'):
        """Classify a batch of graphs and compute the cross-entropy loss.

        Args:
            x: Node features.
            edge_index: Edge index passed to every conv layer.
            edge_type_ids: Dependency-type id of every edge.
            batch: Node-to-graph assignment used by ``global_mean_pool``.
            y: Target labels passed to the loss.
            ptr: Index tensor; ``ptr[:-1]`` selects the rows used by the
                ``'cls'`` readout (presumably each graph's first node in a
                PyG batch - TODO confirm).
            num_syllable: Syllable ids per node; required when the model was
                built with ``add_syllables``.
            readout: ``'pool'`` (mean over each graph's nodes) or ``'cls'``.

        Returns:
            ``myGNNoutput`` with the loss, the logits and the classifier input.

        Raises:
            ValueError: On an unknown ``readout``, or when syllable features
                are enabled but ``num_syllable`` is missing.
        """
        if readout not in ('pool', 'cls'):
            # Without a readout the node-level tensor would reach the loss and
            # fail with an unrelated shape error.
            raise ValueError(f"unknown readout {readout!r}; expected 'pool' or 'cls'")

        if self.add_syllables:
            if num_syllable is None:
                raise ValueError('num_syllable is required when add_syllables is enabled')
            syllable_emb = self.syllable_emb_layer(num_syllable)
            x = x + syllable_emb

        edge_attr = self.dep_emb_layer(edge_type_ids)
        for i in range(self.num_layers):
            x = self.gnns[i](x, edge_index, edge_attr=edge_attr)
            x = F.relu(x)

        if readout == 'pool':
            x = global_mean_pool(x, batch)
        else:  # 'cls'
            x = x[ptr[:-1],:]

        # Dropout is only active in training mode.
        x = F.dropout(x, training=self.training)
        logit = self.classifier(x)
        loss = self.lossfn(logit, y)
        return myGNNoutput(loss=loss, logit=logit, emb=x)

# CCAT50

In [None]:
# Columns stored in the CSV as Python literals (lists) that must be parsed back.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']


def _read_processed_csv(path):
    """Read one processed CSV and parse every column in ``cols_to_eval``."""
    frame = pd.read_csv(path)
    for name in cols_to_eval:
        frame[name] = frame[name].apply(ast.literal_eval)
    return frame


# Training split
file = '../../data/CCAT50/processed/author_0,1_sent_2_train.csv'
df = _read_processed_csv(file)

# Validation split
file = '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'
df_val = _read_processed_csv(file)

In [None]:
# ---- sweep configuration -------------------------------------------------
epochs = 60
warmup_ratio = 0.15
# Previously undefined in the notebook (NameError on a fresh kernel).
# The loaders below are built without syllable features and forward() is called
# without `num_syllable`, so the only setting that works with this loop is False.
add_syllables = False

valid_loader = get_loader(df_val)
num_valid_steps = len(valid_loader)

LIMIT = [None]
NUM_LAYERS = [1,2,3,4]
LR = [1e-3, 5e-4]
HEADS = [1,2,3]
READOUT = ['pool']
GNNTYPE = ['TransformerConv'] # 'GATConv', 'GATv2Conv', 
HIDDEN_DIM = [32]
DEP_EMB_DIM = [16,32,64]
ADD_SELF_LOOPS = [False]

# Materialise the grid once so it can be both counted and iterated.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, HEADS, READOUT, GNNTYPE, HIDDEN_DIM, DEP_EMB_DIM, ADD_SELF_LOOPS))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Load the metric once; compute() resets it, so it can be reused every epoch.
metric = evaluate.load('accuracy')

# With limit=None the training graphs (and their BERT features) do not depend
# on the run, so they are built once and shared by all such runs.
full_train_loader = None

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, heads, readout, gnntype, hidden_dim, dep_emb_dim, add_self_loops = args
    
    seed = np.random.randint(100)
    set_seed(seed)
    
    if limit is None:
        if full_train_loader is None:
            full_train_loader = get_loader(df)
        train_loader = full_train_loader
    else:
        # A limited loader draws a fresh random subset, so it is rebuilt per run.
        train_loader = get_loader(df, limit = limit)
    num_training_steps = len(train_loader)
    
    model = myHeteroGNN(num_layers, 
                        num_classes=2, 
                        num_dep_type=len(all_relations), 
                        heads=heads,
                        hidden_dim=hidden_dim,
                        dep_emb_dim=dep_emb_dim, 
                        add_self_loops=add_self_loops,
                        gnntype=gnntype,
                        add_syllables=add_syllables
                        )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear warm-up followed by linear decay over all optimisation steps.
    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)
    
    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'heads': heads,
        'readout': readout,
        'GNNtype': gnntype,
        'hidden_dim': hidden_dim,
        'dep_emb_dim': dep_emb_dim,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="hetero POS GNN", 
                     entity="fsu-dsc-cil", 
                     dir='/scratch/data_jz17d/wandb_tmp/', 
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True,
                     settings=wandb.Settings(start_method="thread")
                    )
    
    pbar = trange(epochs*num_training_steps, leave=False)
    for i_epoch in range(epochs):
        # ---- train for one epoch ----
        model.train()
        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
            output.loss.backward()
            optimizer.step()
            scheduler.step()
            pbar.update(1)

        # ---- validate; no gradients are needed here ----
        model.eval()
        with torch.no_grad():
            for data in valid_loader:
                data = data.to(device)
                output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
                logit = output.logit
                metric.add_batch(predictions=logit.argmax(axis=-1).cpu().numpy(), references=data.y.cpu().numpy())
        evaluation = metric.compute()
        evaluation.update({'global_step':pbar.n})
        wandb.log(evaluation, step=pbar.n)
    
    pbar.close()
    run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/72 [00:00<?, ?it/s]

data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[34m[1mwandb[0m: Currently logged in as: [33mcpuyyp[0m ([33mfsu-dsc-cil[0m). Use [1m`wandb login --relogin`[0m to force relogin


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▄▅▇▇▇▇▇▇▇▇▇▇██▇█▇▇▇██▇███████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79012
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▆▇▇▇▇█▇▇▇███████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78395
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▆█▇▇▇▇█▇▇▇█▇█████▇████████▇▇▇█▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76852
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▇▇▆▆▆▇▇▇▇▇██▇█████▇███▇██▇██▇▇█▇▇███
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76543
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▄▄▆▆▇▇▇▇▇▇▇█▇███████▇█▇▇█▇█▇██▇██████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7716
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▅▆▇▇▇▆▇▇▇▇▇▇▇▇▇▇█▇▇▇▇██████▇▇▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79012
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▅▃▆▇▇▇▇▇█▇▇▇▇█▇█▇█▇█▇█████▇▇▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77778
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▄▇▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇█▇▇▇▇████▇███▇██████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.81481
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▅▆▇▇▇▇▇▇▇▇▇█▇██▇█▇▇▇▇▇▇▇▇█▇▇▇▇▇█▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75926
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▄▅▆▆▇▇▇▇▇▇███▇███████▇▇█████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74691
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▄▇▇█▇▇█▇▇▇▇▇▇▇▇▇▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77778
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▇▇▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76235
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▄▆▇██▇▇▇▇▇█▇▇▇▇█▇████████▇██████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75926
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▄▄▄▆██▇▇█▇▇█▇▇██▇▇▇███▇██▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78395
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▅▇██▇█▇█▇▇▇▇▇█▇██████████▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77778
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▁▂▄▅▇▆▇▇▇▇▇▇▇▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76543
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▄▅▇████▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77469
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▁▂▆▅▆▆▇▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78704
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▆▆▆▇▇▇▇█▇█▇▇▇▇▇▇▇▇▇▇▇▇██▇▇█▇▇██▇▇███
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7963
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▄▄▇▇▇▇▇█████▇█▇███▇██████▇██▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79012
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▅█▇█▇▇█▇▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73765
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▄▇▇▇████▇███████▇▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78395
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▄▇▇▆▇▇█▇▇███▇█▇███████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77778
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▅▇▇▇▆█▇███▇▆▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75926
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▄▇▇██▇▇▇█▇█▇▇▇█▇▇▇▇▇█▇▇████▇▇█▇█▇█▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76852
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄██▇████████████▇▇██▇██▇▇▇█▇██▇██▇█▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75926
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▅▆▇▆▇▇▇▇▇▇██▆██▇▇█▇▇▇█▇▇██▇█▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80556
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▇▇▆▇▇▇▇▇▇▇▇█▇▇▇▇██▇███████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77778
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▆▇▇▆▇▇▇▇▇▇▇▇▇▇█▇██▇████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80247
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▄▅▇▇▇▇▇▇▇▇▇▇██▇██▇██████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78086
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▇▇▇▆▇▆▇▇▇▇▇▇▇▇██▇█▇████▇████▇███████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7963
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▆█▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7716
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂██▇▇█▇█▇▇▇███▇▇▇▇███▇██████▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76543
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▇▆▇▇███▇██▇██▇█▇█▇██▇██████▇███▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76543
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▂▇▇▆▇▇▇▇█▇█▇█▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77778
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▄▆▇█▇▇▇█▇▇████▇█▇█▇█▇▇█▇██████▇███████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76543
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▆▇▆▆██▇▇▇████████████████▇██▇███████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78395
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▄▄▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇██▇██████████▇██████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80556
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▅▅▆█▇█▇█▇▇▇█████▇█▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73148
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▇▆▇▆▅█▇▆█▇█▇▇▇█▇▇▇█▇▇█▇▇▇▇▇▇▆▇▇██▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77778
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▇▆▇▆▇▇▅▇▇██▇▇█▇▅▇▇▇▇▇██▇▇███▇██▇██▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78704
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333947658538818, max=1.0)…

  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▆▇▇▇▇▇▅██▇██▇██▇█▇██▇▇█▇█▇█▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80556
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▇▇█▇█▆███████▇▇██▇▇▇▇▇█▇▇▇▇█▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77778
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▄▇▇▇█▇▇▇▆▆▇▇▇▇▆▆▇▇▇▆▇▇▆▆▆▆▆▇▆▆▆▆▆▇▆▆▆▆
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75926
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▇▇▇▆▆█▇███▆▇▇▆█▇▇▇▆▆▇▇▆▆▆▇▇▇▇▇▇▆▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75926
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▄▄▄▆▇▇▇▇▇████████████▇███████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7963
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▅▇▇▇▇▇▇█▇▇▇█████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79012
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▄▆▇▇▇▇▇▇▇▇█▇█▇██████▇█████▇██████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80247
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▄▄▇▇▇█▇▇██████████▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79321
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▇▇▇▇██▇▇▇█████████▇████▇▇██▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78704
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▇▇▇█▇████▇███████████▇▇███▇██▇██████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78704
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▄▇▇▇▇▇█▇█████████████▇███▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77778
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▅█▇▇▇█▇███▇████▇▇███████████████▇███
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78704
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▄▅▇▇▇▇▇▆▇▇█▇█████▇██▇█▇██▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78086
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▆▅▆█▇█▇█████▇█▇█▇█▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79321
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▄▅▆████▇████▇███▇▇█▇█▇█▇▇▇█▇▇▇▇▇██████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76543
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▇▇▇█▇█████▇▇▇███████▇▇▇██████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78086
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▁▁▇▇▆▇█▇█▇▇▇▇▇█▇▆▇▇▇▇▇▇▆▇▇▅▅▆▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75309
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▆▇▇▇▆▇█▇██▇▇█▇▆█▇█████▇▇▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80864
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▅▆▆▆▅█▇▆██▆█▇▇▇▆▇▇▇█▇▇▇▇▇▇▆▇▇▇▇▆▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76852
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▃▁▃▁▇▆█▇▇▇▇▇▇▇█▇███▇██▇██▅▇▇██▇████▇▇▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8179
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333898385365804, max=1.0)…

  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▇██▇██▆█▆▇█▆▇▆▇▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7716
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▇▅▇██▆█████▇▇█▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7716
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▆▆▆▇▇████▇█▆▇▆▇██▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74691
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▅▆▇▇▆▇▇▆▇▇▇▇▇█▇██████▇█▇▇██▇██▇▇█▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79321
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▅▇▇█▇██▇▇█▇██████████████████████▇█▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78395
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▅▇▆▇▇▆█▇▇▇█▇█▇██▇▇▇██▇▇███▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77778
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▇▇▇▅▇▇█▇█▇██▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77469
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▆▇▇▇▇████▇█▇██▇█▇▇▇▇▇▇▇▇▇▇▆▆▇▇▇▇▆▆▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76852
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▆▆▇▇▆▇█▇▇█▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77778
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▃▇▇▆▇▆▇▇▇▇▇▇▇▇█▇███████▇█▇▇▇▇▇▇▇▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.81481
global_step,2460.0


data 511 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▆▇▆█▇▇█▇▇▆█████████▇███▇▇▇▇▇▇▇▇▇▇█▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78395
global_step,2460.0


## testing the effect of training set size

In [None]:
# Sweep over training-set size (`limit`) and a small hyper-parameter grid.
# For every grid point a fresh myHeteroGNN is trained, evaluated on the fixed
# validation loader after each epoch, and the accuracy is logged to its own
# wandb run.

preset_epochs = 60   # number of epochs used for the largest `limit`

warmup_ratio = 0.15  # fraction of all optimizer steps used for LR warm-up

valid_loader = get_loader(df_val)
num_valid_steps = len(valid_loader)

LIMIT = [1250, 1000, 750, 500, 250]
NUM_LAYERS = [3]
LR = [1e-3, 5e-4]
HEADS = [2]
READOUT = ['pool']
GNNTYPE = ['GATConv']
HIDDEN_DIM = [16]
DEP_EMB_DIM = [32]
ADD_SELF_LOOPS = [False, True]

# Materialise the grid once so it can be both counted and iterated.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, HEADS, READOUT, GNNTYPE,
                              HIDDEN_DIM, DEP_EMB_DIM, ADD_SELF_LOOPS))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Load the metric once; compute() returns the score for the batches added
# since the previous compute(), so the same object serves every epoch.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, heads, readout, gnntype, hidden_dim, dep_emb_dim, add_self_loops = args

    # Scale the epoch count inversely with `limit` so that epochs * limit stays
    # (up to integer division) at max(LIMIT) * preset_epochs for every run.
    epochs = max(LIMIT) * preset_epochs // limit

    # The seed is drawn from NumPy's global RNG, which set_seed() re-seeds on
    # every run; it is recorded in the wandb config below.
    seed = np.random.randint(100)
    set_seed(seed)

    # NOTE(review): `limit` presumably caps the number of training examples
    # used by get_loader -- confirm against its definition.
    train_loader = get_loader(df, limit=limit)
    num_training_steps = len(train_loader)  # optimizer steps per epoch
    total_steps = epochs * num_training_steps

    model = myHeteroGNN(num_layers,
                        num_classes=2,
                        num_dep_type=len(all_relations),
                        heads=heads,
                        hidden_dim=hidden_dim,
                        dep_emb_dim=dep_emb_dim,
                        add_self_loops=add_self_loops,
                        gnntype=gnntype
                        )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    scheduler = get_scheduler("linear",
                              optimizer=optimizer,
                              num_warmup_steps=int(warmup_ratio * total_steps),
                              num_training_steps=total_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'heads': heads,
        'readout': readout,
        'GNNtype': gnntype,
        'hidden_dim': hidden_dim,
        'dep_emb_dim': dep_emb_dim,
        'add_self_loops': add_self_loops,
    }

    run = wandb.init(project="hetero POS GNN (dataset size)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    pbar = trange(total_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            # ---- train for one epoch ----
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()  # the schedule is stepped per batch, not per epoch
                pbar.update(1)

            # ---- validate ----
            model.eval()
            # No gradients are needed for validation; skipping autograd avoids
            # building a graph for every validation batch.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
                    logit = output.logit
                    metric.add_batch(predictions=logit.argmax(axis=-1).cpu().numpy(),
                                     references=data.y.cpu().numpy())
            evaluation = metric.compute()
            # pbar.n is the number of optimizer steps taken so far in this run.
            evaluation.update({'global_step': pbar.n})
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Always release the progress bar and close the wandb run, even when
        # a run fails part-way, so the next run starts from a clean state.
        pbar.close()
        run.finish()

    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/20 [00:00<?, ?it/s]

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


data 1213 too long length 134


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▅▆▇▇▆▇█▇▇██████████▇██▇██████████
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.75758
global_step,2160.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033338650067647295, max=1.0…

  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▃▃▃▃▇▅▆▆▇▇█▆▇████▇▇█▇▇█▇▇▇▇█▇▇█▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.63333
global_step,2400.0


data 527 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333900769551595, max=1.0)…

  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▂▃▃▃▄▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73333
global_step,2400.0


data 111 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▆▇▇█▇▇███████████▇██████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71212
global_step,2400.0


data 660 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▂▃▃▄▅▇▇▇▅▆▆█▇█▇▇▇▇▇▇▇▇███▇▇▇██▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71818
global_step,2400.0


data 798 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▆▇▇▇███████████████▇███▇▇▇▇▇▇█▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72424
global_step,2400.0


data 516 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▂▃▄▃▃▆▆▅▆▇▆▆▆▆▇█▆█▇▇██▇█▇█▇█▇███▇█
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73333
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▄▃▆▇▅▇█▇▇▇▇▇█▇█▇▇███▇▇███▇█████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73333
global_step,2400.0


data 707 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▃▃▃▃▃▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇██████▇███████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▅▆▆████▇██▇▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.71212
global_step,2400.0


data 668 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▃▃▅▄▆▆▇▇▇▇▇▇▇███████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.72121
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▄▅▇▆▇▇▇█▇▇█▇██▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.72727
global_step,2400.0


data 668 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▃▄▄▄▄▅▆▆▆▇▇▇▇▇▇▇▇▇▇███████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.69394
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▄▆▇███▇▇██▇▇█▇██▇▇██▇██▇███▇███████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72121
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█▇▇▇▇████▇▇▇▇▇▇█▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.69091
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▆▇▇▇█▇██▇▇▇▇▇▇▇▇▇▇███▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73636
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▃▃▃▅▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇██▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.69091
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁█▇▇▇▇▆▆▅▅▅▅▅▅▄▅▄▄▄▄▄▄▄▄▃▄▄▄▄▄▅▄▄▄▄▄▄
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.61818
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▅▃▃▃▄▄▅▆▇▇▇▇▇▇▇████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.68788
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁████▇▇▇▇▆▇▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.64848
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▃▄▃▄▄▅▅▆▇▇▇▇▇▇▇▇▇█████▇█▇████▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.65758
global_step,2400.0


## testing gnntype

In [None]:
# Columns that are stored in the processed CSVs as Python-literal strings.
# After loading, every cell in these columns is turned back into a Python object
# with ast.literal_eval.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']

# --- training split ---
file = '../../data/CCAT50/processed/author_0,1_sent_2_train.csv'
df = pd.read_csv(file)
for col in cols_to_eval:
    df[col] = df[col].map(ast.literal_eval)

# --- validation split (same parsing as the training split) ---
file = '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'
df_val = pd.read_csv(file)
for col in cols_to_eval:
    df_val[col] = df_val[col].map(ast.literal_eval)

In [None]:
# Grid search over GNN layer type, learning rate and self-loops.
# Relies on `df` / `df_val` from the loading cell above. Every grid point trains a
# fresh myHeteroGNN for `epochs` epochs and logs validation accuracy to its own
# W&B run after each epoch.
epochs = 60
warmup_ratio = 0.15  # fraction of all optimizer steps spent in the linear warm-up

# The validation loader does not depend on the grid point, so build it once.
valid_loader = get_loader(df_val)
num_valid_steps = len(valid_loader)

LIMIT = [None]
NUM_LAYERS = [4]
LR = [1e-3, 5e-4]
HEADS = [2]
READOUT = ['pool']
GNNTYPE = ['GATConv', 'GATv2Conv', 'TransformerConv']
HIDDEN_DIM = [32]
DEP_EMB_DIM = [32]
ADD_SELF_LOOPS = [True, False]

# Materialise the grid once so it can be both counted and iterated
# (an itertools.product iterator is exhausted after a single pass).
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, HEADS, READOUT, GNNTYPE, HIDDEN_DIM, DEP_EMB_DIM, ADD_SELF_LOOPS))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Per-run seeds come from a dedicated generator. Drawing them from the global
# NumPy RNG would make each seed a deterministic function of the previous one,
# because set_seed() reseeds that global RNG on every iteration.
seed_rng = np.random.default_rng()

# Loaded once and reused: compute() is called at the end of every epoch, which
# consumes the batches accumulated via add_batch().
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, heads, readout, gnntype, hidden_dim, dep_emb_dim, add_self_loops = args

    seed = int(seed_rng.integers(100))  # same [0, 100) range as before
    set_seed(seed)

    train_loader = get_loader(df, limit=limit)
    num_training_steps = len(train_loader)  # optimizer steps per epoch

    model = myHeteroGNN(num_layers,
                        num_classes=2,
                        num_dep_type=len(all_relations),
                        heads=heads,
                        hidden_dim=hidden_dim,
                        dep_emb_dim=dep_emb_dim,
                        add_self_loops=add_self_loops,
                        gnntype=gnntype
                        )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    scheduler = get_scheduler("linear",
                              optimizer=optimizer,
                              num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                              num_training_steps=epochs*num_training_steps)

    # Everything needed to reproduce this run, attached to the W&B run config.
    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'heads': heads,
        'readout': readout,
        'GNNtype': gnntype,
        'hidden_dim': hidden_dim,
        'dep_emb_dim': dep_emb_dim,
        'add_self_loops': add_self_loops,
    }

    run = wandb.init(project="hetero POS GNN (GNNtype)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True,
                     settings=wandb.Settings(start_method="thread")
                     )

    # pbar.n counts optimizer steps and doubles as the W&B global step.
    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            # Validation after every epoch; no gradients are needed here, so skip
            # building the autograd graph.
            model.eval()
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
                    logit = output.logit
                    metric.add_batch(predictions=logit.argmax(axis=-1).cpu().numpy(), references=data.y.cpu().numpy())
            evaluation = metric.compute()
            evaluation.update({'global_step': pbar.n})
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Always release the progress bar and close the W&B run, even if the
        # run was interrupted or raised.
        pbar.close()
        run.finish()

    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/12 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▄▅▆▆▅▆▆▇▇▆▆▇▆▇█▇▇▇█▇██▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77273
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▅▆▆▇▇▇▇▇▇█▇▇██▆▇███▇▇▇▇▇▇█████▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.68182
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▁▃▅▃▆▇▇▇▆▅▆▇▇▆▇▆▇▇▇▇▇▇▇▇▇█▇███▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.77576
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▃▆▇▇▇▆▆▇▆▇▇▇▇▆▇▇▇█▆▇█▇██▇██▇███▇▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74848
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▄▆▇▅▇▇█▇███▇█▇█▇▇▇█▇▇██▇▇█▇██████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80303
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▅▆▆▅▅▆▇▇▇▇▇▇▇█▇█▇█▇▇█▇██▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80909
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▂▃▄▄▅▆▆▆▇▇▇▇▇█▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78788
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▅▅▅▅▇▆▆▇▇▇▇█▇▇▇▇█▇████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71212
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▃▄▄▅▆▆▇▆▇▇▇▇▇▇█▇▇▇▇▇▇██▇▇▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▄▄▇▆▇▇▇▇▇▇█▇▇▇▇▇█▇▇▇████▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7303
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▄▅▆▇▇▇▇▇▇██▇▇█▇▇▇▇▇▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78788
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▅▆▇▆▆▆▇▇███▇███▇▇▇▇███████▇██████▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79697
global_step,2460.0


## testing num of authors

In [None]:
# Sweep over author subsets of different sizes (2, 3 and 4 authors).
# For every grid point the matching processed train/val CSVs are loaded, a fresh
# myHeteroGNN is trained for `epochs` epochs, and validation accuracy and macro-F1
# are logged to a dedicated W&B run after each epoch.
# Relies on `cols_to_eval` defined in an earlier cell.
epochs = 60
warmup_ratio = 0.15  # fraction of all optimizer steps spent in the linear warm-up
data_folder = '../../data/CCAT50/processed'

IDS = [[0,1], [2,3], [0,1,2], [3,4,5], [0,1,2,3], [4,5,6,7]]
NUM_SENTENCES = [2]
LIMIT = [None]
NUM_LAYERS = [4]
LR = [1e-3, 5e-4]
HEADS = [2]
READOUT = ['pool']
GNNTYPE = ['TransformerConv']
HIDDEN_DIM = [32]
DEP_EMB_DIM = [32]
ADD_SELF_LOOPS = [True]

# Materialise the grid once so it can be both counted and iterated
# (an itertools.product iterator is exhausted after a single pass).
ARGS = list(itertools.product(IDS, NUM_SENTENCES, LIMIT, NUM_LAYERS, LR, HEADS, READOUT, GNNTYPE, HIDDEN_DIM, DEP_EMB_DIM, ADD_SELF_LOOPS))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Per-run seeds come from a dedicated generator. Drawing them from the global
# NumPy RNG would make each seed a deterministic function of the previous one,
# because set_seed() reseeds that global RNG on every iteration.
seed_rng = np.random.default_rng()

# Loaded once and reused: compute() is called at the end of every epoch, which
# consumes the batches accumulated via add_batch().
acc = evaluate.load('accuracy')
f1 = evaluate.load('f1')

for i_run, args in enumerate(ARGS):
    ids, num_sentences, limit, num_layers, lr, heads, readout, gnntype, hidden_dim, dep_emb_dim, add_self_loops = args

    seed = int(seed_rng.integers(100))  # same [0, 100) range as before
    set_seed(seed)

    # File names encode the author ids as a comma-separated list, e.g. "author_0,1_sent_2_train.csv".
    str_author = ','.join(map(str, ids))

    # --- training split ---
    file = f"author_{str_author}_sent_{num_sentences}_train.csv"
    df = pd.read_csv(f'{data_folder}/{file}')
    for col in cols_to_eval:
        df[col] = df[col].apply(ast.literal_eval)
    df = preprocess_author_ids(df)
    train_loader = get_loader(df, limit=limit)
    num_training_steps = len(train_loader)  # optimizer steps per epoch

    # --- validation split ---
    file = f"author_{str_author}_sent_{num_sentences}_val.csv"
    df_val = pd.read_csv(f'{data_folder}/{file}')
    for col in cols_to_eval:
        df_val[col] = df_val[col].apply(ast.literal_eval)
    df_val = preprocess_author_ids(df_val)
    valid_loader = get_loader(df_val)
    num_valid_steps = len(valid_loader)

    model = myHeteroGNN(num_layers,
                        num_classes=len(ids),
                        num_dep_type=len(all_relations),
                        heads=heads,
                        hidden_dim=hidden_dim,
                        dep_emb_dim=dep_emb_dim,
                        add_self_loops=add_self_loops,
                        gnntype=gnntype
                        )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    scheduler = get_scheduler("linear",
                              optimizer=optimizer,
                              num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                              num_training_steps=epochs*num_training_steps)

    # Everything needed to reproduce this run, attached to the W&B run config.
    wconfig = {
        'seed': seed,
        'ids': ids,
        'num_authors': len(ids),
        'num_sentences': num_sentences,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'heads': heads,
        'readout': readout,
        'GNNtype': gnntype,
        'hidden_dim': hidden_dim,
        'dep_emb_dim': dep_emb_dim,
        'add_self_loops': add_self_loops,
    }

    run = wandb.init(project="hetero POS GNN (num_authors)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True,
                     settings=wandb.Settings(start_method="thread")
                     )

    # pbar.n counts optimizer steps and doubles as the W&B global step.
    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            # Validation after every epoch; no gradients are needed here, so skip
            # building the autograd graph.
            model.eval()
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
                    logit = output.logit
                    # Compute predictions/labels once and feed both metrics.
                    predictions = logit.argmax(axis=-1).cpu().numpy()
                    references = data.y.cpu().numpy()
                    acc.add_batch(predictions=predictions, references=references)
                    f1.add_batch(predictions=predictions, references=references)
            evaluation = acc.compute()
            evaluation.update(f1.compute(average='macro'))
            evaluation.update({'global_step': pbar.n})
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Always release the progress bar and close the W&B run, even if the
        # run was interrupted or raised.
        pbar.close()
        run.finish()

    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/12 [00:00<?, ?it/s]

data 908 too long length 134


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▅▆▆▇▇▇▇▇▇▇▇▇████████▇████████████████
f1,▁▁▃▆▆▇▇▇▇█████▇█████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8
f1,0.7981
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▅▃▃▇▆▇▇▇▇▇█▇████████▇████▇███▇█▇▇█▇▇███
f1,▁▅▁▂▇▇▇▇▇▇▇██████████▇██████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74545
f1,0.74432
global_step,2460.0


data 176 too long length 146
data 784 too long length 133
data 230 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▄▄▇▇███▆▇██▇█▆▇██▇▇▇██▇██████████████
f1,▁▄▅▅▅▇████▇▇██▇█▇███████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80519
f1,0.80506
global_step,2460.0


data 176 too long length 146
data 784 too long length 133
data 230 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▃▅▆▇███▇▇▇██▇██▆█████████████████████
f1,▁▁▅▄▆▆▇███▇▇███▇██▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.81494
f1,0.81423
global_step,2460.0


data 908 too long length 134
data 1469 too long length 146
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/3660 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▄▅▆▆▇▇█▇██▇▇▇▇▇▇█▇▇█▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇
f1,▁▂▂▄▆▆▇█▇█▇██▇▇▇▇▇▇█▇▇█▇▇█▇▇▇█▇▇▇▇█▇█▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.57025
f1,0.57392
global_step,3660.0


data 908 too long length 134
data 1469 too long length 146
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/3660 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▅▅▆▆▆▇▇▆▇▇█▇███▇█▇█▇██▇▇█████▇▇██▇██
f1,▂▁▁▂▅▅▅▆▆▆▇▅█▇█████▇████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.57231
f1,0.5737
global_step,3660.0


data 144 too long length 133
data 900 too long length 132
data 1556 too long length 133
data 1794 too long length 128
data 76 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/3420 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▄▃▅▅▅▆▇▇▇███████▇█▇█▇█████████████████
f1,▂▁▄▄▆▆▅▆▇█▇███████▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73171
f1,0.7354
global_step,3420.0


data 144 too long length 133
data 900 too long length 132
data 1556 too long length 133
data 1794 too long length 128
data 76 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/3420 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▄▄▅▅▆▆▆▇▆█▇▆▇█▇█▇█▇██▇█████▇████▇▇██
f1,▁▁▁▁▅▆▆▆▇▇▇▇▇█▇▇████▇█▇██▇██████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.69401
f1,0.69725
global_step,3420.0


data 908 too long length 134
data 1469 too long length 146
data 2077 too long length 133
data 560 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/4860 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▅▅▇▇▇█▇███████▇██▇▇▇▇█▇█▇███▇██▇█▇▇█
f1,▁▁▃▄▆▆▇▇▇█▇█████████████████▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.54232
f1,0.54852
global_step,4860.0


data 908 too long length 134
data 1469 too long length 146
data 2077 too long length 133
data 560 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/4860 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▄▄▄▅▅▅▇▇█▆▇▇▇██▇████████████████████
f1,▁▂▂▃▄▄▄▅▅▅█▇█▆▇▇▇███████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.52194
f1,0.52256
global_step,4860.0


data 257 too long length 132
data 913 too long length 133
data 1151 too long length 128
data 1281 too long length 129
data 2093 too long length 151
data 2095 too long length 313
data 2096 too long length 316
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/4620 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▅▆▆▆▆▆▇▆▇▇▆▇▇▇▇▇▇█▇███▇▇█████████████
f1,▁▂▄▅▇▆▇▇▇▇▇▇▇▇██▇██▇████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.61815
f1,0.62313
global_step,4620.0


data 257 too long length 132
data 913 too long length 133
data 1151 too long length 128
data 1281 too long length 129
data 2093 too long length 151
data 2095 too long length 313
data 2096 too long length 316
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/4620 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▅▆▆▇▇▆▇▇▇▇▇▇██████▇██████████████████
f1,▁▃▂▄▅▆▆▇▆▇▇▇▇▇███████▇██████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.6025
f1,0.61044
global_step,4620.0


## testing UPOS tags

In [None]:
# Columns that are stored in the CSV as serialized Python literals and must be
# parsed back into Python objects with ast.literal_eval after loading.
parsed_columns = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs']

# Training split
file = '../../data/CCAT50/processed/author_0,1_sent_2_train.csv'
df = pd.read_csv(file)
for column in parsed_columns:
    df[column] = df[column].apply(ast.literal_eval)

# Validation split
file = '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'
df_val = pd.read_csv(file)
for column in parsed_columns:
    df_val[column] = df_val[column].apply(ast.literal_eval)

In [None]:
# Replace the global tokenizer / encoder with the ones stored in this checkpoint
# (the path suggests an MLM model trained on UPOS sequences; confirm with the training logs).
checkpoint = '/scratch/data_jz17d/result/upos_mlm_corenlp/run_2/checkpoint-155000/'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, local_files_only=True)
# Load the encoder without its pooling layer and switch it to inference mode;
# .eval() returns the module itself, so the call can be chained.
bert = BertModel.from_pretrained(
    checkpoint,
    local_files_only=True,
    add_pooling_layer=False,
).eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/upos_mlm_corenlp/run_2/checkpoint-155000/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Sweep over training-set size (`limit`) and a small hyper-parameter grid for the
# hetero GNN on UPOS inputs. Each grid point becomes one wandb run; validation
# accuracy is logged once per epoch.
preset_epochs = 60   # number of epochs used for the largest training subset

warmup_ratio = 0.15  # fraction of all optimizer steps used for linear LR warm-up

valid_loader = get_loader(df_val, col='upos_seqs')
num_valid_steps = len(valid_loader)

LIMIT = [1250, 1000, 750, 500, 250]
NUM_LAYERS = [3]
LR = [1e-3, 5e-4]
HEADS = [2]
READOUT = ['pool']
GNNTYPE = ['GATConv']
HIDDEN_DIM = [16]
DEP_EMB_DIM = [32]
ADD_SELF_LOOPS = [False, True]
ADD_SYLLABLES = [False, True]  # defined for namespace compatibility; NOT part of this sweep's grid


# Materialise the grid once so it can be both counted and iterated.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, HEADS, READOUT, GNNTYPE, HIDDEN_DIM, DEP_EMB_DIM, ADD_SELF_LOOPS))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Load the metric once; compute() clears the accumulated batches, so the same
# object can be reused for every epoch and every run.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, heads, readout, gnntype, hidden_dim, dep_emb_dim, add_self_loops = args

    # Scale the epoch count inversely with `limit` so that epochs * limit stays
    # (approximately) constant at max(LIMIT) * preset_epochs, i.e. 1250 * 60.
    epochs = max(LIMIT) * preset_epochs // limit

    # Draw a seed in [0, 100) and record it in the wandb config so the run can be repeated.
    seed = np.random.randint(100)
    set_seed(seed)

    train_loader = get_loader(df, col='upos_seqs', limit=limit)
    num_training_steps = len(train_loader)  # optimizer steps per epoch
    total_steps = epochs * num_training_steps

    model = myHeteroGNN(num_layers,
                        num_classes=2,
                        num_dep_type=len(all_relations),
                        heads=heads,
                        hidden_dim=hidden_dim,
                        dep_emb_dim=dep_emb_dim,
                        add_self_loops=add_self_loops,
                        gnntype=gnntype
                        )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear warm-up followed by linear decay over the whole run.
    scheduler = get_scheduler("linear",
                              optimizer=optimizer,
                              num_warmup_steps=int(warmup_ratio * total_steps),
                              num_training_steps=total_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'heads': heads,
        'readout': readout,
        'GNNtype': gnntype,
        'hidden_dim': hidden_dim,
        'dep_emb_dim': dep_emb_dim,
        'add_self_loops': add_self_loops,
    }

    run = wandb.init(project="hetero UPOS GNN (dataset size)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    pbar = trange(total_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            model.eval()
            # No gradients are needed for validation; skipping autograd saves
            # memory and time without changing the predictions.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
                    logit = output.logit
                    metric.add_batch(predictions=logit.argmax(axis=-1).cpu().numpy(), references=data.y.cpu().numpy())
            evaluation = metric.compute()
            evaluation.update({'global_step': pbar.n})
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Always release the progress bar and close the wandb run, even when
        # training is interrupted or raises, so the next run starts cleanly.
        pbar.close()
        run.finish()

    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/20 [00:00<?, ?it/s]

data 245 too long length 134


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Exception in thread Thread-637:
Traceback (most recent call last):
  File "/home/jz17d/anaconda3/envs/torch/lib/python3.9/site-packages/wandb/apis/normalize.py", line 26, in wrapper
Exception in thread Thread-638:
Exception in thread Thread-639:
Traceback (most recent call last):
  File "/home/jz17d/anaconda3/envs/torch/lib/python3.9/site-packages/wandb/apis/normalize.py", line 26, in wrapper
Traceback (most recent call last):
  File "/home/jz17d/anaconda3/envs/torch/lib/python3.9/site-packages/wandb/apis/normalize.py", line 26, in wrapper
    return func(*args, **kwargs)
Exception in thread Thread-636:
Exception in thread Thread-640:
    return func(*args, **kwargs)
  File "/home/jz17d/anaconda3/envs/torch/lib/python3.9/site-packages/wandb/sdk/internal/internal_api.py", line 1636, in upload_urls
Traceback (most recent call last):
  File "/home/jz17d/anaconda3/envs/torch/lib/python3.9/site-packages/wandb/sdk/internal/internal_api.py", line 1636, in upload_urls
  File "/home/jz17d/anaco

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


0,1
accuracy,███████▇▆▆▆▆▆▁▆▇▁▁▂▆▁▆▃▇▆▇▇▃▇▅▇▇▇▆▇▇▇▄▇▆
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.51212
global_step,1504.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033338586489359535, max=1.0…

  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇██▇▇█▇▇▇▇█▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70606
global_step,2400.0


data 323 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▅▄▆▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇█▇▇█▇▇▇█▇█▇▇█▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.6697
global_step,2400.0


data 213 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▄▅▆▆▇▇▇▇▇▇██▇██▇██████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72727
global_step,2400.0


data 672 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▂▄▅▆▆▅▆▆▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇█▆█▇█▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.62424
global_step,2400.0


data 13 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▅▅▆▆▇▆▇▇▇▇▇▇▇▇█▆███▆▇▇▇▇█▇▇██▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71818
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▅▄▅▅▅▅▅▅▆▅▆▆▆▆▇▆▇▇▇▇▇▇█▇██▇▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.67273
global_step,2400.0


data 527 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▃▃▃▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇█▇▇███▇█▇█▇▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.64545
global_step,2400.0


data 111 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▄▅▆▅▆▆▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇██████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.67273
global_step,2400.0


data 660 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333894411722819, max=1.0)…

  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▄▅▆▆▆▇▇▇▇▇▇█▇█▇▇▆█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.65455
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇███████▇▇███
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.68788
global_step,2400.0


data 516 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▃▃▃▃▅▆▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇██████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.65152
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▃▅▄▅▆▅▇▆▇▆▇▇▇▇▇▇█▇▇▇█▇▇▇▇▇▇████▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.65152
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▅▆▇▇▇▇▇▇█▇███▇▇▇█▇▇████████████▇█▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70606
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▄▅▄▅▅▅▅▆▆▅▆▆▆▆▇▇▇▇▇▇▇▇▇██▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70909
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▆▃▃▃▇▆▆▆▇▇▇▇▇▇█▇▇▇▇▇████▇▇██▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.64848
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▄▄▄▆▇▆▆▇▇▇▇▇▇▇▇▇▇▇██████▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.62424
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▅▅▅▅▆▇▇██▇███████▇█████▇▇▇█▇▇▇█▇▇▇█▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.68485
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▅▄▄▅▄▅▅▆▅▆▆▇▆▇▇▇▆▇▇▇▇▇▇▇▇█▇▇▇███████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.68485
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▄▆▅▅▆▆▆▇▇▇▇▇▇▇█▇█▇▇▇▇███████▇█████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.66364
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▅▃▄▅▆▆▅▅▅▅▅▅▆▆▅▆▆▆▇▆▆▆▇▇▇▇██▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.63636
global_step,2400.0


## adding the number of syllables

In [None]:
# Switch the global tokenizer / encoder back to the POS checkpoint.
# NOTE(review): the saved output of this cell mentions a different checkpoint
# directory (pos_mlm_8) than the path below; re-run the cell to refresh the output.
checkpoint = '/scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, local_files_only=True)
# Load the encoder without its pooling layer and switch it to inference mode;
# .eval() returns the module itself, so the call can be chained.
bert = BertModel.from_pretrained(
    checkpoint,
    local_files_only=True,
    add_pooling_layer=False,
).eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/pos_mlm_corenlp/pos_mlm_8/checkpoint-155000/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Columns that are stored in the CSV as serialized Python literals and must be
# parsed back into Python objects with ast.literal_eval after loading
# (this time including the per-item syllable counts).
parsed_columns = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']

# Training split
file = '../../data/CCAT50/processed/author_0,1_sent_2_train.csv'
df = pd.read_csv(file)
for column in parsed_columns:
    df[column] = df[column].apply(ast.literal_eval)

# Validation split
file = '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'
df_val = pd.read_csv(file)
for column in parsed_columns:
    df_val[column] = df_val[column].apply(ast.literal_eval)



In [None]:
# Sweep comparing the hetero GNN with and without syllable counts as an extra
# model input, across GNN layer types, learning rates and self-loop settings.
# Each grid point becomes one wandb run; validation accuracy is logged once per epoch.
epochs = 60

warmup_ratio = 0.15  # fraction of all optimizer steps used for linear LR warm-up

LIMIT = [None]
NUM_LAYERS = [3]
LR = [1e-3, 5e-4]
HEADS = [2]
READOUT = ['pool']
GNNTYPE = ['GATConv', 'GATv2Conv', 'TransformerConv']
HIDDEN_DIM = [16]
DEP_EMB_DIM = [32]
ADD_SELF_LOOPS = [False, True]
ADD_SYLLABLES = [False, True]


def _forward(model, data, readout, add_syllables):
    """Run `model` on one batch.

    `data.num_syllables` is passed as an extra positional input only when
    `add_syllables` is true; otherwise the call is identical without it.
    Returns whatever the model returns (the loops below read `.loss` / `.logit`).
    """
    inputs = [data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr]
    if add_syllables:
        inputs.append(data.num_syllables)
    return model(*inputs, readout=readout)


# Materialise the grid once so it can be both counted and iterated.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, HEADS, READOUT, GNNTYPE, HIDDEN_DIM, DEP_EMB_DIM, ADD_SELF_LOOPS, ADD_SYLLABLES))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Load the metric once; compute() clears the accumulated batches, so the same
# object can be reused for every epoch and every run.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, heads, readout, gnntype, hidden_dim, dep_emb_dim, add_self_loops, add_syllables = args

    # Draw a seed in [0, 100) and record it in the wandb config so the run can be repeated.
    seed = np.random.randint(100)
    set_seed(seed)

    # Loaders are rebuilt per run because their contents depend on `add_syllables`.
    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit=limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)  # optimizer steps per epoch
    total_steps = epochs * num_training_steps

    model = myHeteroGNN(num_layers,
                        num_classes=2,
                        num_dep_type=len(all_relations),
                        heads=heads,
                        hidden_dim=hidden_dim,
                        dep_emb_dim=dep_emb_dim,
                        add_self_loops=add_self_loops,
                        gnntype=gnntype,
                        add_syllables=add_syllables
                        )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear warm-up followed by linear decay over the whole run.
    scheduler = get_scheduler("linear",
                              optimizer=optimizer,
                              num_warmup_steps=int(warmup_ratio * total_steps),
                              num_training_steps=total_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'heads': heads,
        'readout': readout,
        'GNNtype': gnntype,
        'hidden_dim': hidden_dim,
        'dep_emb_dim': dep_emb_dim,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="hetero POS GNN (syllables)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    pbar = trange(total_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = _forward(model, data, readout, add_syllables)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            model.eval()
            # No gradients are needed for validation; skipping autograd saves
            # memory and time without changing the predictions.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    output = _forward(model, data, readout, add_syllables)
                    logit = output.logit
                    metric.add_batch(predictions=logit.argmax(axis=-1).cpu().numpy(), references=data.y.cpu().numpy())
            evaluation = metric.compute()
            evaluation.update({'global_step': pbar.n})
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Always release the progress bar and close the wandb run, even when
        # training is interrupted or raises, so the next run starts cleanly.
        pbar.close()
        run.finish()

    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/24 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▇▇▆▇▇█▆▇▇▇███▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72424
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▆▇▇▇██▇▇▇██████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79091
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▂▂▂▂▃▃▆▆▆▇▇█▇▇▇▇▇▇▇██▇████▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76364
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▄▄▃▅▆▆▆▄▇▇█▇██▇████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79394
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▇▇▇▇▇▇█▇▆▇███▇████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71818
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▆▇████▇▇██▇████▇████▇██▇██████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78182
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▄▅▅▄▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇█▇██▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76364
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▃▅▄▆▆▆██████████▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75152
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333896001180013, max=1.0)…

  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▄▆▇▇▆▇███▇▇▇▇█████▇█████▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▆▆▆▇▇▇▇▇▇██▇████▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80606
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▄▆█▇▆████▇█▇██████▇█▇█▇▇▇██▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78485
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▆▆▆▇▇▇█▇███▇████▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80303
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▃▃▃▃█▇▇▇▇▇▇▇▇▇▇█▇███████████▇███████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72424
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▄▅▇▇▇▇▇▇▇▇▇▇▇▇▇████████▇██████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76667
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▃▃▃▄▄▅▆▆▆▆▆▆▇▇▇▇█▇████▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73333
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

FileNotFoundError: Couldn't find a module script at /home/jz17d/Desktop/style-models/code/POS/accuracy/accuracy.py. Module 'accuracy' doesn't exist on the Hugging Face Hub either.

## 2 authors: repeated runs with the best configuration

In [None]:
# Repeated training runs of myHeteroGNN over a small hyper-parameter grid.
# Every grid point is trained for `epochs` epochs, evaluated on the validation
# loader after each epoch, and logged to its own Weights & Biases run.
epochs = 100
warmup_ratio = 0.15          # fraction of all optimizer steps used for LR warm-up
monitering_metric = 'accuracy'

# Each list holds the candidate values for one hyper-parameter; the cartesian
# product of all lists defines the runs. REPEAT only multiplies the number of
# runs per configuration (its value is not used inside the loop).
LIMIT = [None]
NUM_LAYERS = [3]
LR = [1e-3]
HEADS = [2]
READOUT = ['pool']
GNNTYPE = ['TransformerConv']
HIDDEN_DIM = [16]
DEP_EMB_DIM = [32]
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [False, True]
REPEAT = list(range(5))


# Materialize the grid once so it can be both counted and iterated
# (an itertools.product iterator is exhausted after a single pass).
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, HEADS, READOUT, GNNTYPE, HIDDEN_DIM, DEP_EMB_DIM, ADD_SELF_LOOPS, ADD_SYLLABLES, REPEAT))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Load the metric a single time, before any run is started: compute() resets
# its accumulated state, so the same object can be reused for every epoch, and
# a loading failure now surfaces before a W&B run has been opened.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, heads, readout, gnntype, hidden_dim, dep_emb_dim, add_self_loops, add_syllables, repeat = args

    # A fresh, time-based seed per run; it is stored in the W&B config so the
    # run can be reproduced afterwards.
    seed = int(datetime.now().timestamp())
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit = limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)

    model = myHeteroGNN(num_layers,
                        num_classes=2,
                        num_dep_type=len(all_relations),
                        heads=heads,
                        hidden_dim=hidden_dim,
                        dep_emb_dim=dep_emb_dim,
                        add_self_loops=add_self_loops,
                        gnntype=gnntype,
                        add_syllables=add_syllables
                        )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear schedule; the scheduler is stepped once per batch, so both step
    # counts are expressed in batches over the whole run.
    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'heads': heads,
        'readout': readout,
        'GNNtype': gnntype,
        'hidden_dim': hidden_dim,
        'dep_emb_dim': dep_emb_dim,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="hetero POS GNN (best)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True,
                     settings=wandb.Settings(start_method="thread"))

    best_metric = 0.0
    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                # The syllable counts are passed as an extra positional input
                # only when the model was built with add_syllables=True.
                extra_inputs = (data.num_syllables,) if add_syllables else ()
                output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            model.eval()
            # No gradients are needed for validation; skipping autograd
            # bookkeeping saves memory and time.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    extra_inputs = (data.num_syllables,) if add_syllables else ()
                    output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
                    logit = output.logit
                    metric.add_batch(predictions=logit.argmax(axis=-1).cpu().numpy(), references=data.y.cpu().numpy())
            evaluation = metric.compute()
            evaluation.update({'global_step':pbar.n})
            if (best_metric < evaluation[monitering_metric]):
                best_metric = evaluation[monitering_metric]
            # Log the epoch metrics and the running best together, at the
            # same explicit step (number of optimizer steps taken so far).
            evaluation[f'best_{monitering_metric}'] = best_metric
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Always release the progress bar and close the W&B run, even when
        # training is interrupted or raises.
        pbar.close()
        run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/10 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▆▄▆▇▇▇▆▇▆▇▇▇▇███▇▇█▇██▇█████████████
best_accuracy,▁▁▁▁▆▆▆▇▇▇▇▇▇▇▇▇▇███████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78788
best_accuracy,0.79394
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▅▆▇▇▇▇▇▇▇███▇▇██▇████████▇█▇█████▇█▇
best_accuracy,▁▁▁▁▅▆▇▇▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76061
best_accuracy,0.77879
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▄▆▇██▇████████▇█▇▇▇█▇▇▇█▇▇▇█▇▇▇▇▇▇▇▇
best_accuracy,▁▁▁▂▅▆▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70909
best_accuracy,0.76061
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▆▇▇▇▇█▇▇▇▇██████▇███▇█▇▇███████▇████
best_accuracy,▁▂▂▂▆▇▇▇▇███████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78182
best_accuracy,0.78485
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▃▆▆▇▇████▇████▇█▇████████████████████
best_accuracy,▁▁▁▃▅▆▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7697
best_accuracy,0.79091
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▆▇▄▇▇██▇██▇▇█▇▇▇▇▇████▇▇▇▇█▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▁▁▆▇▇▇▇███████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.80303
best_accuracy,0.8303
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▆▅▇▇█▇███████████▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▂▂▂▆▆▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75455
best_accuracy,0.79091
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▃▆▆▇▇▆███▇███▇█▇▇▇▇▇▇▇▇▇▇▇█▇▇▇█▇▇▇▇▇▇
best_accuracy,▁▂▂▃▆▆▇▇▇███████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.81515
best_accuracy,0.84848
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▃▆▅▇▇█▇████████▇▇█▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▁▁▂▃▆▆▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.79091
best_accuracy,0.81818
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▆▇▇████▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▇▇▇▇▇▇
best_accuracy,▁▁▁▁▆▇██████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70606
best_accuracy,0.74545
global_step,2460.0


# imdb

In [None]:
# Columns that are stored in the CSV as Python-literal strings; after loading,
# every cell of these columns is parsed back into a Python object.
cols_to_eval = [
    'homo_edges',
    'hetoro_edges',
    'pos_seqs',
    'upos_seqs',
    'num_syllables',
]

# Training split.
file = '../../data/imdb/processed/author_0,1_sent_2_train.csv'
df = pd.read_csv(file)
for col in cols_to_eval:
    df[col] = df[col].map(ast.literal_eval)

# Validation split, parsed the same way.
file = '../../data/imdb/processed/author_0,1_sent_2_val.csv'
df_val = pd.read_csv(file)
for col in cols_to_eval:
    df_val[col] = df_val[col].map(ast.literal_eval)

In [None]:
# Repeated training runs of myHeteroGNN on the IMDb data over a small
# hyper-parameter grid. Every grid point is trained for `epochs` epochs,
# evaluated on the validation loader after each epoch, and logged to its own
# Weights & Biases run.
epochs = 60
warmup_ratio = 0.15          # fraction of all optimizer steps used for LR warm-up
monitering_metric = 'accuracy'

# Each list holds the candidate values for one hyper-parameter; the cartesian
# product of all lists defines the runs. REPEAT only multiplies the number of
# runs per configuration (its value is not used inside the loop).
LIMIT = [None]
NUM_LAYERS = [4]
LR = [1e-3, 5e-5]
HEADS = [4]
READOUT = ['pool']
GNNTYPE = ['GATv2Conv','TransformerConv']
HIDDEN_DIM = [16]
DEP_EMB_DIM = [32]
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [False, True]
REPEAT = list(range(5))


# Materialize the grid once so it can be both counted and iterated
# (an itertools.product iterator is exhausted after a single pass).
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, HEADS, READOUT, GNNTYPE, HIDDEN_DIM, DEP_EMB_DIM, ADD_SELF_LOOPS, ADD_SYLLABLES, REPEAT))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Load the metric a single time, before any run is started: compute() resets
# its accumulated state, so the same object can be reused for every epoch, and
# a loading failure now surfaces before a W&B run has been opened.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, heads, readout, gnntype, hidden_dim, dep_emb_dim, add_self_loops, add_syllables, repeat = args

    # A fresh, time-based seed per run; it is stored in the W&B config so the
    # run can be reproduced afterwards.
    seed = int(datetime.now().timestamp())
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit = limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)

    model = myHeteroGNN(num_layers,
                        num_classes=2,
                        num_dep_type=len(all_relations),
                        heads=heads,
                        hidden_dim=hidden_dim,
                        dep_emb_dim=dep_emb_dim,
                        add_self_loops=add_self_loops,
                        gnntype=gnntype,
                        add_syllables=add_syllables
                        )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear schedule; the scheduler is stepped once per batch, so both step
    # counts are expressed in batches over the whole run.
    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'heads': heads,
        'readout': readout,
        'GNNtype': gnntype,
        'hidden_dim': hidden_dim,
        'dep_emb_dim': dep_emb_dim,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="hetero POS GNN imdb (best)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True,
                     settings=wandb.Settings(start_method="thread"))

    best_metric = 0.0
    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                # The syllable counts are passed as an extra positional input
                # only when the model was built with add_syllables=True.
                extra_inputs = (data.num_syllables,) if add_syllables else ()
                output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            model.eval()
            # No gradients are needed for validation; skipping autograd
            # bookkeeping saves memory and time.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    extra_inputs = (data.num_syllables,) if add_syllables else ()
                    output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
                    logit = output.logit
                    metric.add_batch(predictions=logit.argmax(axis=-1).cpu().numpy(), references=data.y.cpu().numpy())
            evaluation = metric.compute()
            evaluation.update({'global_step':pbar.n})
            if (best_metric < evaluation[monitering_metric]):
                best_metric = evaluation[monitering_metric]
            # Log the epoch metrics and the running best together, at the
            # same explicit step (number of optimizer steps taken so far).
            evaluation[f'best_{monitering_metric}'] = best_metric
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Always release the progress bar and close the W&B run, even when
        # training is interrupted or raises.
        pbar.close()
        run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/40 [00:00<?, ?it/s]

data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▅▆▆▇▆▇▇██▇▇▇██▇██████▇████▇▇█▇▇▇▇▇▇▇▇
best_accuracy,▁▁▄▅▆▆▇▇▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.877
best_accuracy,0.88874
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▅▅▆▇▆▇▇▅█▇▆▇█████▇▇█▇▇█▇▇▇▇█▇▇▇▇▇██▇▇▇
best_accuracy,▁▁▅▅▆▇▇▇▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87341
best_accuracy,0.8832
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▅▆▆▆▆▇▇▆▇██████▇▇██▇▇█▇█▇▇▇▇▇▇█▇█▇█▇██
best_accuracy,▁▁▅▆▆▆▆▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87928
best_accuracy,0.88613
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▅▆▆▇▅▇█▇▇██▇▇█▇▇▇▇▇▇▇▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▄▅▆▆▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86623
best_accuracy,0.88026
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▆▄▇▇▇▇▇██▇▇█▇██████▇█▇▇▇█▇▇█▇█▇█▇▇▇▇
best_accuracy,▁▁▃▄▆▆▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87667
best_accuracy,0.88483
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▅▇▆▆▇█▇█████▇▆▇▇▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▄▅▇▇▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86688
best_accuracy,0.88777
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▅▆▇▆█▇▆▇▆█▇▇██▇██▇▇█▇▇█▇▇▇▆▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▄▅▆▇▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8633
best_accuracy,0.88189
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▃▆▅▇▇▆▇█▇▇█████▇▇█▇▆▇▇▇▇▇▇▇▆▆▇▇▆▆▇▇▇▇
best_accuracy,▁▁▄▄▆▆▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86688
best_accuracy,0.89005
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▅▆▇▇▇▇▇▇█▇▇██▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▄▅▆▇▇▇▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87047
best_accuracy,0.88483
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▅▃▇▁▇▇▆█▇██▇▇█▇█▇▇▇▇▆▇▇▇▇▆▆▇▇▇▆▇▇▇▆▇▆▇
best_accuracy,▁▁▅▅▇▇██████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86427
best_accuracy,0.88548
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▆▆▇▇█▇██▇▇█▇▇▇▇▇▇▇█▇█▇▇██▇▇█▇██▇▇█▇▇▇█
best_accuracy,▁▃▆▆▇▇██████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.88385
best_accuracy,0.89201
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▆▆▅▇▇▆▇█▇▇█▇█▆▇▇█▇▇▇██▇█▇█▇▇█▇▇▇▇▇▇█▇▇
best_accuracy,▁▁▅▆▇▇██████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87896
best_accuracy,0.89168
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▆▆▇▇██▇██▇▇▇████▇▇████▇███▇█▇▇▇▇█▇████
best_accuracy,▁▁▆▆▇▇██████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.88679
best_accuracy,0.89038
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▆▅▇▇▇█▇██████▇▇▇▇▇██▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▂▆▆▇▇▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87113
best_accuracy,0.8845
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▅▆▇▇████▇▇█▇▇▇▇▇▄█▇▇██▇▇▇██████▇██████
best_accuracy,▁▃▅▆▇▇██████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.88483
best_accuracy,0.88907
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▅▆▇█▇▄▇█▇▇▇▇▇▇▇▇▇▅█▇▇██▇▇▇▇▇▇██▇█▇▇▇▇▇
best_accuracy,▁▃▅▆▇███████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86949
best_accuracy,0.88091
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▅▇█▇▇▇▇▇▇█▇█▅██▇▆▇▇▇▇█▇▇▇▇█▇▇▇▇█▇█▇██▇
best_accuracy,▁▂▅▇████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86688
best_accuracy,0.87471
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▆▆▇█▄█▇▇█▇▇▇▇▇▇▇▇▇█▇▇▇▇█▇▇▇▇██▇█████▇
best_accuracy,▁▁▅▆▇▇██████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87374
best_accuracy,0.8832
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▆▇▄█▇▇█▇▆█▃██▇▇▇▇▇▇▆█▇█▇██▇▇▇▇██▇███▇▇
best_accuracy,▁▁▆▆▆▇▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87308
best_accuracy,0.88613
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▅▆███▇▇▇███▇▇█▇▇▆███▇▇█▇▇█████▇▇████▇█
best_accuracy,▁▁▅▅▇▇▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87896
best_accuracy,0.89396
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▇▇▇▇▇▇▇▇▇██████████████████████████████
best_accuracy,▁▇▇▇▇▇▇▇▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.83883
best_accuracy,0.83948
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▂▃▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇████████████
best_accuracy,▁▁▁▁▁▁▁▂▃▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.84078
best_accuracy,0.84176
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇█▇█▇█████████████████
best_accuracy,▁▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.84731
best_accuracy,0.84763
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▂▂▃▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇█▇█████████
best_accuracy,▁▁▁▁▁▁▁▂▂▃▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.84176
best_accuracy,0.84176
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▇▇▇▇▇▇████████████████████████████████
best_accuracy,▁▁▇▇▇▇▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.84241
best_accuracy,0.84307
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇████▇█████████
best_accuracy,▁▁▁▁▁▁▁▁▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇██████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.84339
best_accuracy,0.84437
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▇▇▇▇▇▇▇▇▇█████████████████████████████
best_accuracy,▁▁▇▇▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.83785
best_accuracy,0.83785
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▇▇▇▇▇▇▇▇▇██████████████████████████████
best_accuracy,▁▇▇▇▇▇▇▇▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.83654
best_accuracy,0.83817
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▇▇▇▇▇▇▇▇▇▇██▇██████████████████████████
best_accuracy,▁▇▇▇▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85383
best_accuracy,0.85383
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▇▇▇▇▇▇▇████████████████████████████████
best_accuracy,▁▇▇▇▇▇▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.81729
best_accuracy,0.8186
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▄▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇█▇███████████████
best_accuracy,▁▁▁▁▁▂▄▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇██████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85742
best_accuracy,0.85905
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▇▇▇▇▇▇▇████████████████████████████████
best_accuracy,▁▇▇▇▇▇▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86362
best_accuracy,0.86362
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇██▇█████████████
best_accuracy,▁▁▁▁▁▂▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86754
best_accuracy,0.86852
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇█▇▇▇███████████████
best_accuracy,▁▁▁▁▁▂▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86036
best_accuracy,0.86264
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▅▅▆▆▆▇▇▇▇▇▇▇▇█▇███████████████████
best_accuracy,▁▃▃▃▃▃▅▅▆▆▆▇▇▇▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8721
best_accuracy,0.87341
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▇▇▇▇▇▇█████████████████████████████████
best_accuracy,▁▇▇▇▇▇▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86199
best_accuracy,0.86264
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▄▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█▇██████████████
best_accuracy,▁▁▁▁▁▁▄▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85742
best_accuracy,0.8584
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▆▆▆▆▆▇▇▇▇▇▇▇▇██████████████████████████
best_accuracy,▁▆▆▆▆▆▇▇▇▇▇▇▇▇██████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8708
best_accuracy,0.87145
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▄▅▆▆▆▆▇▇▇▇▇▇█▇▇▇▇█████████████████
best_accuracy,▁▁▁▁▁▂▄▅▆▆▆▆▇▇▇▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8646
best_accuracy,0.86688
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▇▇▇▇▇▇█████████████████████████████████
best_accuracy,▁▇▇▇▇▇▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86623
best_accuracy,0.86721
global_step,17280.0


# guardian

In [None]:
# Columns that are parsed back into Python objects with ast.literal_eval
# after the CSV has been read.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']

# Components of the processed-file name: topic ids, author ids and the
# number of sentences per text.
str_topic = '0,1'
str_author = '0,1'
num_sent_per_text = 2

# Read the train / val / test CSVs with one loop instead of three
# copy-pasted stanzas. `split`, `filename`, `file` and `col` stay
# module-level names and end up holding the values of the last ('test')
# iteration.
guardian_frames = {}
for split in ('train', 'val', 'test'):
    filename = f"topic_{str_topic}_author_{str_author}_sent_{num_sent_per_text}_{split}.csv"
    file = f'../../data/guardian/processed/{filename}'
    frame = pd.read_csv(file)
    for col in cols_to_eval:
        frame[col] = frame[col].apply(ast.literal_eval)
    guardian_frames[split] = frame

df = guardian_frames['train']
df_val = guardian_frames['val']
df_test = guardian_frames['test']

In [None]:
# ---- sweep configuration -------------------------------------------------
epochs = 60
warmup_ratio = 0.15  # fraction of all optimizer steps used for LR warm-up
monitering_metric = 'accuracy'  # kept for compatibility; not read anywhere in this cell

# Hyper-parameter grid: every combination is trained once per REPEAT entry
# (2 LR x 2 GNNTYPE x 2 ADD_SYLLABLES x 5 REPEAT = 40 runs).
LIMIT = [None]
NUM_LAYERS = [4]
LR = [1e-3, 5e-5]
HEADS = [4]
READOUT = ['pool']
GNNTYPE = ['GATv2Conv','TransformerConv']
HIDDEN_DIM = [16]
DEP_EMB_DIM = [32]
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [False, True]
REPEAT = list(range(5))


def _forward_batch(model, data, add_syllables, readout):
    """Run one forward pass of `model` on a batch.

    `data.num_syllables` is passed as an extra positional argument only when
    `add_syllables` is true. Returns whatever the model returns; the callers
    below read `.loss` and `.logit` from it.
    """
    if add_syllables:
        return model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, data.num_syllables, readout=readout)
    return model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)


def _accuracy_on(model, loader, add_syllables, readout):
    """Return the accuracy of `model` over every batch in `loader`.

    The model is switched to eval mode and the pass runs under
    `torch.no_grad()` so that no autograd graph is built during evaluation.
    """
    model.eval()
    metric = evaluate.load('accuracy')
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            output = _forward_batch(model, data, add_syllables, readout)
            metric.add_batch(predictions=output.logit.argmax(axis=-1).cpu().detach().numpy(), references=data.y.cpu().numpy())
    return metric.compute()['accuracy']


# Materialise the grid once so it can be both counted and iterated.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, HEADS, READOUT, GNNTYPE, HIDDEN_DIM, DEP_EMB_DIM, ADD_SELF_LOOPS, ADD_SYLLABLES, REPEAT))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, heads, readout, gnntype, hidden_dim, dep_emb_dim, add_self_loops, add_syllables, repeat = args

    # Time-based seed: every repeat differs, and the value is stored in the
    # wandb config below so a run can be reproduced.
    seed = int(datetime.now().timestamp())
    set_seed(seed)

    train_loader = get_loader(df, limit = limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)
    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    test_loader = get_loader(df_test, limit = limit, add_syllables=add_syllables)
    num_test_steps = len(test_loader)

    model = myHeteroGNN(num_layers,
                        num_classes=2,
                        num_dep_type=len(all_relations),
                        heads=heads,
                        hidden_dim=hidden_dim,
                        dep_emb_dim=dep_emb_dim,
                        add_self_loops=add_self_loops,
                        gnntype=gnntype,
                        add_syllables=add_syllables
                        )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # "linear" schedule over all optimizer steps of the run, with the first
    # warmup_ratio share of them used as warm-up steps.
    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'heads': heads,
        'readout': readout,
        'GNNtype': gnntype,
        'hidden_dim': hidden_dim,
        'dep_emb_dim': dep_emb_dim,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="hetero POS GNN guardian (best)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True,
                     settings=wandb.Settings(start_method="thread"))

    best_evaluation = collections.defaultdict(float)
    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            # ---- one epoch of optimisation ----
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = _forward_batch(model, data, add_syllables, readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            # ---- end-of-epoch evaluation on all three splits ----
            evaluation = {
                'train_accuracy': _accuracy_on(model, train_loader, add_syllables, readout),
                'val_accuracy': _accuracy_on(model, valid_loader, add_syllables, readout),
                'test_accuracy': _accuracy_on(model, test_loader, add_syllables, readout),
            }

            # 'best_test_accuracy' is the maximum over epochs and is therefore
            # selected on the test set itself. Additionally record the test
            # accuracy of the epoch with the best validation accuracy, which
            # does not use the test set for model selection. This check must
            # run before the best_* values are updated below.
            if ('test_accuracy_at_best_val' not in best_evaluation
                    or evaluation['val_accuracy'] > best_evaluation['best_val_accuracy']):
                best_evaluation['test_accuracy_at_best_val'] = evaluation['test_accuracy']

            for key in evaluation:
                best_evaluation[f'best_{key}'] = max(best_evaluation[f'best_{key}'], evaluation[key])

            # Current and best-so-far metrics share one step (the number of
            # optimizer steps taken so far), so they are logged together.
            wandb.log({**evaluation, **best_evaluation}, step=pbar.n)
    except BaseException:
        # Includes KeyboardInterrupt: close the wandb run as failed instead
        # of leaving it open, then re-raise.
        run.finish(exit_code=1)
        raise
    else:
        run.finish()
    finally:
        pbar.close()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/40 [00:00<?, ?it/s]

data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁▄▄▄▄▄▄▄▄▄▄▅▅▆▆▇▇▇▇▇██████
test_accuracy,█▁▁▁▁▁▁▁▁▁▁▁▂▃▃▄▄▅▄▄▆▅▅▇▅▅
val_accuracy,▁▄▄▄▄▄▄▄▄▄▄▅▅▆▆▇▇▇▇▇█▇▇█▇▇

0,1
best_test_accuracy,0.62162
best_val_accuracy,0.62343
test_accuracy,0.51544
val_accuracy,0.60831


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03334117730458577, max=1.0)…

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▃▃▅▅▆▇▇▇▇▇▇███████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▃▃▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇██████████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▂▃▅▅▆▇▇▇▇▇▇███████████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▁▃▃▃▃▆▇▄▅▆▆▆█▆▆▇▇▆▅██▇▇▆▇▇▇▇▇▇▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▂▃▄▅▄▄▆▆▆▆▇▆▇▇▇▆▇▇██▇███████
val_accuracy,▁▁▁▁▁▁▁▁▁▁▂▃▃▃▆▇▅▅▇▆▆█▇▆▇█▆▅██▇▇▆██▇█▇▇▇

0,1
best_test_accuracy,0.60232
best_trani_accuracy,0.96117
best_val_accuracy,0.68199
test_accuracy,0.56564
trani_accuracy,0.95955
val_accuracy,0.66625


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▂▂▄▄▅▆▆▆▇██████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▂▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▂▃▅▅▅▇▇▇▇▇▇████████████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▂▂▄▃▅▆▆▅▇▆▅█▅▇▅█▇▆▆▇▆█▆█▇▆▆▇▇▇▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▂▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███████
val_accuracy,▁▁▁▁▁▁▁▁▁▂▃▅▄▅▇▇▅▇▆▅█▆▇▅█▇▆▆▇▇█▆██▇▇▇▇▇▇

0,1
best_test_accuracy,0.58494
best_trani_accuracy,0.96926
best_val_accuracy,0.67003
test_accuracy,0.55212
trani_accuracy,0.96117
val_accuracy,0.65743


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁██████████████████
best_trani_accuracy,▁▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████████████████
best_val_accuracy,▁▄▄▄▄▄▄▄▄▅▇▇▇▇▇▇▇▇▇█████████████████████
test_accuracy,█▁▁▁▁▁▁▁▁▂▅▂▂▄▆▃▃▅▆▇▆▇█▅▅██▆▅▇▄▆▇▆█▅▆▆▆▆
trani_accuracy,▁▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇███▇████████████████
val_accuracy,▁▄▄▄▄▄▄▄▄▅▆▅▄▆▇▅▅▆▇█▇██▆▆██▇▇▇▆▇█▇█▇▇▇▇▇

0,1
best_test_accuracy,0.62548
best_trani_accuracy,0.96764
best_val_accuracy,0.6864
test_accuracy,0.55598
trani_accuracy,0.96278
val_accuracy,0.65806


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▇▇▇▇▇▇▇█████████████████
best_trani_accuracy,▁▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇██████████████████
best_val_accuracy,▁▃▃▃▃▃▃▃▄▄▅▆▇▇▇▇████████████████████████
test_accuracy,▇▁▁▁▁▁▁▁▁▂▃▄▆▅▅▆█▇▆▇▅▆▆█▄▄▄▆▆▇▅▅▅▅▆▆▆▅▅▅
trani_accuracy,▁▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇██████████████████
val_accuracy,▁▃▃▃▃▃▃▃▄▄▅▆▇▆▇▇█▇▇▇▇▇▇█▆▆▆▇▇█▆▆▆▇▇▇▇▆▇▇

0,1
best_test_accuracy,0.61004
best_trani_accuracy,0.96117
best_val_accuracy,0.68073
test_accuracy,0.52124
trani_accuracy,0.95631
val_accuracy,0.6461


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▃▄▄▄▄▅▅▇▇▇▇▇▇▇▇▇▇██████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▂▃▃▃▃▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇████████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▃▄▄▄▄▆▆▇▇▇▇▇▇▇▇▇▇██████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▃▄▄▂▃▅▄▇▆▃▃▆▇▆▆▇▆█▆▆▇▇▇▆▆▆▆▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▁▁▁▂▃▃▂▃▄▃▅▅▄▄▆▆▆▇▇▇▇▇▇▇▇▇████████
val_accuracy,▁▁▁▁▁▁▁▁▁▃▄▄▂▄▆▄▇▇▄▄▇▇▆▇▇▆█▆▇▇▇▇▆▆▆▇▇▆▇▆

0,1
best_test_accuracy,0.639
best_trani_accuracy,0.95793
best_val_accuracy,0.68388
test_accuracy,0.56371
trani_accuracy,0.95631
val_accuracy,0.64987


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▂▄▄▅▅▅▆▆▆▇▇▇████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇█████████████
best_val_accuracy,▁▁▁▁▁▁▁▂▂▄▅▅▅▅▆▆▆▆▇▇████████████████████
test_accuracy,▁▁▁▁▁▁▁▁▂▄▄▆▅▄▆▅▆▇▇▆██▅▅▅▅█▇▆█▇▆▇█▇▇▆▇▆▆
trani_accuracy,▁▁▁▁▁▁▁▁▁▃▃▄▄▃▄▅▅▅▆▅▆▆▆▆▇▇▇█▇██▇████████
val_accuracy,▁▁▁▁▁▁▁▂▂▄▅▅▅▄▆▅▅▇▇▆██▅▅▆▅█▇▆█▇▆▇█▇▇▇▇▇▇

0,1
best_test_accuracy,0.64093
best_trani_accuracy,0.98058
best_val_accuracy,0.70214
test_accuracy,0.57529
trani_accuracy,0.97896
val_accuracy,0.67128


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▂▂▃▃▄▅▅▅▅▅██████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▃▃▄▄▅▅▅▅▆▆▆▇▇▇▇▇████████████
best_val_accuracy,▁▁▁▁▁▁▁▂▂▂▄▄▅▆▆▆▇▇██████████████████████
test_accuracy,▁▁▁▁▁▁▁▁▂▁▃▃▄▅▅▄▆▄▄▅▇▇▇▆▇█▆▆▇▅▇▆▇▅▆▅▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▃▃▄▄▅▄▅▅▆▆▆▇▇▇▇▇█▇█▇███▇████
val_accuracy,▁▁▁▁▁▁▁▂▂▂▄▄▅▆▆▅▇▅▆▆▇█▇▇██▇▆▇▆▇▇▇▆▆▆▇▇▇▇

0,1
best_test_accuracy,0.6583
best_trani_accuracy,0.99353
best_val_accuracy,0.69773
test_accuracy,0.56757
trani_accuracy,0.99353
val_accuracy,0.67065


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▂▃▃▃▃▄▅▅▆▆▇▇▇███████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇██████████
best_val_accuracy,▁▁▁▁▁▁▂▂▃▄▄▄▄▄▆▆▆▆▇▇▇███████████████████
test_accuracy,▁▁▁▁▁▁▁▁▂▃▃▃▃▄▅▃▆▅▇▃▇█▆▅▆▄▅▆▇▆▇▇▇▇▆▆▇▆▇▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▂▃▂▂▃▄▃▅▅▅▄▆▆▆▅▆▄▆▆▇▇████▇▇████
val_accuracy,▁▁▁▁▁▁▂▁▃▄▄▃▄▄▆▄▆▆▇▄▇█▇▅▆▄▆▆▇▇▇▇▇▇▇▇▇▇▇▇

0,1
best_test_accuracy,0.65637
best_trani_accuracy,0.97573
best_val_accuracy,0.68577
test_accuracy,0.60232
trani_accuracy,0.96764
val_accuracy,0.66625


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▃▆▆▆▆▆▆▆▆▆▇▇▇████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▂▃▃▃▃▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▄▇▇▇▇▇███████████████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▃▆▄▆▄▇▆▆▄▄▇▆▆█▆▇▆▆▆▇▇▇█▆▇▇█▇▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▂▃▂▃▂▄▅▅▄▃▆▆▅▆▆▇▆▇▇▇██▇██████
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▄▇▅▇▄▇█▇▅▄▇▇▆█▇▇▆▆▆▇▇▇█▇▇▇▇▇▇

0,1
best_test_accuracy,0.60811
best_trani_accuracy,0.95955
best_val_accuracy,0.65932
test_accuracy,0.56757
trani_accuracy,0.95955
val_accuracy,0.64484


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▃▄▄▄▅▅▅▅████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▂▂▂▂▃▄▅▅▅▅▆▆▇▇▇▇▇▇▇▇▇███████████
best_val_accuracy,▁▁▁▁▁▁▁▁▄▄▄▄▆▆▆▆████████████████████████
test_accuracy,▁▁▁▁▁▁▁▁▃▅▃▄▃▇▇▆▆▆▇▆▇█▇▆▇█▆▆▅▇▆▆█▆▇▆▇▆▆▆
trani_accuracy,▁▁▁▁▁▁▁▁▁▂▂▂▂▄▅▅▅▅▆▆▇▇▇▇▇▇▇▇▇███████████
val_accuracy,▁▁▁▁▁▁▁▁▃▄▄▄▄▆▇▆▆▆▇▇▇██▇▇█▇▇▆▇▇▇█▇█▇▇▇▇▇

0,1
best_test_accuracy,0.69691
best_trani_accuracy,0.98382
best_val_accuracy,0.70151
test_accuracy,0.53861
trani_accuracy,0.98058
val_accuracy,0.67065


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▄▅▇███████████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▃▄▆▆▇▇████████████████████████████
best_val_accuracy,▁▁▁▁▁▁▅▆▇███████████████████████████████
test_accuracy,▁▁▁▁▁▁▄▅▇█▆▅▇▅▇▅▇▇▅▅▆▆▆▆▆▆▇▆▆▆▆▆▆▆▆▆▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▃▄▆▆▇▇████████████████████████████
val_accuracy,▁▁▁▁▁▁▅▆▇█▆▅▇▅▆▅▇▇▅▅▆▆▅▆▆▆▇▆▆▆▅▆▆▅▆▆▆▆▆▆

0,1
best_test_accuracy,0.62548
best_trani_accuracy,1.0
best_val_accuracy,0.68577
test_accuracy,0.55598
trani_accuracy,1.0
val_accuracy,0.63539


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▃▇▇▇▇▇████████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▂▄▄▆▆▆▇▇██████████████████████████
best_val_accuracy,▁▁▁▁▁▁▃▇▇▇██████████████████████████████
test_accuracy,▁▁▁▁▁▁▃▇▅▇█▄▅▆▇▇▆▇▅▇▇▇▆▇▆▆▆▆▇▇▇▆▆▆▇▇▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▂▄▄▆▆▅▇▇██████████████████████████
val_accuracy,▁▁▁▁▁▁▃▇▄▆█▄▅▆▇▇▆▇▅▇▇▇▆▆▅▆▆▆▆▆▆▆▅▅▆▆▆▆▆▆

0,1
best_test_accuracy,0.66988
best_trani_accuracy,1.0
best_val_accuracy,0.70025
test_accuracy,0.57722
trani_accuracy,1.0
val_accuracy,0.64547


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▇▇▇▇▇████████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▄▆▇▇▇████████████████████████████
best_val_accuracy,▁▂▂▂▂▂▂█████████████████████████████████
test_accuracy,▁▁▁▁▁▁▁▇▅▇▆▄█▆▇▆▆█▅▇▇▆▅▇▆▇▆▆▇▆▆▆▅▆▆▆▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▁▄▆▇▇▆████████████████████████████
val_accuracy,▁▂▂▂▂▂▂█▆▇▇▄█▇▇▆▆▇▅▆▆▆▆▆▆▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆

0,1
best_test_accuracy,0.62355
best_trani_accuracy,1.0
best_val_accuracy,0.6864
test_accuracy,0.5444
trani_accuracy,1.0
val_accuracy,0.63413


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▄▄████████████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▂▂▅▆▇▇▇███████████████████████████
best_val_accuracy,▁▁▁▁▁▁▄▄████████████████████████████████
test_accuracy,▁▁▁▁▁▁▄▂█▇▇▆▆▇▅▆▇▇▆▆▆▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▂▂▅▆▇▇▇███████████████████████████
val_accuracy,▁▁▁▁▁▁▄▃█▆▇▆▆▇▅▆▆▇▅▆▅▇▆▆▇▆▆▆▆▆▆▆▆▇▇▇▆▆▆▆

0,1
best_test_accuracy,0.61583
best_trani_accuracy,0.99676
best_val_accuracy,0.66814
test_accuracy,0.55019
trani_accuracy,0.99676
val_accuracy,0.63287


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▆▆▆▆▆▆▇▇▇▇█████████████████████████████
best_val_accuracy,▁▄▄▄▄▄▄▇▇▇▇▇▇▇▇▇▇▇██████████████████████
test_accuracy,█▁▁▁▁▁▂▅▄▄▄▅▆▆▆▇▄▆▆▅▅▅▅▆▅▅▅▆▅▅▅▅▅▅▆▅▅▅▅▅
trani_accuracy,▁▆▆▆▆▆▆▇▇▇▇█████████████████████████████
val_accuracy,▁▄▄▄▄▄▅▇▆▆▇█████▇▇█▇█▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇

0,1
best_test_accuracy,0.62162
best_trani_accuracy,1.0
best_val_accuracy,0.6738
test_accuracy,0.52703
trani_accuracy,1.0
val_accuracy,0.63602


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▂▅█████████████████████████████████
best_trani_accuracy,▁▁▁▁▁▂▄▄▆▆▇▇████████████████████████████
best_val_accuracy,▁▁▁▁▁▃▅█████████████████████████████████
test_accuracy,▁▁▁▁▁▂▅█▄▇▇▅▆▄▄▄▄▅▅▅▅▄▄▄▅▅▅▅▅▅▅▅▄▅▅▄▅▄▅▅
trani_accuracy,▁▁▁▁▁▂▄▃▆▆▆▇█▇██████████████████████████
val_accuracy,▁▁▁▁▁▃▅█▄▇█▅▆▄▄▄▅▆▅▅▅▅▄▅▅▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅

0,1
best_test_accuracy,0.64093
best_trani_accuracy,1.0
best_val_accuracy,0.66751
test_accuracy,0.51544
trani_accuracy,1.0
val_accuracy,0.61209


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▄▅▆▆██████████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▃▄▆▆▇▇████████████████████████████
best_val_accuracy,▁▁▁▁▁▁▄▅▇▇██████████████████████████████
test_accuracy,▁▁▁▁▁▁▅▆█▄▆▇█▇▆▆▇▅▇▆▇█▇▇▆▇▇▆▆▇▆▇▇▇▇▆▇▇▆▆
trani_accuracy,▁▁▁▁▁▁▃▄▆▅▇▇▇███████████████████████████
val_accuracy,▁▁▁▁▁▁▅▅█▃▅▇█▆▆▆▆▅▆▅▅▆▅▅▅▆▅▅▅▆▅▅▅▅▆▅▅▅▅▅

0,1
best_test_accuracy,0.62162
best_trani_accuracy,1.0
best_val_accuracy,0.67947
test_accuracy,0.52317
trani_accuracy,1.0
val_accuracy,0.60453


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▅▅████████████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▃▄▆▆▇▇▇███████████████████████████
best_val_accuracy,▁▁▁▁▁▁▆▆████████████████████████████████
test_accuracy,▁▁▁▁▁▁▃▅█▄▅█▅▇▄▅▇▅▆▆▅▆▆▇▆▆▆▅▆▆▆▆▅▆▆▆▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▃▄▆▅▇▇▇███████████████████████████
val_accuracy,▁▁▁▁▁▁▃▆█▄▆▇▆▆▄▆▆▅▆▅▅▅▅▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆

0,1
best_test_accuracy,0.61004
best_trani_accuracy,1.0
best_val_accuracy,0.67317
test_accuracy,0.55212
trani_accuracy,1.0
val_accuracy,0.62657


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▆▆▆▆▆▇▇▇▇██████████████████████████████
best_val_accuracy,▁▄▄▄▄▄▇▇▇▇▇█████████████████████████████
test_accuracy,█▁▁▁▁▁▆▄▅▅▄▇▆▆▅▆▅▄▄▅▆▆▅▆▄▅▄▅▆▅▅▅▆▅▅▅▄▅▅▅
trani_accuracy,▁▆▆▆▆▆▇▇▇▇██████████████████████████████
val_accuracy,▁▄▄▄▄▄▇▆▆▇▆███▇█▇▆▆▇██▇▇▇▇▇▇█▇▇▇█▇▇▇▇▇▇▇

0,1
best_test_accuracy,0.61969
best_trani_accuracy,0.99838
best_val_accuracy,0.66121
test_accuracy,0.50579
trani_accuracy,0.99838
val_accuracy,0.63161


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▅▆▆▆▆▆▆██████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▄▅▆▇▇████████████████████████████
best_val_accuracy,▁▁▁▁▁▁▁▅▅▅▆▇▇▇██████████████████████████
test_accuracy,▁▁▁▁▁▁▁▅▆▅▃▇▆▇▇▇▇▇▆▇▇▇▇▆▇▇▇▇█▅▆▇▆▇▆▇▆▆▇▇
trani_accuracy,▁▁▁▁▁▁▁▄▅▆▄▇████████████████████████████
val_accuracy,▁▁▁▁▁▁▁▅▅▄▂▇▆▇▆▇▇▇▅▆▇▇▆▆▇▇▆▇█▆▆▇▆▆▆▇▆▆▆▇

0,1
best_test_accuracy,0.59846
best_trani_accuracy,1.0
best_val_accuracy,0.67317
test_accuracy,0.54247
trani_accuracy,1.0
val_accuracy,0.64421


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▁████████████████████████████████████
best_val_accuracy,▁▁▁▄████████████████████████████████████
test_accuracy,████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▁████████████████████████████████████
val_accuracy,▁▁▁▄████████████████████████████████████

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.79126
best_val_accuracy,0.54534
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_test_accuracy,0.37838
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁██████████████████████████████████████
best_trani_accuracy,▁▁▂▆████████████████████████████████████
best_val_accuracy,▁▁▄█████████████████████████████████████
test_accuracy,▇▇█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▂▆████████████████████████████████████
val_accuracy,▁▁▄█████████████████████████████████████

0,1
best_test_accuracy,0.64865
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▆██████████████████████████████████████
best_val_accuracy,▁███████████████████████████████████████
test_accuracy,█▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▆██████████████████████████████████████
val_accuracy,▁█▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆

0,1
best_test_accuracy,0.53089
best_trani_accuracy,0.79126
best_val_accuracy,0.55227
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▂▇████████████████████████████████████
best_val_accuracy,▁▁▂█████████████████████████████████████
test_accuracy,███▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▂▇████████████████████████████████████
val_accuracy,▁▁▂█████████████████████████████████████

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁███████████████████████████████████████
best_trani_accuracy,▁▂██████████████████████████████████████
best_val_accuracy,▁▁▇█████████████████████████████████████
test_accuracy,██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▂██████████████████████████████████████
val_accuracy,▂▁▇█████████████████████████████████████

0,1
best_test_accuracy,0.62741
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_test_accuracy,0.37838
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_test_accuracy,0.37838
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_test_accuracy,0.37838
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▁▁▁██████████████████████████████████
best_val_accuracy,▁▁▁▁▁▁▆█████████████████████████████████
test_accuracy,██████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▁▁▁██████████████████████████████████
val_accuracy,▁▁▁▁▁▁▆█████████████████████████████████

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_test_accuracy,0.37838
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▂████████████████████████████████████
best_val_accuracy,▁▁▂▃████████████████████████████████████
test_accuracy,███▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▂████████████████████████████████████
val_accuracy,▁▁▂▃████████████████████████████████████

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_test_accuracy,0.37838
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▁████████████████████████████████████
best_val_accuracy,▁▁▁▂████████████████████████████████████
test_accuracy,████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▁████████████████████████████████████
val_accuracy,▁▁▁▂████████████████████████████████████

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁██████████████████████████████████████
best_trani_accuracy,▁▁▂▇████████████████████████████████████
best_val_accuracy,▁▁▁▅████████████████████████████████████
test_accuracy,██▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▂▇████████████████████████████████████
val_accuracy,▃▃▁▆████████████████████████████████████

0,1
best_test_accuracy,0.62355
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_test_accuracy,0.37838
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁█████████████████████████████████████
best_val_accuracy,▁▁▃█████████████████████████████████████
test_accuracy,███▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁█████████████████████████████████████
val_accuracy,▁▁▃█▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.79126
best_val_accuracy,0.63791
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▂██████████████████████████████████████
best_val_accuracy,▁▂██████████████████████████████████████
test_accuracy,█▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▂██████████████████████████████████████
val_accuracy,▁▂██████████████████████████████████████

0,1
best_test_accuracy,0.60232
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_test_accuracy,0.37838
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▁▂████████████████████████████████████
best_val_accuracy,▁▁▁▁▇███████████████████████████████████
test_accuracy,███▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trani_accuracy,▁▁▁▂████████████████████████████████████
val_accuracy,▄▄▄▁▇███████████████████████████████████

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.79126
best_val_accuracy,0.54156
test_accuracy,0.37838
trani_accuracy,0.79126
val_accuracy,0.54156
