In [None]:
from nltk.parse.corenlp import CoreNLPParser,CoreNLPDependencyParser
from tqdm.auto import trange, tqdm
from dataclasses import dataclass
import pandas as pd
import ast
import itertools
import wandb
import evaluate
from itertools import cycle
import numpy as np
import random
import time
from datetime import datetime
import collections
from sklearn.metrics import top_k_accuracy_score

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, GATConv, GATv2Conv, TransformerConv, SAGEConv, GraphConv, ResGatedGraphConv, ChebConv, global_mean_pool
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

from transformers import AutoTokenizer, get_scheduler
from transformers.models.bert.modeling_bert import BertModel


  return torch._C._cuda_getDeviceCount() > 0


In [None]:
# Show the installed transformers version as this cell's output.
import transformers
transformers.__version__

'4.18.0'

In [None]:
# Show the installed torch_geometric version as this cell's output.
import torch_geometric as pyg
pyg.__version__

'2.2.0'

In [None]:
# Use CUDA when PyTorch reports it as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

# definitions

In [None]:
def set_seed(seed):
    """Seed Python's ``random``, NumPy's global RNG and PyTorch with ``seed``.

    Args:
        seed: Value forwarded unchanged to each library's seeding function.
    """
    # Seeding order is the same as before: stdlib, NumPy, then torch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

In [None]:
@dataclass
class myGNNoutput:
    """Bundle of the values returned by ``myHomoGNN.forward``."""
    # Loss computed by the model's loss function on the batch.
    loss: torch.Tensor
    # Raw outputs of the linear classifier.
    logit: torch.Tensor
    # Features that were fed into the classifier (after readout and dropout).
    emb: torch.Tensor

In [None]:
def get_loader(df, add_syllables=False, col='pos_seqs', limit=None, batch_size=32, shuffle=True, max_length=128):
    """Convert the rows of ``df`` into PyG ``Data`` graphs and wrap them in a ``DataLoader``.

    For every kept row one ``Data`` object is built with:
      * ``edge_index`` -- ``row['homo_edges']`` as a tensor, transposed;
      * ``x`` -- last hidden state of the module-level ``bert`` applied to the
        space-joined entries of ``row[col]`` (tokenised with the module-level
        ``tokenizer``), with the batch dimension squeezed away;
      * ``y`` -- ``row['author']`` wrapped in a 1-element tensor;
      * ``num_syllables`` -- only when ``add_syllables`` is true;
      * ``doc_id`` -- only when the row has a ``'doc_id'`` entry.

    Args:
        df: DataFrame providing at least ``'homo_edges'``, ``'author'`` and ``col``
            (plus ``'num_syllables'`` when ``add_syllables`` is true).
        add_syllables: Attach the per-token syllable counts to each graph.
        col: Name of the column holding the tag sequence fed to BERT.
        limit: If given, use a random subset of at most ``limit`` rows
            (the frame is shuffled with ``df.sample(frac=1)`` first).
        batch_size: Batch size of the returned loader.
        shuffle: Whether the returned loader shuffles the graphs.
        max_length: Tokeniser ``max_length``; rows whose edge count is
            ``>= max_length - 1`` are dropped.

    Returns:
        A ``torch_geometric.loader.DataLoader`` over the kept graphs.
    """
    if limit is not None:
        # Shuffle first so the truncation below yields a random subset.
        dfnew = df.sample(frac=1).reset_index(drop=True)[:limit]
    else:
        dfnew = df

    data_list = []
    count = 0  # number of rows skipped for being too long
    for i in trange(len(dfnew), leave=False):
        # Index the (possibly subsampled) frame. Indexing `df` here, as the
        # code used to, ignored the shuffle and always took the first rows.
        curr = dfnew.iloc[i]

        edge_index = torch.tensor(curr['homo_edges']).T
        if edge_index.shape[1] >= max_length-1:
            count += 1
            continue

        data = Data()
        data.edge_index = edge_index

        tokens = tokenizer(' '.join(curr[col]), padding=True, truncation=True, max_length=max_length, return_tensors='pt')
        # Node features come from a feature extractor only, so skip autograd
        # bookkeeping instead of building a graph and detaching afterwards.
        with torch.no_grad():
            data.x = bert(**tokens).last_hidden_state.squeeze(0)
        data.y = torch.tensor([curr['author']])
        if add_syllables:
            # 17 is placed at both ends of the sequence -- presumably the slots
            # of the tokenizer's special tokens; TODO confirm.
            data.num_syllables = torch.tensor([17]+curr['num_syllables']+[17])

        if 'doc_id' in curr:
            data.doc_id = torch.tensor([curr['doc_id']])
        data_list.append(data)

    print(f'{count} data dropped because of exceeding max_length {max_length}')
    return DataLoader(data_list, batch_size=batch_size, shuffle=shuffle)


In [None]:
# Lookup table from the `gnntype` string accepted by the model to the
# torch_geometric convolution class it selects.
GNNtype2layer = dict(
    GCNConv=GCNConv,
    ChebConv=ChebConv,
    SAGEConv=SAGEConv,
    GraphConv=GraphConv,
    ResGatedGraphConv=ResGatedGraphConv,
    GATConv=GATConv,
    GATv2Conv=GATv2Conv,
)

class myHomoGNN(torch.nn.Module):
    """Stack of identical graph-convolution layers followed by a linear classifier.

    Node features pass through ``num_layers`` convolutions (each followed by
    ReLU), are reduced to one vector per graph by the chosen readout, go
    through dropout and are classified by a single linear layer.

    Args:
        num_layers: Number of graph-convolution layers.
        num_classes: Output size of the classifier.
        add_self_loops: Forwarded to the layer constructor for ``GCNConv``,
            ``GATConv`` and ``GATv2Conv``; ignored for the other layer types.
        gnntype: Key into ``GNNtype2layer`` selecting the layer class
            (an unknown key raises ``KeyError``).
        add_syllables: If true, a learned embedding of each node's syllable
            count is added to the node features before the first layer.
        pos_emb_dim: Width of the node features and of every hidden layer.
            The default of 32 is the value the code previously hard-coded
            (noted there as "determined by POS Bert").
    """

    def __init__(self, num_layers, num_classes, add_self_loops=False, gnntype='GCNConv', add_syllables=False, pos_emb_dim=32):
        super().__init__()
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.add_self_loops = add_self_loops
        self.GNNlayer = GNNtype2layer[gnntype]
        self.pos_emb_dim = pos_emb_dim

        self.add_syllables = add_syllables
        if add_syllables:
            self.num_syllables = 18 # the longest word has 17 syllables
            self.syllable_emb_layer = nn.Embedding(self.num_syllables, self.pos_emb_dim)

        # Layer-type specific constructor arguments, decided once.
        if gnntype in ('GCNConv', 'GATConv', 'GATv2Conv'):
            layer_kwargs = {'add_self_loops': self.add_self_loops}
        elif gnntype == 'ChebConv':
            layer_kwargs = {'K': 2}
        else:
            layer_kwargs = {}

        # Modules are created in the same order as before (embedding, convs,
        # classifier) so seeded parameter initialisation is unchanged.
        self.gnns = nn.ModuleList(
            [self.GNNlayer(self.pos_emb_dim, self.pos_emb_dim, **layer_kwargs)
             for _ in range(num_layers)]
        )

        self.classifier = nn.Linear(self.pos_emb_dim, self.num_classes)
        self.lossfn = nn.CrossEntropyLoss()

    def forward(self, x, edge_index, batch, y, ptr, num_syllable=None, readout='pool'):
        """Run the network on a batch of graphs.

        Args:
            x: Node feature tensor.
            edge_index: Edge index tensor passed to every convolution.
            batch: Node-to-graph assignment used by ``global_mean_pool``.
            y: Targets passed to the cross-entropy loss.
            ptr: Index tensor; with ``readout='cls'`` the rows ``ptr[:-1]`` of the
                node features are selected (presumably the first node of each
                graph, as in a PyG batch -- TODO confirm).
            num_syllable: Integer tensor of per-node syllable counts; required
                when the model was built with ``add_syllables=True``.
            readout: ``'pool'`` for mean pooling over each graph, ``'cls'`` for
                picking the rows indexed by ``ptr[:-1]``.

        Returns:
            ``myGNNoutput`` with the loss, the classifier logits and the
            post-dropout features fed to the classifier.

        Raises:
            ValueError: If ``readout`` is not ``'pool'`` or ``'cls'``, or if
                syllable embeddings are enabled but ``num_syllable`` is ``None``.
        """
        # An unknown readout used to fall through without any pooling, leaving
        # node-level features to be classified against graph-level targets.
        if readout not in ('pool', 'cls'):
            raise ValueError(f"readout must be 'pool' or 'cls', got {readout!r}")

        if self.add_syllables:
            if num_syllable is None:
                raise ValueError("num_syllable is required when add_syllables=True")
            x = x + self.syllable_emb_layer(num_syllable)

        for gnn in self.gnns:
            x = F.relu(gnn(x, edge_index))

        if readout == 'pool':
            x = global_mean_pool(x, batch)
        else:  # readout == 'cls'
            x = x[ptr[:-1],:]

        x = F.dropout(x, training=self.training)
        logit = self.classifier(x)
        loss = self.lossfn(logit, y)
        return myGNNoutput(loss=loss, logit=logit, emb=x)

In [None]:
# Load the tokenizer and encoder of the pretrained POS BERT from local files only.
checkpoint = '/scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, local_files_only=True)
# The encoder is built without the pooling layer and put straight into eval
# mode (`.eval()` returns the module itself).
bert = BertModel.from_pretrained(
    checkpoint,
    local_files_only=True,
    add_pooling_layer=False,
).eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/ were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


# CCAT50

In [None]:
# Columns that are parsed back into Python objects with ast.literal_eval after
# the CSV has been read.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']

# Read the train and validation splits the same way, one after the other.
_loaded_frames = []
for file in ('../../data/CCAT50/processed/author_0,1_sent_2_train.csv',
             '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'):
    _frame = pd.read_csv(file)
    for col in cols_to_eval:
        _frame[col] = _frame[col].apply(ast.literal_eval)
    _loaded_frames.append(_frame)

df, df_val = _loaded_frames

## testing dataset size only

In [None]:
# Dataset-size sweep on the POS-tag inputs: for every combination of training
# subset size, learning rate and self-loop setting, train a fresh GNN and log
# the validation accuracy to wandb after every epoch.
preset_epochs = 60   # epochs given to the largest training subset

warmup_ratio = 0.15  # share of all optimiser steps used for linear warm-up

valid_loader = get_loader(df_val)
num_valid_steps = len(valid_loader)

LIMIT = [1250, 1000, 750, 500, 250]
NUM_LAYERS = [3]
LR = [1e-3, 5e-4]
READOUT = ['pool']
GNNTYPE = ['GCNConv']
ADD_SELF_LOOPS = [False, True]

# Materialise the grid once so it can be counted and iterated without
# rebuilding the product iterator.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Loading the metric is loop-invariant; it is reused across epochs and runs,
# with one add_batch(...)/compute() cycle per evaluation.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops = args

    # Smaller subsets get proportionally more epochs, so epochs * limit stays
    # (up to integer division) at max(LIMIT) * preset_epochs.
    epochs = max(LIMIT) * preset_epochs // limit
    # The seed is drawn from NumPy's global RNG and recorded in the wandb config.
    seed = np.random.randint(100)
    set_seed(seed)

    train_loader = get_loader(df, limit = limit)
    num_training_steps = len(train_loader)

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype
                     )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    total_steps = epochs*num_training_steps
    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*total_steps),
                            num_training_steps=total_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
    }

    run = wandb.init(project="homo POS GNN (dataset size)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    pbar = trange(total_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            model.eval()
            # Validation needs no gradients; skipping autograd avoids building
            # a graph for every validation batch.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
                    metric.add_batch(predictions=output.logit.argmax(axis=-1).cpu().numpy(), references=data.y.cpu().numpy())
            evaluation = metric.compute()
            evaluation.update({'global_step':pbar.n})
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Close the bar and the wandb run even when an epoch raises, so a
        # failed run does not stay open.
        pbar.close()
        run.finish()

    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/20 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▂▂▄▄▄▅▆█▆▇█▇████████▇████████▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.66667
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▂▆▃▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇█▇▇█▇▇█▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73333
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▄▅▅▅▅▆▆▆▇▇▇▆████▇▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.64848
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▂▄▂▅▂▅▅▅▅▅▆▅▆▆▅▆▇▇▇▇▇▇▇▇▇▇█▇█
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.67273
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇█▇███▇███▇▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.55758
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▃▃▃▃▃▅▃▄▄▆▅▇▆▆▇▅▇▅▅█▇▇▇▇▆▆▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.6697
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▄▄▄▄▄▄▄▄▄▄▄▄▅▄▅▅▄▅▅▇▅▅▅█▇▆▇█▇▇▇█▇███
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.60303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▇██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▅██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333923021952311, max=1.0)…

  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▅██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


## testing upos

In [None]:
# Columns that are parsed back into Python objects with ast.literal_eval after
# the CSV has been read.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']

# Read the train and validation splits the same way, one after the other.
_loaded_frames = []
for file in ('../../data/CCAT50/processed/author_0,1_sent_2_train.csv',
             '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'):
    _frame = pd.read_csv(file)
    for col in cols_to_eval:
        _frame[col] = _frame[col].apply(ast.literal_eval)
    _loaded_frames.append(_frame)

df, df_val = _loaded_frames

In [None]:
# Replace the module-level tokenizer and encoder with the UPOS BERT checkpoint,
# loaded from local files only.
checkpoint = '/scratch/data_jz17d/result/upos_mlm_corenlp/run_2/checkpoint-155000/'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, local_files_only=True)
# The encoder is built without the pooling layer and put straight into eval
# mode (`.eval()` returns the module itself).
bert = BertModel.from_pretrained(
    checkpoint,
    local_files_only=True,
    add_pooling_layer=False,
).eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/upos_mlm_corenlp/run_2/checkpoint-155000/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Dataset-size sweep on the UPOS-tag inputs: for every combination of training
# subset size, learning rate and self-loop setting, train a fresh GNN and log
# the validation accuracy to wandb after every epoch.
preset_epochs = 60   # epochs given to the largest training subset

warmup_ratio = 0.15  # share of all optimiser steps used for linear warm-up

valid_loader = get_loader(df_val, col='upos_seqs')
num_valid_steps = len(valid_loader)

LIMIT = [1250, 1000, 750, 500, 250]
NUM_LAYERS = [3]
LR = [1e-3, 5e-4]
READOUT = ['pool']
GNNTYPE = ['GCNConv']
ADD_SELF_LOOPS = [False, True]

# Materialise the grid once so it can be counted and iterated without
# rebuilding the product iterator.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Loading the metric is loop-invariant; it is reused across epochs and runs,
# with one add_batch(...)/compute() cycle per evaluation.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops = args

    # Smaller subsets get proportionally more epochs, so epochs * limit stays
    # (up to integer division) at max(LIMIT) * preset_epochs.
    epochs = max(LIMIT) * preset_epochs // limit
    # The seed is drawn from NumPy's global RNG and recorded in the wandb config.
    seed = np.random.randint(100)
    set_seed(seed)

    train_loader = get_loader(df, col = 'upos_seqs', limit = limit)
    num_training_steps = len(train_loader)

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype
                     )

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    total_steps = epochs*num_training_steps
    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*total_steps),
                            num_training_steps=total_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
    }

    run = wandb.init(project="homo UPOS GNN (dataset size)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    pbar = trange(total_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            model.eval()
            # Validation needs no gradients; skipping autograd avoids building
            # a graph for every validation batch.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
                    logit = output.logit
                    metric.add_batch(predictions=logit.argmax(axis=-1).cpu().numpy(), references=data.y.cpu().numpy())
            evaluation = metric.compute()
            evaluation.update({'global_step':pbar.n})
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Close the bar and the wandb run even when an epoch raises, so a
        # failed run does not stay open.
        pbar.close()
        run.finish()

    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/20 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▅▇▇▇▇▇▇▇▇██▇▇▇▇▇▇▇▇▇██▇█▇████▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.6303
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▃▄▆▆▆▆▆▅▆▆▆▇▇▆▇▆▇▇▇▆███▇██▇██▇█████▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.62424
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▃▄▆▇▇▇▇▇▇▇█▇▇█████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.63333
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▅▆▆▆█▅▆▅▅▆▆▅▆▆▆▇▆▆▆▆▆▇▇▆▆▇▆▆▆▆█▆▇▆
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.59394
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▄▄▄▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇█████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.58788
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▃▅▅▆▅▅▅▆▅▇▆▆▆▆▇▆█▆▆█▇▆▇▇▇█▇▇█▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.64848
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▃▃▂▃▃▄▅▆▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.57879
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▄▄▄▄▄▄▅▆▆▆▆▇▇█▇▇▇▇████▇█████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.61212
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▇█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


## testing gnntype

In [None]:
# Directory of the pretrained checkpoint; both the tokenizer and the encoder
# are read from it with local_files_only=True (no hub lookup).
checkpoint = '/scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, local_files_only=True)

# BERT encoder without the pooling layer, switched to eval mode right away
# (nn.Module.eval() returns the module itself).
bert = BertModel.from_pretrained(
    checkpoint,
    local_files_only=True,
    add_pooling_layer=False,
).eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Columns whose cells are stored in the CSV as text and are turned back into
# Python objects with ast.literal_eval right after loading.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']

# Load both splits through the same read-then-parse steps.
loaded_frames = []
for file in ('../../data/CCAT50/processed/author_0,1_sent_2_train.csv',
             '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'):
    frame = pd.read_csv(file)
    for col in cols_to_eval:
        frame[col] = frame[col].apply(ast.literal_eval)
    loaded_frames.append(frame)

# Order matches the tuple above: training split first, validation split second.
df, df_val = loaded_frames

In [None]:
# Sweep over GNN layer types: one training run (and one wandb run) is executed
# for every combination of the grids defined below.
epochs = 60

# Fraction of all optimisation steps handed to the scheduler as warm-up steps.
warmup_ratio = 0.15

LIMIT = [None]
NUM_LAYERS = [3]
LR = [1e-3, 5e-4]
READOUT = ['pool']
GNNTYPE = ['GCNConv', 'ChebConv', 'SAGEConv', 'GraphConv', 'ResGatedGraphConv', 'GATConv', 'GATv2Conv']
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [False]

# itertools.product is a single-use iterator; materialise it once so it can be
# both counted and iterated instead of building it twice.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS, ADD_SYLLABLES))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Loaded once for the whole sweep: compute() scores the batches added since the
# previous compute(), so the same metric object can be reused every epoch.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops, add_syllables = args

    # NOTE(review): set_seed() re-seeds np.random, so from the second run on the
    # seed drawn here is determined by the previous run's seed. The value is
    # recorded in the wandb config below.
    seed = np.random.randint(100)
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit=limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype,
                      add_syllables=add_syllables
                     )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # "linear" schedule over every optimisation step of this run, with the
    # first warmup_ratio share of them used as warm-up.
    scheduler = get_scheduler("linear",
                              optimizer=optimizer,
                              num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                              num_training_steps=epochs*num_training_steps)

    # Hyper-parameters of this run, stored alongside the wandb logs.
    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="homo POS GNN (gnntype)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                # The syllable counts are passed as an extra positional input
                # only when the model was built with add_syllables=True.
                extra_inputs = (data.num_syllables,) if add_syllables else ()
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            model.eval()
            # eval() alone does not switch off autograd; no_grad() avoids
            # building a graph for every validation batch.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    extra_inputs = (data.num_syllables,) if add_syllables else ()
                    output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
                    metric.add_batch(predictions=output.logit.argmax(dim=-1).cpu().numpy(),
                                     references=data.y.cpu().numpy())
            evaluation = metric.compute()
            # pbar.n is the number of optimisation steps taken so far in this run.
            evaluation.update({'global_step': pbar.n})
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Close the bar and the wandb run even when training is interrupted.
        pbar.close()
        run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/14 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▅▆▆▆▅▇▆▇▇▆▇▆▇▆█▇▇█▇▇▇▆█▇█▇██▇█████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76061
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▃▃▃▅▅▅▆▆▆▆▆▇▇▇█▇▇█▇▇▇▇▇████▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73939
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▅▄▅▅▇▅▇▇▆▇▇█▇███▇▇█▇█▇▇▇▇▇▇▇███▇▇▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72727
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▄▂▅▅▆▄▆▆▇▇▇▇▇▇▇▇▇▇█▇▇█▇▇▇████▇▇██▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▂▄▃▆▆▅▆▆▇▇▇▇▇▇▇▇██▇▇▇▇▇█▇▇▇█▇▇▇█▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75152
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▃▂▄▃▆▆▆▇▇▇█▇▇▇▇▇█▇██▇█▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▃▄▃▃▅▆▆▆▇▆▇▇▆▇▇▇▇█▇▇▇██▇▇████▇▇▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75152
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▃▃▃▃▃▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇█▇▇█▇▇██▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.69394
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▂▂▂▃▃▃▃▆▆▆▆▆▆▇▇▆▇▆▇▇█▇▇██▇▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76364
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▂▃▅▄▅▆▆▆▅▆▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇██▇█▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72121
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▄▅▆▆▆▆▇▆▆▇▇▆▇▇▇▇█▇▇▇█▇██▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70909
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▆▆▆▇▇▇▇▇▇▇▇▆▇█▇▇██▇███████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71818
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▂▄▅▅▅▄▆▅▅▆▆▆▆▆▇▇▇▇█▇██▇▇███▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71515
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▄▄▅▄▅▅▅▅▇▇▆▆▇█▇█▇▇█▇▇███▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70303
global_step,2460.0


## adding num of syllables

In [None]:
# Sweep comparing runs with and without the syllable-count input: one training
# run (and one wandb run) is executed for every combination of the grids below.
epochs = 60

# Fraction of all optimisation steps handed to the scheduler as warm-up steps.
warmup_ratio = 0.15

LIMIT = [None]
NUM_LAYERS = [3]
LR = [1e-3, 5e-4]
READOUT = ['pool']
GNNTYPE = ['GCNConv', 'GATConv', 'GATv2Conv']
ADD_SELF_LOOPS = [False, True]
ADD_SYLLABLES = [False, True]

# itertools.product is a single-use iterator; materialise it once so it can be
# both counted and iterated instead of building it twice.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS, ADD_SYLLABLES))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Loaded once for the whole sweep: compute() scores the batches added since the
# previous compute(), so the same metric object can be reused every epoch.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops, add_syllables = args

    # NOTE(review): set_seed() re-seeds np.random, so from the second run on the
    # seed drawn here is determined by the previous run's seed. The value is
    # recorded in the wandb config below.
    seed = np.random.randint(100)
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit=limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype,
                      add_syllables=add_syllables
                     )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # "linear" schedule over every optimisation step of this run, with the
    # first warmup_ratio share of them used as warm-up.
    scheduler = get_scheduler("linear",
                              optimizer=optimizer,
                              num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                              num_training_steps=epochs*num_training_steps)

    # Hyper-parameters of this run, stored alongside the wandb logs.
    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="homo POS GNN (syllables)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                # The syllable counts are passed as an extra positional input
                # only when the model was built with add_syllables=True.
                extra_inputs = (data.num_syllables,) if add_syllables else ()
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            model.eval()
            # eval() alone does not switch off autograd; no_grad() avoids
            # building a graph for every validation batch.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    extra_inputs = (data.num_syllables,) if add_syllables else ()
                    output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
                    metric.add_batch(predictions=output.logit.argmax(dim=-1).cpu().numpy(),
                                     references=data.y.cpu().numpy())
            evaluation = metric.compute()
            # pbar.n is the number of optimisation steps taken so far in this run.
            evaluation.update({'global_step': pbar.n})
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Close the bar and the wandb run even when training is interrupted.
        pbar.close()
        run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/24 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▃▃▃▃▄▃█▇▇▆███▇▇█▇▇▇▇█▇▇▇█▇▇▇▇█▇█▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.65758
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▃▃▃▃▄▅▇▇▆▇▇▇▇▇▇▇▇██▇▇████▇███▇██▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71818
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▃▃▃▃▅▄▆▄▇▇▇▇█▅▇▇▇▇▇█▇▇██▇██▇██████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71515
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▂▂▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▆▇▇▇▇███████▇██████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75152
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▃▄▆▆█▆▇▇▇▇▇▇█▇█▇██▇█▇██▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72121
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▃▅▇▆▇█▇█▇███▇███▇▇▇█▇▇▇▇▇▇█▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.69091
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▃▂▄▃▆▆▇▇▇▇█▇▇▇▇▇████▇█▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▂▂▅▅▅▅▆▆▇▆▇▇▇▇▇▇█▇█▇██████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76667
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▆▅▇█▆▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.6697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▄▅▆▆▇▆▇▇▇████████▇▇██▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70606
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▃▄▅▄▆▆▆▇▆▆█▇▇▇▇███▇██▇█▇██████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▃▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇█▇▇▇▇██▇▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73636
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▄▄▅▆▆▇▇▇▇█▇█████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.67879
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▄▄▅▅▅▅▆▆▆▆▆▆▇▇██▇▇▇█▇▇███▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.67879
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333768049875895, max=1.0)…

  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▃▃▃▃▃▃▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇█▇▇█▇▇██▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.69091
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▄▂▃▄▄▄▅▅▅▆▅▆▇▇▆▆▆▇▇▇▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74242
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▃▅▆▆▇▇▇▇██████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73636
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▄▅▅▆▅▅▆▇▇▇▇▇▇▇▇▇▇▇▇▇█▇██████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.69697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▂▂▄▅▅▄▆▆▆▇▇▇▇█▇██▇██▇██▇█▇▇█▇██▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70303
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▃▅▅▄▆▅▆▅▆▅▆▆▆▆▆▆▇▇▇▇▇▇█▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70909
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▂▁▅▅▅▅▇▆██▇▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.6697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▃▃▃▃▆▅▇▇▇▆▆▇▇▇▇▇▇▇█▇██▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.68788
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▄▄▅▄▅▅▄▅▇▇▆▆▇█▇█▇▇█▇▇███▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70303
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▃▃▃▃▄▃▄▅▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇█▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7
global_step,2460.0


## Best configuration for 2 authors

In [None]:
# Sweep over the "best" configurations for the binary (num_classes=2) task.
# Every grid point is trained for `epochs` epochs and validated after each
# epoch; the per-epoch accuracy and the running best accuracy are logged to
# Weights & Biases under one run per grid point.
epochs = 60
warmup_ratio = 0.15  # fraction of all optimizer steps spent on LR warm-up
monitering_metric = 'accuracy'  # key of the evaluation dict tracked as "best"

LIMIT = [None]
NUM_LAYERS = [3]
LR = [1e-3]
READOUT = ['pool']
GNNTYPE = ['GCNConv', 'ChebConv']
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [True, False]
REPEAT = list(range(5))  # each configuration is repeated, every repeat with its own seed

# Materialise the grid once so it can be both counted and iterated
# (an itertools.product iterator is exhausted after a single pass).
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS, ADD_SYLLABLES, REPEAT))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops, add_syllables, repeat = args

    # Time-based seed: repeats intentionally differ; the value is stored in the
    # wandb config so an individual run can still be reproduced.
    seed = int(datetime.now().timestamp())
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit=limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype,
                      add_syllables=add_syllables
                     )

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear schedule with warm-up; it is stepped once per training batch,
    # so both step counts are expressed in batches over the whole run.
    total_steps = epochs * num_training_steps
    scheduler = get_scheduler("linear",
                              optimizer=optimizer,
                              num_warmup_steps=int(warmup_ratio * total_steps),
                              num_training_steps=total_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="homo POS GNN (best)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    # Loaded once per run: the metric object can be reused after compute(),
    # so there is no need to reload it every epoch.
    metric = evaluate.load('accuracy')

    best_metric = 0.0
    pbar = trange(total_steps, leave=False)
    for i_epoch in range(epochs):
        model.train()
        for data in train_loader:
            data.to(device)
            optimizer.zero_grad()
            if add_syllables:
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, data.num_syllables, readout=readout)
            else:
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
            output.loss.backward()
            optimizer.step()
            scheduler.step()
            pbar.update(1)

        model.eval()
        # Validation needs no gradients; no_grad avoids building autograd
        # graphs for every validation batch.
        with torch.no_grad():
            for data in valid_loader:
                data.to(device)
                if add_syllables:
                    output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, data.num_syllables, readout=readout)
                else:
                    output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
                metric.add_batch(predictions=output.logit.argmax(axis=-1).cpu().detach().numpy(), references=data.y.cpu().numpy())
        evaluation = metric.compute()
        evaluation.update({'global_step': pbar.n})
        if best_metric < evaluation[monitering_metric]:
            best_metric = evaluation[monitering_metric]
        # Log the running best together with the epoch metrics so both are
        # recorded at the same, explicit step.
        evaluation[f'best_{monitering_metric}'] = best_metric
        wandb.log(evaluation, step=pbar.n)

    pbar.close()
    run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/20 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▅▆▆▆▇▆▇▇▇▇▇█▇█▇██▇▇███████████████
best_accuracy,▁▁▂▂▃▃▅▆▆▆▇▇▇▇▇▇▇███████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75152
best_accuracy,0.76667
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▅▂▅▆▅▆▆▆▅▇▇▅▆█▇▇▆▇▆▇▇▇▇█▆█▇█▇▇▇▇█▇
best_accuracy,▁▁▁▁▁▂▅▅▅▆▇▇▇▇▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76061
best_accuracy,0.78485
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▄▁▃▄▅▅▅▆▆▆▆▆▆▇▆▆█▇███▇██▇▇█▇▇█▇████
best_accuracy,▁▁▁▁▁▄▄▄▄▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74242
best_accuracy,0.75758
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▂▂▅▄▅▅▆▆▆▅▇▇▇▆▇▇▇▆▇▇████▇▇████▇█████
best_accuracy,▁▂▂▂▂▂▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75152
best_accuracy,0.75152
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▂▁▅▄▅▄▆▇▇▅▆▇█▆▆▇▇▆█▇▇▇▇▇▇▇██▇████
best_accuracy,▁▁▁▁▁▁▁▂▂▅▅▅▅▆▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74545
best_accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▄▅▅▇▇▅▇▆▇▇▇▇▇▇████▇▇███████████████
best_accuracy,▁▁▁▁▁▄▅▅▇▇▇▇▇▇▇▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73939
best_accuracy,0.74242
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▅▄▇▇▇▆▇▇▇▇▇▇▇▇▇███▇█▇█████████████
best_accuracy,▁▁▃▃▃▃▅▅▇▇▇▇▇▇▇▇▇▇▇▇████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74242
best_accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▃▃▄▅▅▆▆▇▇▇▇▇▇▇▇████▇▇▇████████▇█████
best_accuracy,▁▂▃▃▃▃▄▅▅▆▆▇▇▇▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73939
best_accuracy,0.75152
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▃▃▅▆▆▆▇▇▇▇▇▆▇▇▇██▇██▇▇███████▇████
best_accuracy,▁▃▃▃▃▃▃▃▅▆▆▆▇▇▇▇▇▇▇▇▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75455
best_accuracy,0.75758
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▅▄▆▅▆▆▆▆▇▇▇▇██▇▇▇██▇▇███▇████▇████
best_accuracy,▁▃▃▃▃▃▅▅▆▆▆▆▇▇▇▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71818
best_accuracy,0.7303
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▁▃▂▄▄▅▅▇▆▇▇▇▇▇▆▇▇▇▇█▆▇▇██▇▇████▇███
best_accuracy,▁▁▁▁▂▂▃▃▄▄▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75455
best_accuracy,0.76364
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▅▃▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇█▇█████████████████
best_accuracy,▁▁▁▁▄▄▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73939
best_accuracy,0.74848
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▂▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇██▇████████████
best_accuracy,▁▃▃▃▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇███████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78182
best_accuracy,0.79697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▁▂▄▄▅▅▆▆▆▇▇▇▇█▇▇██▇████████▇███████
best_accuracy,▁▁▁▁▃▃▃▄▅▅▅▆▆▆▆▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74848
best_accuracy,0.75455
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▂▆▂▆▃▅▅▆▅▆▆▆▇▆▇▇▇▇▇▇███▇██▇████▇█████
best_accuracy,▁▁▂▂▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇█████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76667
best_accuracy,0.7697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▄▅▆▆▆▆▆▇▇▇▇▇▇▇▆█▇▇█▇▇██▇████▇▇██████
best_accuracy,▁▁▃▃▃▅▆▆▆▆▇▇▇▇▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72727
best_accuracy,0.74242
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▁▁▆▅▆▅▇▇▇▆▇▇▅▆▆▇▇█▇▇▇█▇▇██▇███▇▇▇███▇█
best_accuracy,▁▁▁▁▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71212
best_accuracy,0.72424
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▁▁▁▂▅▄▆▅▆▆▇▇▇▇▇▇▇▇▇█████▇▇█▇██▇███████
best_accuracy,▁▁▁▁▁▁▄▄▆▆▆▆▇▇▇▇▇▇▇▇▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74848
best_accuracy,0.76061
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033339587847391765, max=1.0…

  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▁▁▃▃▃▅▄▄▆▆▆▆▇▇▇▆▇▆▇▇▇▇▇▇▇██▇█▇██▇████▇
best_accuracy,▁▁▁▁▃▃▃▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73636
best_accuracy,0.75758
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▂▃▅▅▆▆▇▇▇▇█▇▇▇█▇▇█▇▇▇▇██▇████▇██▇███
best_accuracy,▁▂▂▂▂▃▅▅▆▆▆▇▇▇██████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72121
best_accuracy,0.7303
global_step,2460.0


## Document-level accuracy (doc acc)

In [None]:
# Sweep that additionally reports document-level accuracy ("doc_acc").
# After every epoch the per-sample validation predictions are grouped by
# `doc_id`, a majority vote gives one prediction per document, and the share
# of documents whose vote matches the document's label is logged next to the
# per-sample accuracy.
epochs = 60
warmup_ratio = 0.15  # fraction of all optimizer steps spent on LR warm-up
monitering_metric = 'accuracy'

LIMIT = [None]
NUM_LAYERS = [3]
LR = [1e-3, 2e-3]
READOUT = ['pool']
GNNTYPE = ['GCNConv', 'ChebConv']
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [True, False]
REPEAT = list(range(5))  # each configuration is repeated, every repeat with its own seed

# Materialise the grid once so it can be both counted and iterated
# (an itertools.product iterator is exhausted after a single pass).
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS, ADD_SYLLABLES, REPEAT))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops, add_syllables, repeat = args

    # Time-based seed: repeats intentionally differ; the value is stored in the
    # wandb config so an individual run can still be reproduced.
    seed = int(datetime.now().timestamp())
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit=limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype,
                      add_syllables=add_syllables
                     )

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear schedule with warm-up; it is stepped once per training batch,
    # so both step counts are expressed in batches over the whole run.
    total_steps = epochs * num_training_steps
    scheduler = get_scheduler("linear",
                              optimizer=optimizer,
                              num_warmup_steps=int(warmup_ratio * total_steps),
                              num_training_steps=total_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="homo POS GNN (doc acc)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    # Loaded once per run: the metric object can be reused after compute(),
    # so there is no need to reload it every epoch.
    metric = evaluate.load('accuracy')

    pbar = trange(total_steps, leave=False)
    for i_epoch in range(epochs):
        model.train()
        for data in train_loader:
            data.to(device)
            optimizer.zero_grad()
            if add_syllables:
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, data.num_syllables, readout=readout)
            else:
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
            output.loss.backward()
            optimizer.step()
            scheduler.step()
            pbar.update(1)

        model.eval()
        # Per-batch frames are collected and concatenated once after the loop;
        # growing a DataFrame with pd.concat inside the loop is quadratic.
        doc_frames = []
        # Validation needs no gradients; no_grad avoids building autograd
        # graphs for every validation batch.
        with torch.no_grad():
            for data in valid_loader:
                data.to(device)
                if add_syllables:
                    output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, data.num_syllables, readout=readout)
                else:
                    output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
                preds = output.logit.argmax(axis=-1).cpu().detach().numpy()
                labels = data.y.cpu().detach().numpy()
                metric.add_batch(predictions=preds, references=labels)
                doc_frames.append(pd.DataFrame({'doc_id': data.doc_id.cpu().detach().numpy(),
                                                'author': labels,
                                                'pred': preds}))
        df_doc_acc = pd.concat(doc_frames)
        gb = df_doc_acc.groupby('doc_id')
        # Majority vote per document: value_counts() sorts by frequency, so
        # index[0] is the most frequent prediction. The document's label is
        # taken from its first row (assumes one label per doc_id - TODO confirm).
        doc_acc = (gb['pred'].agg(lambda x: x.value_counts().index[0]) == gb['author'].first()).mean()
        evaluation = metric.compute()
        evaluation.update({'doc_acc': doc_acc})
        wandb.log(evaluation, step=pbar.n)

    pbar.close()
    run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/40 [00:00<?, ?it/s]

data 908 too long length 134


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033338189125061035, max=1.0…

  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▂▂▃▃▅▅▅▅▆▆▆▅▆▇▇▆▇▆█▇▇▇▇▇▇▇▇█▇▇▇▇
doc_acc,▂▂▂▂▂▂▂▂▁▁▃▂▅▆▆▇▇▆▇▄▄▇▇▅▇▆▇▆█▆▇▇▇▇▅█▆▆█▇

0,1
accuracy,0.71212
doc_acc,0.8


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▃▁▄▅▆▅▆▅▇▆▆▇▇▇▇▆█▇▇██▇█████▇██████
doc_acc,▁▁▁▁▁▁▂▁▄▃▆▃▆▅▅▅▆▅▇▅▆▄█▆▇██▄▇████▅██▇███

0,1
accuracy,0.7303
doc_acc,0.9


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.0333393653233846, max=1.0))…

  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▂▁▄▅▅▅▅▅▆▅▇▇▆▇▆▇▇▇█▇▇█▇▇██▇███████
doc_acc,▁▁▁▁▁▁▁▁▂▂▆▃▅▃▅▃▅▆▃▆▃▄▆▇▆▅▇▆▇▄▇▆▇▇█▆▇▆▆▇

0,1
accuracy,0.76364
doc_acc,0.9


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▂▆▄▄▆▇▇▇▆▇▅▇█▇█▇██▇█▇█████████████
doc_acc,▁▁▁▁▁▁▁▅▂▂▅▇▇▅▃▆▃▅█▅▆▄▇▅▄▇▆▄▆▇▆▇▇▇▇▆▇▇▆▆

0,1
accuracy,0.73636
doc_acc,0.85


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▂▂▂▄▅▅▅▆▅▇▆▇▇▆▇▇▇▇▇▇▇▇▇▇██▇██████████
doc_acc,▁▁▁▁▁▁▁▁▄▅▃▂▅▄▆▆▃▇▄▄▄▄▄▅█▇▄█▆▃▆▆██▇▄█▇██

0,1
accuracy,0.74848
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▄▃▆▆▆▆▆▇▇▆▇▇▇▇▇▇▇█▇█▇█▇▇██████████
doc_acc,▁▁▁▁▁▁▂▁▃▃▅▄▅▆▅▅▅▆▃▅▃▆▇▇▇▇▇▇▇▅█▇▇▇▇▇▇▇▇▇

0,1
accuracy,0.74848
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▃▂▆▆▆▅▇▇▆▇▇▇▇▇▇▇██▇▇█▇▇████▇█▇████
doc_acc,▁▁▁▁▁▁▁▁▄▃▃▂▇▇▅▇▅▇▅▅█▇▇▇▃▅▇▆▇▇▇▇▇▆█▇▇▇▇▇

0,1
accuracy,0.73939
doc_acc,0.9


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▃▂▂▆▅▆▇▇▇▇▇▇▇▇▇██▇██▇█▇▇█▇████████
doc_acc,▁▁▁▁▁▁▁▁▁▆▂▂▆▇▃▇▂▇▅▂▇▇▇▇▇█▇▅▇▇▇▂▇▇▇▇▇▇▇▇

0,1
accuracy,0.73636
doc_acc,0.9


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▅▂▅▅▅▆▆▆▇▆▆▆▇▇▆▇▇▇█▇█▇▇▇██████████
doc_acc,▁▁▁▁▁▁▅▁▅▃▆▃▆▅▅▇▅▅▇▇▄▇▇▅▇▇▇█▇▅▇▇▇▇▇▇▇▇▇▇

0,1
accuracy,0.76061
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▂▅▅▅▅▆▆▆▇▆▆▆▆▆▇▇▇▇▇██▇███▇▇███████
doc_acc,▁▁▁▁▁▁▁▃▄▄▆▅▆▆▅▇▆▅▅▆▇█▆█▆▅▇▆█▄▇▆▇▆▆▇▆▆▇▆

0,1
accuracy,0.73939
doc_acc,0.85


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▄▃▃▅▅▆▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇█▇▇█▇██▇█████
doc_acc,▁▁▁▁▂▁▁▃▄▃▁▃▆▆▆▄▆▄▆▇▇▇▇▇▇█▆▇▇▇█▇██▇█▇▇█▇

0,1
accuracy,0.73636
doc_acc,0.9


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▃▃▄▃▄▆▆▆▆▆▆▆▆▆▆▇▆▇▇▆▇▇▇▇▇▇▇███▇██████
doc_acc,▁▁▁▁▁▁▁▁▃▇▅▂█▆█▄███▆█▆▄█▇▆▇▅█▇▇██▆█▇████

0,1
accuracy,0.76667
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▃▄▄▅▄▅▅▅▆▆▆▇▇▇▇▇█▇█▇▇█▇██▇███▇▇█████
doc_acc,▂▁▂▂▂▂▅▅▅▃▅▆▅▇▇▇▆▆███▇██▆██▇█▇▇██▇▅█████

0,1
accuracy,0.76061
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▄▄▅▅▄▆▆▆▅▆▆▆▇▇▇▆█▇▇█▇██▇███████████
doc_acc,▁▁▁▁▁▆▁▅▃▅▇▃▇▅▃▃▃██▇▅█▇▇▇▆▇▇▇▇▇▇▇▇██▇█▇█

0,1
accuracy,0.76667
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333880106608073, max=1.0)…

  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▄▄▇▆▇▆▇▇▇▇▇▇▇▇█▇▇▇▇▇████▇█████████
doc_acc,▂▁▂▂▂▂▂▂▅▄▅▇▅▅▇█▅▇▇▇▇█▇▇▇▆█▇▇▆▇▇▇▇▇▇▇▇▇▇

0,1
accuracy,0.74545
doc_acc,0.9


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▂▃▅▄▆▆▆▆▆▇▇▇▇▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██
doc_acc,▁▁▁▁▁▁▆▁▅▇▅▃▆▄▅█▇▆▇▅▅▇▇▇▇█▇▅▇▇▇▇▇▇▆▆▆▇██

0,1
accuracy,0.79697
doc_acc,1.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▁▁▁▄▄▅▅▆▆▆▇▆▇▆▇▆▇▇██▇▇▇▇██████▇▇▇▇▇███
doc_acc,▁▂▂▂▂▃▅▅▆▆▅▆▇▇█▃▆▆▇▇█▇▇▆█▅██▇▇██▆▇▇▇▅█▇█

0,1
accuracy,0.73636
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▄▄▄▆▆▆▇▇▇█▇█▆██▆███▇█▇██▇███▇▇█▇████
doc_acc,▁▁▁▁▄▁▁▂▃▆▅▅▄▇▆▇▅▄▅▅▇▅█▆█▇▇▇███▇▇██▇██▇█

0,1
accuracy,0.71515
doc_acc,1.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▃▃▄▅▅▆▇▅▅▇█▇████████████▇██▇█▇▇████
doc_acc,▁▁▁▁▁▁▁▂▆▃▃▆▃▃▆▆▆▇▇▆▆▇█▆▆▆▅▆▆▇▆█▇██▆▆▆▆█

0,1
accuracy,0.73636
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▁▂▃▃▄▄▅▃▅▆▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇██▇████████
doc_acc,▁▁▁▁▂▁▂▂▆▁▂▅▆▂▇▆▂▄▇▆▇▄▆▇▇▇▇▇▆▇▇▇▇█▇█▇▇▇▇

0,1
accuracy,0.73333
doc_acc,0.9


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▅▄▅▅▆▆▆█▆█▆▇██▇▆██▇▇██▇▇██▇▇██▇███
doc_acc,▁▁▁▁▁▁▅▅▇▇▆▆▂▇▄█▄▇█▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▇▇▇

0,1
accuracy,0.75455
doc_acc,0.85


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▂▃▂▄▃▅▆▆▆▆▇▇▇█▇▇▇▇▇██▇█████▇▇▇▇▇▇▇▇▇
doc_acc,▁▁▁▁▁▁▁▆▁▃▅▅▃▆▆▇▄▆▆█▇▅▄▇▆▅█▇▇▆▇▆▆▆▆▅▆▆▆▆

0,1
accuracy,0.75455
doc_acc,0.85


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▃▃▃▄▅▅▅▆▇▆▇▇▇▇▆▇▇▇▇▇▇▆▇▇▇█▇▇█▇█▇▇██
doc_acc,▁▁▁▁▁▂▂▃▆▆▆▆▆▇▆▇▇▇▇▃▇▇▆█▅▆▄▆▇▇████▆▇▇▇▇▇

0,1
accuracy,0.79394
doc_acc,0.9


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▄▄▆▅▅▇▅▇▇▇▆▇▇▆▇▇█▇▇█▇███▇██████████
doc_acc,▁▁▁▁▁▅▂▆▆▂▇▂▇▇█▄▇▆▄▆▇▇▇▇▇▇▇▇▇▆▇▇▇▇▇▆▆▆▇▇

0,1
accuracy,0.77879
doc_acc,0.9


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▃▂▅▆▆▅▅▇▇▇▆▇▇▆▇▇▇▆▆▇▇█▇█▇███▇▇██▇██
doc_acc,▁▁▁▁▁▂▁▆▇▇▅▅▇▇▇▅▆█▄▇▇▇▅▄▇▄█▇█▇▇▇█▅█▇███▇

0,1
accuracy,0.76667
doc_acc,0.85


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▃▄▅▅▅▆▇▇▆▇▇▇▇▇▇▇▇██▇██▇▇▇███████████
doc_acc,▁▁▁▁▁▁▂▁▁▂▅▄▂▇▇▄▇▇▇▄█▇▇█▇▅▇▅█▅▇▇▅███▇▇▇█

0,1
accuracy,0.75758
doc_acc,1.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▂▃▄▅▇▅▆▆▇▇▇█▇███▇██▇████████████████
doc_acc,▁▁▁▁▁▁▂▃▆▂▆▇█▃██▇███▇██▅████▇▇██▇█▇█▇███

0,1
accuracy,0.77273
doc_acc,1.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▄▄▃▆▇▇█▇▇▇▇██▇▇███▇▇███▇▇███████████
doc_acc,▁▁▁▁▅▂▁▄▆▇▆▅▅█▆▇▅▃▄█▅█▇▇█▆▆▇▇█▇▇▇▇▇▇▇▇▇▇

0,1
accuracy,0.75455
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▄▅▄▆▄▅▅▆▅▇▇▇▆▇▆▇█▇▇▇████▇█▇▇████▇███
doc_acc,▁▁▁▁▁▅▁▃▅▂▂▂▂▅▅▅▃▅▃▇▇▅▆▅▇▇▇▇▅█▆▆▇▇▇▆▇▇▇▇

0,1
accuracy,0.73939
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▄▅▆▆▇▆▇▇▇▇▇▇▇▇█▇▇▇█▇▇▇█▇▇██▇██▇████
doc_acc,▁▁▁▁▁▂▂▂▃▅▅▅▅▇▅▇▅▅▇▆▇▇▅▅█▅▇▇▇▅█▇▇▇▇▅▇▇▇▇

0,1
accuracy,0.76364
doc_acc,0.9


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▄▄▄▆▅▆▆▆▆▆▇█▇▇▇▇▇▇████▇█████████████
doc_acc,▁▁▁▁▆▅▂▇▆▇▆▆▃▆▆█▇▇▆▆▇▇▇▇▇█▇▇▇█▇██▇█▇█▇▇▇

0,1
accuracy,0.82424
doc_acc,0.9


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▃▄▃▄▅▄▅▅▅▆▆▆▆▆▇▆▇▇▇▇█▇▇▇██▇███▇██████
doc_acc,▁▁▁▃▃▃▆▇▅▅▅▃▆▆████▇▇███▇▆████▇██████████

0,1
accuracy,0.81818
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▂▃▃▄▅▅▆▅▆▆▆▇▆▇▇▆▇█▆▇▇▇█▇▇██▇█▇█▇▇▇▇█
doc_acc,▁▁▁▁▁▅▅▄▂▆▅▄▆▆▅▇▆▇▆▆▆▇▄▇▆▅▇▅▆██▇█▇█▇▇▇▇█

0,1
accuracy,0.79394
doc_acc,1.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▄▂▄▅▅▅▄▆▆▆▆▆▆▇▇▆▇▇▇▇█▇▇▆██▇██▇████▇█
doc_acc,▁▁▁▁▅▁▅▃▅▂▅▅▆▂▆▆▆▇▇▄▇▇▇▅▇▇▇▃▇▇▇▇█▇▇▇██▇█

0,1
accuracy,0.8
doc_acc,1.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▂▄▄▅▄▅▅▆▆▆▆▇▇▇▇▇▆▇▇█▇███▇██▇▇████████
doc_acc,▁▁▁▁▃▂▁▁▆▆█▆█▆▇▆█▇▆▅▇▇█▇█▇█▆████████████

0,1
accuracy,0.78485
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▄▃▄▄▅▅▅▆▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇█▇███▇████████
doc_acc,▁▁▁▁▁▅▂▆▃▅▇▇▇▆▅▅▂▅▇█▇▇▆▇▇▇█▇▇▇▇▆▇▇▇▇▇█▇▇

0,1
accuracy,0.76667
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▅▅▄▇▆▆▆▅▅▇▇▇▇▆█▇▆▆▇▇▇▇▇██▇█▇▇▇█▇█▇▇▇
doc_acc,▁▁▁▁▂▇▅▇▅▅▇▂▅██▅▇▇▇▇▆▇▅▇▇▇▅▇▇▇▇▇▇▇▇▇▇▇▇▇

0,1
accuracy,0.75152
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▃▄▅▅▆▆▆▇▆▅▅▇▇▇▇██▇▆█▇████▇▇████████
doc_acc,▁▁▁▁▁▁▁▃▃▆▃▇▇▇▆▃▆█▇▇██▇▆█▇█▇█▇▆▇▇█▇█████

0,1
accuracy,0.77576
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▄▄▆▆▆▆▆▆▆▆▇▆▆▆▆▇▇▇▇▇▇█▇▇█▇▇███▇█▇███
doc_acc,▁▁▁▁▂▂▇▇▆▄▅▇▇▆▇▇▃▇▇▆▇▇▇▇▆█▇▇▇▇▇██▇▇█▇▇█▇

0,1
accuracy,0.75758
doc_acc,0.95


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▂▄▄▅▃▆▇▆▆▆▇▇▇▇█▇▇▇▇▇▆▆▇█▇▇███▇▆█▇████
doc_acc,▁▁▁▁▅▂▅▁▆▇▅▅▅▇▇▅▇▇▅▇▇▇▇▆▇███████▅▇█▇▇███

0,1
accuracy,0.75455
doc_acc,1.0


## all 50 authors

In [None]:
# Restore the retrained POS-tag BERT encoder and its tokenizer from a local
# checkpoint directory (local_files_only=True, so nothing is downloaded).
max_length = 256
checkpoint = '/scratch/data_jz17d/result/pos_mlm_corenlp/retrained_256_pos_mlm_0/checkpoint-120000/'
tokenizer = AutoTokenizer.from_pretrained(
    checkpoint,
    local_files_only=True,
)
# The encoder is built without a pooling layer and switched to eval mode
# immediately after loading.
bert = BertModel.from_pretrained(
    checkpoint,
    add_pooling_layer=False,
    local_files_only=True,
).eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/pos_mlm_corenlp/retrained_256_pos_mlm_0/checkpoint-120000/ were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Columns that are stored in the CSV as Python-literal strings and must be
# parsed back into Python objects after loading.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']
# Selects which pre-processed split files are read (it is part of the file name).
num_sent_per_text = 3


def _read_parsed_csv(path, literal_cols):
    """Read the CSV at `path` and parse every column in `literal_cols` with `ast.literal_eval`.

    Args:
        path: location of the CSV file.
        literal_cols: names of columns whose cells hold Python-literal strings.

    Returns:
        The loaded DataFrame with the listed columns converted in place.

    Raises:
        Whatever `pd.read_csv` / `ast.literal_eval` raise for a missing file
        or a malformed cell; `KeyError` if a listed column is absent.
    """
    frame = pd.read_csv(path)
    for name in literal_cols:
        frame[name] = frame[name].apply(ast.literal_eval)
    return frame


# Training split.
file = f'../../data/CCAT50/processed/author_all_sent_{num_sent_per_text}_train.csv'
df = _read_parsed_csv(file, cols_to_eval)

# Validation split (`file` is left pointing at this path, as before).
file = f'../../data/CCAT50/processed/author_all_sent_{num_sent_per_text}_val.csv'
df_val = _read_parsed_csv(file, cols_to_eval)

In [None]:
# Grid-search training cell: for every hyper-parameter combination, train a
# fresh myHomoGNN for `epochs` epochs, evaluate on the validation loader after
# each epoch, and log current and best-so-far metrics to Weights & Biases.

# ---- Run configuration ----------------------------------------------------
epochs = 90
warmup_ratio = 0.15  # share of all optimizer steps used for linear LR warm-up
monitering_metric = 'accuracy'  # not read in this cell; kept in case other cells use it

# ---- Hyper-parameter grid (one run per element of the Cartesian product) ---
LIMIT = [None]
NUM_LAYERS = [3]
LR = [2e-3]
READOUT = ['pool']
GNNTYPE = ['ChebConv']  # alternative: 'GCNConv'
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [True, False]
REPEAT = list(range(5))

# Materialise the product once so it can be both counted and iterated
# (an itertools.product iterator is exhausted after a single pass).
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS, ADD_SYLLABLES, REPEAT))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)


def _forward(model, data, add_syllables, readout):
    """Run `model` on one batch, passing `data.num_syllables` only when `add_syllables` is true."""
    if add_syllables:
        return model(data.x, data.edge_index, data.batch, data.y, data.ptr, data.num_syllables, readout=readout)
    return model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)


# Loaded once and reused: compute() consumes the batches added so far, so the
# same metric object can accumulate a fresh epoch afterwards.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops, add_syllables, repeat = args

    # Time-based seed, different for each run; it is stored in the wandb
    # config below so a run can be reproduced later.
    seed = int(datetime.now().timestamp())
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables, max_length=max_length)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit = limit, add_syllables=add_syllables, max_length=max_length)
    num_training_steps = len(train_loader)

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=50,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype,
                      add_syllables=add_syllables
                     )

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear warm-up followed by linear decay over the whole run; the
    # scheduler is stepped once per optimizer step.
    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)

    wconfig = {
        'seed': seed,
        'num_sent_per_text': num_sent_per_text,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="homo POS GNN (all authors)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True,
                     settings=wandb.Settings(start_method='thread'))

    # Missing keys default to 0.0, so the first epoch always sets the "best" values.
    best_evaluation = collections.defaultdict(float)
    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            # ---- training pass ----
            model.train()
            for data in train_loader:
                data.to(device)
                optimizer.zero_grad()
                output = _forward(model, data, add_syllables, readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            # ---- validation pass ----
            model.eval()
            doc_frames = []
            # No gradients are needed for evaluation; skipping autograd
            # bookkeeping saves memory and time without changing predictions.
            with torch.no_grad():
                for data in valid_loader:
                    data.to(device)
                    output = _forward(model, data, add_syllables, readout)
                    preds = output.logit.argmax(axis=-1).cpu().numpy()
                    labels = data.y.cpu().numpy()
                    metric.add_batch(predictions=preds, references=labels)
                    doc_frames.append(pd.DataFrame({'doc_id': data.doc_id.cpu().numpy(),
                                                    'author': labels,
                                                    'pred': preds}))
            # Single concat per epoch instead of growing a DataFrame batch by batch.
            df_doc_acc = pd.concat(doc_frames)

            # Document-level accuracy: majority vote of the predictions that
            # share a doc_id, compared with that group's first 'author' label.
            gb = df_doc_acc.groupby('doc_id')
            doc_acc = (gb['pred'].agg(lambda x: x.value_counts().index[0]) == gb['author'].first()).mean()

            evaluation = metric.compute()
            evaluation.update({'doc_acc':doc_acc})

            # Track the running maximum of every logged metric.
            for key in evaluation:
                best_evaluation[f'best_{key}'] = max(best_evaluation[f'best_{key}'], evaluation[key])

            # Current and best-so-far values go to the same wandb step.
            wandb.log({**evaluation, **best_evaluation}, step=pbar.n)
    finally:
        # Always release the progress bar and close the wandb run, even if
        # an epoch raised or the cell was interrupted.
        pbar.close()
        run.finish()

    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▃▄▅▅▆▆▆▆▇▆▇▆▆▇▇▇▇▇▇█▇▇█▇█████████████
best_accuracy,▁▂▂▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇██████████████████
best_doc_acc,▁▂▂▃▄▄▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇██████████████████
doc_acc,▁▂▂▃▄▄▄▆▅▆▅▆▆▆▆▆▇▇▇▇▇█▇█▇▇▇▇▇█▇█████████

0,1
accuracy,0.14452
best_accuracy,0.14743
best_doc_acc,0.26518
doc_acc,0.25506


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇█▇▇▇█▇██████████████
best_accuracy,▁▁▃▃▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█████████████████
best_doc_acc,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████
doc_acc,▁▁▂▂▃▃▄▄▄▅▅▅▆▆▇▆▆▇▇▇▆▇▆▇▇▆▇▇▇█▇▇████████

0,1
accuracy,0.14092
best_accuracy,0.14466
best_doc_acc,0.26316
doc_acc,0.25101


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇███████████████
best_accuracy,▁▁▂▃▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇████████████████████
best_doc_acc,▁▁▂▃▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████████
doc_acc,▁▁▂▃▄▃▄▅▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█▇██▇▇▇█▇▇

0,1
accuracy,0.1405
best_accuracy,0.14189
best_doc_acc,0.24899
doc_acc,0.2247


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▃▄▅▅▅▆▅▆▇▇▇▇▇▇▇▇▇██▇█▇█▇████████████
best_accuracy,▁▁▂▃▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇███████████████████
best_doc_acc,▁▁▂▂▃▄▄▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████████
doc_acc,▁▁▂▂▃▄▃▅▅▆▃▆▇▆▆▆▅▇▇▇▆▆▇▇▇▇▇▇▇█▇▇▇█▇▇████

0,1
accuracy,0.14078
best_accuracy,0.14161
best_doc_acc,0.25101
doc_acc,0.23077


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▃▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████████████
best_accuracy,▁▂▂▃▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████████████
best_doc_acc,▁▁▂▃▃▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
doc_acc,▁▁▂▃▄▄▄▅▅▆▆▆▆▇▇▇▇▆▆▇▆▇▇▇▇█▇▇▇▇▇███▇▇█▇██

0,1
accuracy,0.13245
best_accuracy,0.13412
best_doc_acc,0.24291
doc_acc,0.2085


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▄▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
best_accuracy,▁▁▃▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇██████████████████
best_doc_acc,▁▁▂▃▄▄▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇███████████████████
doc_acc,▁▁▂▃▄▄▆▄▅▆▇▆▅▆▆▇▆▇▆▇▇█▇█▆█▇▇██▇█▇▇███▇▇▇

0,1
accuracy,0.11345
best_accuracy,0.11553
best_doc_acc,0.19028
doc_acc,0.17611


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▃▃▅▅▅▇▇▇▇▇▇▇▇▇██▇▇███████████████████
best_accuracy,▁▂▂▃▄▅▅▅▇▇▇▇▇▇▇▇▇███████████████████████
best_doc_acc,▁▁▁▃▃▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇████████████████████
doc_acc,▁▁▁▃▂▄▅▄▆▆▇▅▆▆▇▇▇▇▇▆▇█▇▇▇▇▇███▇▇█▇█████▇

0,1
accuracy,0.119
best_accuracy,0.12053
best_doc_acc,0.21053
doc_acc,0.19028


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▄▄▄▅▆▆▅▆▇▆▇▇▇▇▇▇▇▇▇▇█▇██████████████
best_accuracy,▁▁▂▃▄▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████
best_doc_acc,▁▁▂▂▃▃▄▄▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████
doc_acc,▁▁▂▂▃▃▄▄▇▅▅▆▅▅▆▆▆▆▆▆▇▇▇▇█▇▇███▇▇▇▇▇█▇▇██

0,1
accuracy,0.11637
best_accuracy,0.11914
best_doc_acc,0.2166
doc_acc,0.21053


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇█▇▇▇▇█▇██▇███████████
best_accuracy,▁▁▂▃▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇█████████████████████
best_doc_acc,▁▁▂▂▂▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████████████
doc_acc,▁▁▂▂▂▅▄▄▅▅▅▅▆▆▇▅▇▆▆▇▇▇▇▇█▇▇▇▇▇▇▇▇█▇▇▇▇▇▇

0,1
accuracy,0.11817
best_accuracy,0.11817
best_doc_acc,0.21255
doc_acc,0.19636


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▄▄▅▆▆▆▇▆▆▇▇▇▇▇▇▇█▇▇█▇▇██████████████
best_accuracy,▁▁▂▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇████████████████████
best_doc_acc,▁▁▂▃▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇████████████████
doc_acc,▁▁▁▃▄▄▄▅▅▅▆▅▅▆▆▆▆▇▆▇▇▇▇█▇▇▇▇█▇███▇▇▇███▇

0,1
accuracy,0.11401
best_accuracy,0.11595
best_doc_acc,0.2085
doc_acc,0.18623


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▃▄▄▄▅▆▆▆▇▇▇▇▇▇▇█▇▇▇▇████████████████
best_accuracy,▁▁▂▃▃▄▄▄▅▆▆▇▇▇▇▇▇▇▇█████████████████████
best_doc_acc,▁▁▁▃▃▃▄▄▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
doc_acc,▁▁▁▃▃▃▄▃▅▆▅▆▇▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇▇▇█▇▇█▇█████

0,1
accuracy,0.18308
best_accuracy,0.18322
best_doc_acc,0.33198
doc_acc,0.31377


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▃▄▄▅▆▆▆▆▇▇▇▇▇▇▇█▇████████████████████
best_accuracy,▁▂▂▃▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█████████████████
best_doc_acc,▁▁▁▃▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████████████
doc_acc,▁▁▁▃▄▄▄▅▅▆▆▆▆▇▆▇▇▆▇▇▇▇▇▇▇▇▇█▇███████████

0,1
accuracy,0.1767
best_accuracy,0.1846
best_doc_acc,0.32591
doc_acc,0.30972


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▃▄▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇▇█▇███▇████████████
best_accuracy,▁▁▁▂▃▄▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇███████████████████
best_doc_acc,▁▁▁▂▃▃▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇██████████████
doc_acc,▁▁▁▂▃▃▄▅▅▅▆▇▆▆▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇██▇██▇█████

0,1
accuracy,0.18516
best_accuracy,0.18738
best_doc_acc,0.34413
doc_acc,0.33603


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▄▄▄▅▅▆▆▆▇▇▇▇▇▇███▇██████████████████
best_accuracy,▁▁▂▃▄▄▄▅▆▆▆▆▇▇▇▇▇▇██████████████████████
best_doc_acc,▁▁▂▂▄▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇████████████████████
doc_acc,▁▁▂▂▄▃▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇█▇█▇██▇█▇▇▇▇██████

0,1
accuracy,0.18252
best_accuracy,0.18336
best_doc_acc,0.32591
doc_acc,0.31377


  0%|          | 0/7210 [00:00<?, ?it/s]

0 data dropped because of exceeding max_length 256


  0%|          | 0/28943 [00:00<?, ?it/s]

4 data dropped because of exceeding max_length 256
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/81450 [00:00<?, ?it/s]

FileNotFoundError: Couldn't find a module script at /home/jz17d/Desktop/style-models/code/POS/accuracy/accuracy.py. Module 'accuracy' doesn't exist on the Hugging Face Hub either.

In [None]:
# Manually close the W&B run referenced by the global `run`.
# NOTE(review): this cell follows a FileNotFoundError raised while loading the
# 'accuracy' metric, so it presumably cleans up the run that the interrupted
# sweep left open -- confirm; it is a no-op concern on a clean Run All.
run.finish()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▃▃▃▅▆▆▇██
best_accuracy,▁▁▁▃▃▃▅▆▆▇██
best_doc_acc,▁▁▁▂▃▃▅▅▆▇▇█
doc_acc,▁▁▁▂▃▃▅▅▆▇▇█

0,1
accuracy,0.09126
best_accuracy,0.09126
best_doc_acc,0.13968
doc_acc,0.13968


# imdb

In [None]:
# Columns that are stored in the CSV files as Python-literal strings and have
# to be parsed back into Python objects with ast.literal_eval after loading.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']

# Read the training split ...
file = '../../data/imdb/processed/author_0,1_sent_2_train.csv'
df = pd.read_csv(file)

# ... and the validation split.
file = '../../data/imdb/processed/author_0,1_sent_2_val.csv'
df_val = pd.read_csv(file)

# Parse the literal-encoded columns of both frames in place.
for frame in (df, df_val):
    for col in cols_to_eval:
        frame[col] = frame[col].map(ast.literal_eval)

In [None]:
# Hyper-parameter sweep for the homogeneous POS GNN on the imdb data.
# For every combination in the grid below a fresh model is trained for
# `epochs` epochs, evaluated on the validation loader after each epoch, and
# the metrics are logged to a dedicated W&B run.
epochs = 60
warmup_ratio = 0.15  # fraction of all optimisation steps used for LR warm-up
monitering_metric = 'accuracy'  # key of the evaluation dict tracked as "best"

LIMIT = [None]
NUM_LAYERS = [4]
LR = [1e-3, 5e-3]
READOUT = ['pool']
GNNTYPE = ['GCNConv', 'ChebConv']
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [True, False]
REPEAT = list(range(5))  # only repeats each configuration; the value itself is unused

# Materialise the grid once so it can be both counted and iterated
# (an itertools.product iterator is exhausted after a single pass).
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS, ADD_SYLLABLES, REPEAT))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Load the metric a single time. Re-loading it every epoch re-resolves the
# metric script on each call, so one failed lookup aborts the whole sweep.
# The metric object is reusable: compute() clears the accumulated batches.
metric = evaluate.load('accuracy')


def _forward(model, data, add_syllables, readout):
    """Run `model` on one batch, passing `data.num_syllables` only when requested."""
    extra = (data.num_syllables,) if add_syllables else ()
    return model(data.x, data.edge_index, data.batch, data.y, data.ptr, *extra, readout=readout)


for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops, add_syllables, repeat = args

    # Time-based seed: every run differs; the seed is stored in the W&B config.
    seed = int(datetime.now().timestamp())
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit = limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype,
                      add_syllables=add_syllables
                     )

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear schedule with warm-up, stepped once per optimisation step.
    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)

    wconfig = {}
    wconfig['seed'] = seed
    wconfig['limit'] = limit
    wconfig['num_layers'] = num_layers
    wconfig['lr'] = lr
    wconfig['readout'] = readout
    wconfig['GNNtype'] = gnntype
    wconfig['add_self_loops'] = add_self_loops
    wconfig['add_syllables'] = add_syllables

    # NOTE(review): `dir` is a machine-specific absolute path -- consider
    # moving it to a configuration constant.
    run = wandb.init(project="homo POS GNN imdb (best)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    best_metric = 0.0
    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            # ---- training ----
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = _forward(model, data, add_syllables, readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            # ---- validation ----
            model.eval()
            # No gradients are needed for evaluation; skipping the autograd
            # graph saves memory and time.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    output = _forward(model, data, add_syllables, readout)
                    metric.add_batch(predictions=output.logit.argmax(axis=-1).cpu().detach().numpy(), references=data.y.cpu().numpy())
            evaluation = metric.compute()
            evaluation.update({'global_step':pbar.n})
            wandb.log(evaluation, step=pbar.n)
            if (best_metric < evaluation[monitering_metric]):
                best_metric = evaluation[monitering_metric]
            wandb.log({f'best_{monitering_metric}': best_metric})
    finally:
        # Always release the progress bar and close the W&B run, even when an
        # epoch fails, so no run is left dangling for a later manual finish().
        pbar.close()
        run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/40 [00:00<?, ?it/s]

data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▅▆▆▃▇▇▇▅▇█▇█▇▇▇██▇█▇█▇▇█▇██▇████████
best_accuracy,▁▁▃▃▅▆▆▆▇▇▇▇▇███████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87602
best_accuracy,0.87798
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▂▅▄▅▆▄▇▇███▇████▇█▇▇▇█████▇██████████
best_accuracy,▁▁▂▂▅▅▅▆▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87243
best_accuracy,0.87537
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▄▆▆▆▆▇▇█▆▇▇█▆▇▆▇█▇▇████▇█████████████
best_accuracy,▁▁▂▄▆▆▆▆▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87113
best_accuracy,0.87374
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▃▅▆▆▇▇▇▇▇▇▇▇████▇▇▇█▇███▇███████████
best_accuracy,▁▁▂▃▃▄▆▆▇▇▇▇▇▇▇▇████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8721
best_accuracy,0.87439
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▅▅▆▇▇▇▇▇█▇▇▇█▇▇▇█▇█▇█▇█▇███▇████████
best_accuracy,▁▁▁▂▅▅▆▇▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87504
best_accuracy,0.87928
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▃▄▆▆▇▆▇▆▇▇▇▇▇▇███▇▇█▇██▇███████▇█████
best_accuracy,▁▁▁▃▅▆▆▇▇▇▇▇▇▇▇▇████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86591
best_accuracy,0.86819
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▄▅▅▆▆▇▆▆▇▇▇▇▇▇▇████▇▇▇█▇█████████████
best_accuracy,▁▁▂▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86688
best_accuracy,0.86884
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▄▆▆▇▆▆▆▇▇▇█▇▇▇▇▇▇███████▇██▇██▇▇▇████
best_accuracy,▁▁▂▄▆▆▇▇▇▇▇▇▇███████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86721
best_accuracy,0.87308
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▄▄▆▆▇▇▇▇▇▇▇▇▇▇█▇▆▇████████▇███▇█████
best_accuracy,▁▁▃▄▄▄▆▆▇▇▇▇▇▇▇▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87276
best_accuracy,0.87537
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▅▆▇▇▇▇▇█▇████▇█▇█▇██████████████████
best_accuracy,▁▁▃▄▅▆▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87113
best_accuracy,0.87406
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▅▆▅▆▇▇█▇▆█▇▇███████████▇▇▇▇███▇▇█▇▇▇
best_accuracy,▁▁▃▄▅▆▇▇▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87015
best_accuracy,0.87896
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▆▆▇▄▇▇▇▇█▇███▇█▆▇██▇▇▇█▇▆▇▆▇▇▇▆▇▇▇▇▇
best_accuracy,▁▁▃▄▆▆▇▇▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85905
best_accuracy,0.88124
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▅▅▆▄▇▇███▆██▆███▇██▇▇▇▇▇▇▆▇▆█▇▇▇▇▇▇▇
best_accuracy,▁▁▃▄▅▅▆▆▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8633
best_accuracy,0.87798
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▃▅▆▅▆▇▇▆█████▇███▇█▇▇█▇█▇██▇▇▇▇▇▇█▇▇▇
best_accuracy,▁▁▄▄▅▆▆▆▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86591
best_accuracy,0.87504
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▅▆▅▅▆▇▇▇█████▇██▇▆████████▇▇█▇▇▇▇▇▇▇
best_accuracy,▁▁▂▃▅▆▆▆▆▇▇▇▇███████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86525
best_accuracy,0.87602
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▅▆▇▇▇▇█▇█▇▆▇█████▇█▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▄▄▅▆▇▇▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85481
best_accuracy,0.8708
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▄▆▅▆▅▇▇▄█▆█▇█▇▇▇█▇▇▇▇▇██▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▃▄▄▆▆▆▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85579
best_accuracy,0.86688
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▆▇▇▇▇█▆████▇▇▆▇██▆█▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▄▄▆▇▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.84666
best_accuracy,0.85808
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▅▆▇▇▇▇█▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▄▅▆▇▇▇▇███████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.846
best_accuracy,0.86069
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▄▃▆▇▇▇▇▇▇▆▇▇█▆█▇█▆▄█▆███▇█▆▇▇█▇▇██▇▇▇
best_accuracy,▁▁▂▄▄▆▇▇▇▇▇▇▇▇▇█████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8584
best_accuracy,0.86852
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▅▇▆▅▅▇▇▇█▇▇█▇▇███████▇██▇▇█▇█▇█▇▇▇▇▇▇
best_accuracy,▁▃▄▅▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86525
best_accuracy,0.87537
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▆▆▆▅▄▆▇███▇▇▇▇██▇█▇▇█▇▇██▇▇█▇██▇███▇▇▇
best_accuracy,▁▂▅▆▇▇▇▇▇███████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86362
best_accuracy,0.87471
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▅▆▆▇▅▇▇▇▃▇▇▇█▇▇▇▆███▇█▇█▇███▇█▇▇▇▇▇▇▇▇
best_accuracy,▁▃▅▆▆▇▇▇▇▇▇▇▇▇██████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87145
best_accuracy,0.87993
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▃▇▃▅▇▇▇▇█▇▇██▇█▇██▇▇▇▇▇██▇█▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▃▅▅▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87406
best_accuracy,0.88581
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▄▄▃▆▇▇▇▇▇▆███▇██▇█▇█▆██▇█▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▂▄▄▅▆▇▇▇▇▇▇▇███████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86395
best_accuracy,0.88026
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▆▅▇▆▇▆▅▇███▆█▆██▇██▇▇▇▇▆▇█▇▇▇▇▇█▇█▇▇▇▇
best_accuracy,▁▃▆▆▇▇▇▇▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85905
best_accuracy,0.86884
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▅▆▇▇▅▇▆▇▇▇▇██▆███████▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▂▅▆▇▇▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86069
best_accuracy,0.87374
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033338705698649086, max=1.0…

  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▆▆▆▇▇▇▇▇▇█▇███▇█▇▇▇███▇█▇▇▇▇▇██▇▆▇▇▇▇▇
best_accuracy,▁▃▆▆▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86297
best_accuracy,0.87374
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▅▇▇▆▇▇▆▇▆█▇█▇▇██▇▆███████▇██▇▇▇██▇██▇▇
best_accuracy,▁▂▅▇▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86558
best_accuracy,0.87308
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▅▆▇▇▆▇▇▅▆█▇▇▇▇▇█▇██▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▂▅▆▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86493
best_accuracy,0.87569
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▆▆▆▅▇▇▇▇▇▇█▇███▇██▇█▇█▇▆▇▇▇▇▇▇██▇█▇▇█▇
best_accuracy,▁▃▆▆▆▆▆▇▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86166
best_accuracy,0.87406
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▆▅▇▇█▇▇▇▇▇█▇▇▇█▆▇▇█▇█▇▇▇███▇██▇█▇██▇█
best_accuracy,▁▃▄▆▆▇▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87667
best_accuracy,0.877
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▅▆█▇▇▆▇█████▇▅▇█▇▇▇▇▇▇▇█▇▇██▇▇▇▇▇█▇▇▇█
best_accuracy,▁▄▅▆████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86884
best_accuracy,0.87471
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▅▅▅▄█▇▇██▇▇▇█▇▇▆▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇
best_accuracy,▁▃▅▅▅▅██████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86754
best_accuracy,0.8783
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▇▇▇▆█▇▇███▇▇███▇█▇▇▇▇██▇████▇▇▇▇▇▇▇▇▇
best_accuracy,▁▃▅▇▇▇▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86134
best_accuracy,0.87178
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▆▆▅█▇██▇▇▅▇▆▇▇▆▇▆▇▆▆▇▆▇▆▆▆▆▇▆▆▆▆▆▆▆▆▇▆
best_accuracy,▁▄▆▆▇███████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85188
best_accuracy,0.87569
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▆▇█████████▇▇▇▇▇██▇▇▇█▆▇▇▇█▆▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▄▆▇▇███████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85546
best_accuracy,0.86819
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▆▆▇█▇█▇█▇▇█▆██▇▇▇█▇▇▇▇▇█▇▆▇▇▇██▇▇▇▇▇▇▇
best_accuracy,▁▄▆▆▇███████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8571
best_accuracy,0.86917
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▅▄▆▇▇▇▇▇▇██▇▇▇▇▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▃▄▅▅▆▇▇▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86395
best_accuracy,0.87798
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▅▅▇▆▄██▇█▇██▇▇▇▇▇▇▆▇▅▇▇▇▇▇▇▆▇▇▆▇▇▆▇▇▇▇
best_accuracy,▁▄▅▅▇▇▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85905
best_accuracy,0.87504
global_step,17280.0


# guardian

In [None]:
# Columns that are stored in the CSV as Python-literal strings and are parsed
# back into Python objects with ast.literal_eval after loading.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']

# Pieces of the processed-file name: topic ids, author ids and sentences per text.
str_topic='0,1'
str_author='0,1'
num_sent_per_text = 2

# Read the train / val / test CSVs of the processed Guardian data, one file per
# split, applying the same literal-parsing step to each.
frames = {}
for split in ('train', 'val', 'test'):
    filename = f"topic_{str_topic}_author_{str_author}_sent_{num_sent_per_text}_{split}.csv"
    file = f'../../data/guardian/processed/{filename}'
    frame = pd.read_csv(file)
    for col in cols_to_eval:
        frame[col] = frame[col].apply(ast.literal_eval)
    frames[split] = frame

# Expose the three frames under the names the rest of the notebook uses.
df, df_val, df_test = frames['train'], frames['val'], frames['test']

In [None]:
# Hyper-parameter sweep: for every grid point, train one homogeneous GNN on the
# Guardian frames (df / df_val / df_test) and log per-epoch train / val / test
# accuracy to Weights & Biases.
epochs = 60
warmup_ratio = 0.15  # fraction of all optimisation steps used for LR warm-up
monitering_metric = 'accuracy'

# Sweep grid; every combination of these lists is one run.
LIMIT = [None]
NUM_LAYERS = [4]
LR = [1e-3, 5e-3]
READOUT = ['pool']
GNNTYPE = ['GCNConv', 'ChebConv']
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [False, True]
REPEAT = list(range(5))


def _forward(model, data, add_syllables, readout):
    """Run `model` on one batch, passing `data.num_syllables` only when `add_syllables` is set."""
    if add_syllables:
        return model(data.x, data.edge_index, data.batch, data.y, data.ptr, data.num_syllables, readout=readout)
    return model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)


def _accuracy(model, loader, metric, add_syllables, readout):
    """Return the accuracy of `model` over all batches of `loader`.

    The model is switched to eval mode and the pass runs under
    `torch.no_grad()` so no autograd graph is kept for evaluation batches.
    `metric` is an `evaluate` accuracy metric; `compute()` finalises the
    batches added here, so the same object can be reused by the next call.
    """
    model.eval()
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            output = _forward(model, data, add_syllables, readout)
            metric.add_batch(predictions=output.logit.argmax(axis=-1).detach().cpu().numpy(),
                             references=data.y.cpu().numpy())
    return metric.compute()['accuracy']


# Materialise the grid once so it can be both counted and iterated.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS, ADD_SYLLABLES, REPEAT))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Load the accuracy metric once instead of three times per epoch.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops, add_syllables, repeat = args

    # Time-based seed: repeats of the same configuration differ; the value is
    # stored in the W&B config below so it can be looked up afterwards.
    seed = int(datetime.now().timestamp())
    set_seed(seed)

    train_loader = get_loader(df, limit = limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)
    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    test_loader = get_loader(df_test, limit = limit, add_syllables=add_syllables)
    num_test_steps = len(test_loader)

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype,
                      add_syllables=add_syllables
                     )

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear schedule with warm-up, stepped once per optimisation step.
    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="homo POS GNN guardian (best)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    # Running maxima of every logged accuracy; missing keys start at 0.0.
    best_evaluation = collections.defaultdict(float)
    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = _forward(model, data, add_syllables, readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            # End-of-epoch accuracy on all three splits.
            evaluation = {
                'train_accuracy': _accuracy(model, train_loader, metric, add_syllables, readout),
                'val_accuracy': _accuracy(model, valid_loader, metric, add_syllables, readout),
                'test_accuracy': _accuracy(model, test_loader, metric, add_syllables, readout),
            }

            # 'best_test_accuracy' below is a maximum taken over the test split itself,
            # so it is optimistic. Also record the test accuracy of the epoch with the
            # best validation accuracy, which is the selection-free number to report.
            if ('test_accuracy_at_best_val' not in best_evaluation
                    or evaluation['val_accuracy'] > best_evaluation['best_val_accuracy']):
                best_evaluation['test_accuracy_at_best_val'] = evaluation['test_accuracy']

            for key in evaluation:
                best_evaluation[f'best_{key}'] = max(best_evaluation[f'best_{key}'], evaluation[key])

            # One log call per epoch; the step is the number of optimisation steps so far.
            wandb.log({**evaluation, **best_evaluation}, step=pbar.n)
    finally:
        # Always release the progress bar and close the W&B run, even if an epoch fails.
        pbar.close()
        run.finish()
    run_pbar.update(1)

  0%|          | 0/40 [00:00<?, ?it/s]

data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁▄▅▅▅▅▅▅▅▅▅▅▅████
test_accuracy,█▁▁▁▁▁▁▁▁▁▁▁▁▃▂▁▂
val_accuracy,▁▄▅▅▅▅▅▅▅▅▅▅▅█▅▅▅

0,1
best_test_accuracy,0.61776
best_val_accuracy,0.62091
test_accuracy,0.39575
val_accuracy,0.55856


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033345635732014975, max=1.0…

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁███████████████████████████████████████
best_trani_accuracy,▁▂▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████████
best_val_accuracy,▁▄▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇█████████████████
test_accuracy,██▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▃▂▃▂▄▃▃▃▃▃▃▃▂▃▃▃▃▃▃▃▃
trani_accuracy,▁▂▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████████
val_accuracy,▁▄▅▅▅▅▅▅▅▅▅▅▅▅▅▆▅▆▅▇▆▇▆█▇▇▇▇▇▇█▇██▇▇██▇█

0,1
best_test_accuracy,0.639
best_trani_accuracy,0.85113
best_val_accuracy,0.61335
test_accuracy,0.46139
trani_accuracy,0.84951
val_accuracy,0.60831


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████████████
best_val_accuracy,▁▁▄▄▄▄▄▄▄▄▄▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇██████████████
test_accuracy,██▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▃▂▃▂▄▂▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄
trani_accuracy,▁▁▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇█████████████████████
val_accuracy,▁▁▄▄▄▄▄▄▄▄▄▄▅▅▅▆▆▇▆▇▆█▇▇▇▇██▇▇██████████

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.87217
best_val_accuracy,0.6272
test_accuracy,0.48456
trani_accuracy,0.86893
val_accuracy,0.62469


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▅▅▆▆▆███████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇██████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▂▃▃▃▅▅▆▆▆███████████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▅▂▆▃▃█▅▇█▆▆▇▆▆▇▆▇▇█▇▇▇▇▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▄▂▅▃▃▆▅▅▆▆▆▇▆▆▇▆▇▇█▇▇▇▇▇
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▃▂▃▅▂▆▄▄█▆▆█▆▆█▇▆▇▆▇▇█▇▇▇▇▇

0,1
best_test_accuracy,0.52317
best_trani_accuracy,0.88026
best_val_accuracy,0.63035
test_accuracy,0.5
trani_accuracy,0.86893
val_accuracy,0.61965


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▄▄▅▅████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▃▄▅▅▆▆▆▆▆▆██████████████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▄▄▅▅████████████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▁▂▁▁▃▃▄▃▄▅▅▅▄▅▆▇▄█▇▇▆▇█▇▇█▇▇█▇▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▄▅▅▄▅▅▆▅█▆▆▆▇█▇▇█▇▇█▇█
val_accuracy,▁▁▁▁▁▁▁▁▁▁▂▁▁▃▃▄▄▄▄▅▅▄▅▆▇▅█▆▆▆▇█▇▇█▇▇█▇▇

0,1
best_test_accuracy,0.50965
best_trani_accuracy,0.87055
best_val_accuracy,0.63917
test_accuracy,0.49421
trani_accuracy,0.8657
val_accuracy,0.62531


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▅▅▅▅▇▇▇▇▇█████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▅▅▅▅▆▆▆▆▇█████████████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▅▅▆▆▇▇▇▇▇█████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▄▅▃▅▆▅▄▆▅██▆▇▆▇▇▇██▇█▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▃▄▄▄▅▆▅▅▆▅█▇▆█▇█████▇██
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▂▃▄▅▃▅▇▅▅▇▇██▇▇▇▇█▇█████

0,1
best_test_accuracy,0.52703
best_trani_accuracy,0.87379
best_val_accuracy,0.64924
test_accuracy,0.50772
trani_accuracy,0.86731
val_accuracy,0.64547


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▃▃▃▄▄▅▅▅▅▅███████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▃▄▄▄▄▆▇▇▇▇███████████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▃▃▃▃▅▅▅▅▅▅▆███████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▃▅▃▂▂▅█▄▄▇▇▅▅▇▅▅▇▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▂▂▅▃▃▃▆▇▅▅▇█▆▆█▆▆█▇▇██
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▃▃▅▃▃▃▆█▅▅██▅▅█▅▇█▇▇█▇

0,1
best_test_accuracy,0.5
best_trani_accuracy,0.85922
best_val_accuracy,0.61839
test_accuracy,0.46911
trani_accuracy,0.85599
val_accuracy,0.61083


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▄▄▄▅▅█████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▃▃▇▇▇▇▇▇▇▇▇▇▇██████████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▃▄▅▅█████████████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▃▂▄▅▄█▄▅▅▄▄▃▅▅▇▆▆▆▅▆▇▆▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▃▄▃▇▃▆▅▄▅▄▆▆▆▆██▇▇▇█████
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▂▄▅▃█▄▅▅▄▄▄▅▆▇▇▆▇▆▇█▇█▇▇▇

0,1
best_test_accuracy,0.55019
best_trani_accuracy,0.84466
best_val_accuracy,0.61965
test_accuracy,0.5
trani_accuracy,0.84304
val_accuracy,0.6102


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████████████
best_val_accuracy,▁▄▄▄▄▄▄▄▄▄▄▄▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████████
test_accuracy,█▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▃▂▂▃▃▂▂▃▃▄▅▃▆▄▃▄▃▄▄▄▄▄▄▄
trani_accuracy,▁▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███▇█████████████████
val_accuracy,▁▄▄▄▄▄▄▄▄▄▄▄▆▅▅▅▇▆▆▆▆▆▅▆▆▇▇▆█▇▇▇▇▇▇▇▇▇▇▇

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.85599
best_val_accuracy,0.6272
test_accuracy,0.48649
trani_accuracy,0.85437
val_accuracy,0.61083


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▄▄▄▄▆▆▆▇▇▇▇▇▇▇▇▇████████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▃▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇████████
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▂▇▂▄▂▇▄▆▇▆▅▆▅▅▆▇▄▆█▇▆▇█▇▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▄▂▄▂▆▃▆▇▆▅▆▅▆▇▇▄▆█▇▇▇█▇▇
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄▃▇▂▆▃▇▅▇█▇▆▇▆▆▇█▅▆██▇▇█▇▇

0,1
best_test_accuracy,0.57336
best_trani_accuracy,0.86408
best_val_accuracy,0.63917
test_accuracy,0.50579
trani_accuracy,0.84951
val_accuracy,0.6165


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▆▆██████████
best_trani_accuracy,▁▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████████████
best_val_accuracy,▁▂▂▂▂▂▂▂▃▃▃▃▅▅▅▅▅▆▇▇▇▇▇▇▇▇▇▇████████████
test_accuracy,█▁▁▁▁▁▁▁▁▂▂▁▅▄▄▄▂▅▅▄▄▅▃▄▆▅▇▅█▄█▅▇▆▇▇▇▆▇▇
trani_accuracy,▁▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇█████▇██████████
val_accuracy,▁▂▂▂▂▂▂▂▂▃▃▂▅▄▄▄▃▆▆▅▅▆▄▅▆▅▇▆█▅█▆▆▆▇▇▇▇▇▇

0,1
best_test_accuracy,0.49807
best_trani_accuracy,0.86246
best_val_accuracy,0.63413
test_accuracy,0.47297
trani_accuracy,0.84951
val_accuracy,0.61461


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▂▂▂▂▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▂▃▃▃▄▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇██████
best_val_accuracy,▁▁▁▁▁▁▁▃▃▃▃▅▅▅▇▇▇▇██████████████████████
test_accuracy,▁▁▁▁▁▁▁▂▂▁▂▅▅▃▆▆▅▆▇▄▆▄▅▆▆▆▇▇▅▆▇▆▇█▆▆▇▇▇▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▂▃▃▂▄▄▄▄▅▄▆▄▅▆▇▇▇▇▆▇▇▇▇▇▇▇████
val_accuracy,▁▁▁▁▁▁▁▃▃▁▂▅▅▄▇▇▅▇█▅▇▄▆▇▇▇██▅▆▆▆██▆▆▇▇▇▇

0,1
best_test_accuracy,0.58494
best_trani_accuracy,0.94175
best_val_accuracy,0.65932
test_accuracy,0.54633
trani_accuracy,0.94175
val_accuracy,0.63665


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▆▇▇██████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇███████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▆▆▆██████████████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▆▅▄█▆▆▆▆▆█▆▇▇▇▆▇▆▆▇▆▇▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇███████
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▆▅▄█▆▆▆▇▆█▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇

0,1
best_test_accuracy,0.60039
best_trani_accuracy,0.95146
best_val_accuracy,0.67884
test_accuracy,0.54247
trani_accuracy,0.94498
val_accuracy,0.65239


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03334016005198161, max=1.0)…

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████████████████
best_val_accuracy,▁▃▄▄▄▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇███████████████████
test_accuracy,█▁▁▁▁▁▁▁▁▂▂▄▂▃▄▄▂▇▆▂▆▇▆▅▆▅▇▆▇▆▆▆▇▇▆▅▆▆▆▆
trani_accuracy,▁▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████████████████
val_accuracy,▁▃▄▄▄▄▄▄▄▅▅▆▄▅▆▆▅▇▇▅▇█▇▇█▇████████▇▇▇██▇

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.94984
best_val_accuracy,0.66688
test_accuracy,0.54247
trani_accuracy,0.93689
val_accuracy,0.64736


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▂▂▂▃▅▆▆▆▆▆▇▇▇▇██████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▂▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇█████████
best_val_accuracy,▁▁▁▁▁▁▁▁▂▂▂▄▅▆▆▆▆▆▇▇▇▇██████████████████
test_accuracy,▁▁▁▁▁▁▁▁▂▁▁▃▅▆▃▃▃▅▇▆▆▇█▇▆▄▇▆▇█▇▇▇▇██▇▇▇▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▂▃▄▂▃▂▄▅▅▅▆▆▆▆▅▇▆▇▇▇█████████
val_accuracy,▁▁▁▁▁▁▁▁▂▁▂▄▅▆▃▃▃▅▇▆▆▇█▇▆▅▆▆▇▇▇▆▇▆▇▇▇▆▇▇

0,1
best_test_accuracy,0.57336
best_trani_accuracy,0.95469
best_val_accuracy,0.66814
test_accuracy,0.55212
trani_accuracy,0.95469
val_accuracy,0.64358


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▂▅▅▅▅▆▆▆▆▆█████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▂▂▃▃▃▃▄▄▄▅▆▆▆▆▆▇▇▇▇▇████████████
best_val_accuracy,▁▁▁▁▁▁▁▁▂▂▅▅▅▅▅▅▅▆▆█████████████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▂▅▅▅▂▆▅▄▆▄█▅█▆▆▆▆▄▄▇▅▆▇▆▆█▅▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▁▁▂▁▃▃▃▂▄▄▄▅▅▆▅▆▆▇▇▇▅▆█▇▇█████████
val_accuracy,▁▁▁▁▁▁▁▁▂▂▅▅▅▂▅▅▄▆▄█▅█▆▆▆▆▅▅▇▅▆▇▇▇█▅▇▆▆▇

0,1
best_test_accuracy,0.60039
best_trani_accuracy,0.94984
best_val_accuracy,0.68766
test_accuracy,0.54826
trani_accuracy,0.94822
val_accuracy,0.65869


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▁▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████████
best_val_accuracy,▁▁▄▄▄▄▄▄▄▄▄▄▅▅▇▇▇▇▇▇▇▇██████████████████
test_accuracy,██▁▁▁▁▁▁▁▁▁▁▁▂▃▅▅▅▅▅▄▃▆▅▆▄▄▅▆▅▅▅▅▆▆▅▆▅▅▅
trani_accuracy,▁▁▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████████
val_accuracy,▁▁▄▄▄▄▄▄▄▄▄▄▄▅▆██▇██▇▆█▇█▇▇▇█▇▇▇▇██▇██▇█

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.92718
best_val_accuracy,0.65554
test_accuracy,0.53282
trani_accuracy,0.92557
val_accuracy,0.63413


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▂▄▆▆▆▆▆▆▆▆▆████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▂▃▃▃▃▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇███████
best_val_accuracy,▁▂▂▂▂▂▂▂▂▃▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▂▅█▃▆▅▄▆████▇▇▆▇▇▇▇▆▅▆▇▇▇▆▇▇▇▇▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▂▃▂▃▃▄▃▄▅▅▆▆▆▆▆▇▇▇▇▇▆▇▇▇█▇█████
val_accuracy,▁▂▂▂▂▂▂▂▂▃▅▆▄▆▅▅▆▇▇██▇▇▆▇▇██▇▆▆▇▇█▇▇▇█▇▇

0,1
best_test_accuracy,0.60811
best_trani_accuracy,0.95307
best_val_accuracy,0.67128
test_accuracy,0.52896
trani_accuracy,0.95307
val_accuracy,0.64673


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▃▅▅▅▅▅▅▅▅████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▇▇▇▇▇██████████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▂▄▄▄▄▅▅▅▆▆▆▆▆▆▆████████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▃▅▄▅▄▂▄▅▅█▆▆▄▆▄▆▅▅▆▅▅▆▅▅▆
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▃▂▂▃▃▄▄▄▄▅▄▅▅▆▅▇▇▆▇▇█▇▇███████
val_accuracy,▁▁▁▁▁▁▁▁▁▂▄▄▃▄▄▅▅▆▆▅▄▆▅▆█▇▇▆▇▆█▇▆▇▇▇██▇▇

0,1
best_test_accuracy,0.60811
best_trani_accuracy,0.93204
best_val_accuracy,0.64421
test_accuracy,0.52896
trani_accuracy,0.93204
val_accuracy,0.63602


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▃▃▃▃▅▅▅▅▇▇▇▇▇▇▇▇▇▇▇███████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▃▃▃▃▃▄▄▄▅▅▆▆▆▆▆▆▆▆▆███████████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▄▄▄▄▆▆▆▆▇▇▇███████████████████
test_accuracy,▁▁▁▁▁▁▁▁▁▁▃▂▃▃▅▄▂▃▆▄▆▇▅▄▇▆▅▆▄█▇▇▆▇▇▇▆▆▇▇
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▃▃▃▃▃▄▃▃▄▃▆▆▅▅▆▆▆▆▅█▇▇▇███████
val_accuracy,▁▁▁▁▁▁▁▁▁▁▄▃▄▄▆▅▃▄▆▄▇█▅▅▇▇▆▆▅█▇▇▆▇▇▇▇▇▇▇

0,1
best_test_accuracy,0.57529
best_trani_accuracy,0.95469
best_val_accuracy,0.65113
test_accuracy,0.5444
trani_accuracy,0.95146
val_accuracy,0.63665


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▂▂▃▆▆▆▇▇▇█████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▂▂▂▂▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇████████
best_val_accuracy,▁▁▁▁▁▁▃▃▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇█████████████████
test_accuracy,▁▁▁▁▁▁▂▁▃▆▆▃▇▇▇█▆▄▄█▄▄▆█▆▅▆▄▇▇▅▇▇▆█▅▇▆▆▆
trani_accuracy,▁▁▁▁▁▁▁▁▂▂▂▂▃▃▄▃▄▄▃▅▃▄▅▅▆▅▆▅▇▇▇▇███▇█▇██
val_accuracy,▁▁▁▁▁▁▃▂▅▆▆▃▇▆▇▇▆▅▄▇▄▅▇█▇▆▇▆▇▇▆▇█▇█▇█▇▇▇

0,1
best_test_accuracy,0.57722
best_trani_accuracy,0.93689
best_val_accuracy,0.67191
test_accuracy,0.52124
trani_accuracy,0.93689
val_accuracy,0.64987


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁██████████████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▃▃▃▃▅▅▆▆▆▆▆▆▇▇▇▇▇███████████████
best_val_accuracy,▁▁▁▁▁▂██████████████████████████████████
test_accuracy,▁▁▁▁▁▁█▁▄▃▃▂▅▅▆▄▆▄▆▄▇▅▅▅▅▅▆▅▃▅▅▆▅▆▄▅▅▄▅▅
trani_accuracy,▁▁▁▁▁▁▁▁▃▂▃▃▅▅▆▅▆▅▆▆▇▇▇▇▇███▇███████████
val_accuracy,▁▁▁▁▁▂█▁▄▃▄▃▆▆▇▅▇▅▇▅█▅▆▆▆▆▇▆▅▆▇▇▆▇▆▇▆▆▆▆

0,1
best_test_accuracy,0.6332
best_trani_accuracy,1.0
best_val_accuracy,0.6568
test_accuracy,0.51931
trani_accuracy,1.0
val_accuracy,0.62594


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▃▃▃▃▄▄████████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▂▂▂▃▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇██████████████
best_val_accuracy,▁▁▁▁▁▁▃▃▃▃▅▅████████████████████████████
test_accuracy,▁▁▁▁▁▁▃▂▂▃▄▃▆▃▇▄▅▅▃▇▄▇▇▅█▅▆▆▅▄▅▇▆▇▅▆▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▂▁▂▃▄▃▅▄▅▄▆▅▄▆▆▇▆▆▇▇██▇▇▇█████████
val_accuracy,▁▁▁▁▁▁▃▂▂▃▅▃▇▄▇▄▅▅▄▇▅▇█▅█▆▇▇▆▅▆▇▆▇▆▆▆▆▆▆

0,1
best_test_accuracy,0.64093
best_trani_accuracy,0.99838
best_val_accuracy,0.68451
test_accuracy,0.54826
trani_accuracy,0.99838
val_accuracy,0.64295


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▂▂▂▂▄▆▆▆▆█████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▃▄▄▄▅▅▆▆▆▆▆▆▇▇▇███████████████
best_val_accuracy,▁▁▁▁▁▁▂▂▂▂▄▅▆▆▆▇▇▇▇▇▇▇██████████████████
test_accuracy,▁▁▁▁▁▁▂▂▂▂▄▆▆▄▅█▇▅▇▅▃▅▇▅▆▆▅▄▅▆▅▅▆▆▅▅▅▆▅▅
trani_accuracy,▁▁▁▁▁▁▁▁▁▁▃▄▄▄▅▅▆▆▆▆▄▆▇▇▇██▇████████████
val_accuracy,▁▁▁▁▁▁▂▂▂▂▄▅▆▄▅▇▆▅▇▅▄▅█▆▆▆▆▅▇▇▇▆▇▇▆▆▆▇▆▆

0,1
best_test_accuracy,0.63514
best_trani_accuracy,0.99838
best_val_accuracy,0.6864
test_accuracy,0.53861
trani_accuracy,0.99838
val_accuracy,0.6461


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▂▄▄▄▄▇▇▇▇▇▇▇███████████████████████
best_trani_accuracy,▁▁▁▁▁▁▃▃▃▃▄▄▄▅▆▆▆▆▆▇▇▇▇█████████████████
best_val_accuracy,▁▁▁▁▁▁▄▄▄▄▇▇▇▇▇▇▇███████████████████████
test_accuracy,▁▁▁▁▁▂▄▂▄▂▇▆▆▇▇▆▃█▆██▇▇▇▇█▇█▆▇█▇▅▆▆▇▇▇▇▆
trani_accuracy,▁▁▁▁▁▁▃▂▃▂▄▄▄▅▆▆▃▆▆▇▇▇▇█████▇███████████
val_accuracy,▁▁▁▁▁▂▄▂▄▃▇▆▆▇▇▆▃█▇▇▇▇▇▇▇█▇█▆▇█▇▅▆▇▇▇▇▇▇

0,1
best_test_accuracy,0.59459
best_trani_accuracy,1.0
best_val_accuracy,0.67569
test_accuracy,0.53475
trani_accuracy,1.0
val_accuracy,0.65302


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_trani_accuracy,▁▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
best_val_accuracy,▁▄▄▄▄▄▄▄▄▄▆▆▆███████████████████████████
test_accuracy,█▁▁▁▁▁▁▁▁▁▃▅▃▆▅▄▆▅▆▆▇▃▅▅▇▄▇▅▅▆▆▆▆▆▅▆▅▆▆▆
trani_accuracy,▁▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇██▇██████████████
val_accuracy,▁▄▄▄▄▄▄▄▄▄▆▆▆█▇▆▇▇▇▇█▆▆▇█▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇

0,1
best_test_accuracy,0.62162
best_trani_accuracy,0.98706
best_val_accuracy,0.65302
test_accuracy,0.53668
trani_accuracy,0.98706
val_accuracy,0.62406


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▂▃▆▆▆▆▆▆▆▆▆▇▇▇████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▂▂▂▄▄▄▄▆▆▆▆▇▇▇▇▇▇████████████████
best_val_accuracy,▁▁▁▁▁▁▂▃▅▅▅▆▆▆▇▇▇███████████████████████
test_accuracy,▁▁▁▁▁▁▂▃▇▂▂▆▃▃▆▇▆██▇▅▅▆▆▅▅█▄▆▇▅▅▅▅▆▇▅▅▆▅
trani_accuracy,▁▁▁▁▁▁▁▂▂▂▂▄▃▃▆▅▆▆▆▇▆▇▇▇█▇█▆██▇█████████
val_accuracy,▁▁▁▁▁▁▂▃▆▃▃▇▄▄▇▇▆█▇█▅▆▆▆▆▆█▅▆▇▆▆▆▆▇▇▆▆▆▆

0,1
best_test_accuracy,0.70463
best_trani_accuracy,0.99676
best_val_accuracy,0.69773
test_accuracy,0.54633
trani_accuracy,0.99515
val_accuracy,0.64861


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▇▇▇▇▇▇▇▇████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇██████████████
best_val_accuracy,▁▁▁▁▁▁▁▁▇▇▇▇▇▇▇▇████████████████████████
test_accuracy,▁▁▁▁▁▁▁▁█▁▆▇▇▃▆▆▆▇▆▄▇▅▅▅▅▇▆▅▅▆▆▆▅▆▅▅▆▅▅▆
trani_accuracy,▂▂▂▂▂▂▂▂▁▂▄▄▄▃▅▅▆▆▆▅▆▅▇▆▆▇██████████████
val_accuracy,▁▁▁▁▁▁▁▁█▁▇██▄▇███▇▄█▆▇▇▆▇▇▇▆▇▇▇▇▇▆▆▇▇▇▇

0,1
best_test_accuracy,0.68533
best_trani_accuracy,0.99838
best_val_accuracy,0.66499
test_accuracy,0.56371
trani_accuracy,0.99838
val_accuracy,0.63476


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▄▄▄▄▄▄▆▆▆▆▆█████████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████████████
best_val_accuracy,▁▁▁▁▁▁▁▁▅▅▅▅▅▅██████████████████████████
test_accuracy,▁▁▁▁▁▁▁▁▄▁▂▂▃▁▄▃▅▄▅█▃▆▅▅▄▃▆▅▄▅▅▄▄▅▄▄▅▅▅▅
trani_accuracy,▁▁▁▁▁▁▁▁▃▁▂▂▄▂▄▄▅▄▅▄▄▆▆▆▅▆▇▇▆▇▇▇▇███████
val_accuracy,▁▁▁▁▁▁▁▁▅▁▃▂▅▂▅▄▆▅▆█▄▇▇▆▆▆▇▇▆▇▆▆▆▇▆▆▇▇▇▇

0,1
best_test_accuracy,0.70077
best_trani_accuracy,0.9822
best_val_accuracy,0.64547
test_accuracy,0.55405
trani_accuracy,0.9822
val_accuracy,0.6272


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▂▂▂▄▄▄▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▁▁▁▂▂▂▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇████████
best_val_accuracy,▁▁▁▁▁▁▁▁▂▃▃▄▄▄▅▅▅▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
test_accuracy,▁▁▁▁▁▁▁▁▂▂▁▄▃▃▆▄▅█▆▇▇▅▇▅▆▇▅▇▅▆▇▆▇▇▆▆▆▇▆▆
trani_accuracy,▁▁▁▁▁▁▁▁▁▂▁▂▂▂▄▃▃▅▅▅▆▄▆▅▆▇▆▇▆▇▇▇████████
val_accuracy,▁▁▁▁▁▁▁▁▂▃▁▄▃▄▆▄▅▇▆▇█▅▇▅▆▇▅█▅▇▇▇██▇▇▇█▇▇

0,1
best_test_accuracy,0.65058
best_trani_accuracy,0.98544
best_val_accuracy,0.66877
test_accuracy,0.56178
trani_accuracy,0.9822
val_accuracy,0.63161


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▂▂▂▂▅▅▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████
best_trani_accuracy,▁▁▁▁▁▁▂▂▂▂▃▃▄▄▄▅▅▅▅▅▅▆▇▇▇▇▇█████████████
best_val_accuracy,▁▁▁▁▁▁▂▂▃▃▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇██████████████
test_accuracy,▁▁▁▁▁▁▂▂▂▂▅▂▇▅▄▇▆█▅▅▅▆▇▇▆▆▆▇▇▇▆▆▆▇▇▇▇▇▇▇
trani_accuracy,▁▁▁▁▁▁▂▂▂▂▃▂▄▄▄▅▅▄▅▅▅▆▇▇▆▇▇████▇▇███████
val_accuracy,▁▁▁▁▁▁▃▃▃▂▅▂▆▅▅▇▇▇▅▅▆▇▇▇▇▇▇███▇▇▆▇▇█▇▇▇▇

0,1
best_test_accuracy,0.62741
best_trani_accuracy,0.99515
best_val_accuracy,0.66688
test_accuracy,0.57336
trani_accuracy,0.99515
val_accuracy,0.64169


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▃▅▅▆▆██████████████████████████████
best_trani_accuracy,▁▁▁▁▁▂▂▂▃▄▄▅▆▆▇▇████████████████████████
best_val_accuracy,▁▁▁▁▂▄▆▆▇▇██████████████████████████████
test_accuracy,▁▁▁▁▁▄▃▃▆▅▃▆▆▄▇▅▄▃▅█▅▆▆▄▆▅▅▅▅▅▄▅▅▆▅▅▅▅▅▅
trani_accuracy,▁▁▁▁▁▂▂▂▃▄▃▅▆▆▇▇█▆█▇████████████████████
val_accuracy,▁▁▁▁▂▅▃▄▇▆▄▇█▅█▆▅▄▅█▇▇▇▅▇▆▆▆▆▆▅▇▆▇▆▆▇▆▆▆

0,1
best_test_accuracy,0.66988
best_trani_accuracy,1.0
best_val_accuracy,0.69773
test_accuracy,0.51737
trani_accuracy,1.0
val_accuracy,0.64673


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▂▃▅▅▇▇▇▇▇▇▇▇▇▇██████████████████
best_trani_accuracy,▁▁▁▁▁▁▁▁▂▂▄▄▅▅▆▆▇▇██████████████████████
best_val_accuracy,▁▁▁▁▁▁▁▁▂▄▆▆▇▇▇▇▇▇▇▇▇▇██████████████████
test_accuracy,▁▁▁▁▁▁▁▁▂▃▃▃█▇▇▆██▅▅▅▅▅▆▇▆▆▆▅▄▅▆▆▆▆▆▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▁▁▂▂▃▃▅▅▆▆▇▇█▇█▇██████████████████
val_accuracy,▁▁▁▁▁▁▁▁▃▄▄▄▇███▇▇▆▅▆▅▆▆█▇▆▆▅▅▆▇▇▆▇▇▆▇▆▆

0,1
best_test_accuracy,0.62741
best_trani_accuracy,1.0
best_val_accuracy,0.67254
test_accuracy,0.53282
trani_accuracy,1.0
val_accuracy,0.63161


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▂▄▆▆▇▇▇▇▇▇▇▇▇▇██████████████████████
best_trani_accuracy,▁▁▁▁▂▃▃▃▄▄▆▆▆▇▇▇▇▇██████████████████████
best_val_accuracy,▁▁▁▁▂▅▆▆████████████████████████████████
test_accuracy,▁▁▁▁▂▅▄▂█▃▂▅▅▆█▆▄▅▆▇▇▇▆▇▆▆▆▆▄▅▅▆▇▅▅▆▆▆▆▆
trani_accuracy,▁▁▁▁▂▃▃▂▃▄▂▆▆▇▆▇▇▇██████████████████████
val_accuracy,▁▁▁▁▂▅▅▂█▄▃▆▆▇█▆▅▆▇▇▇▇▆▇▆▆▆▇▅▅▆▆▇▆▆▆▆▆▆▆

0,1
best_test_accuracy,0.64672
best_trani_accuracy,1.0
best_val_accuracy,0.70403
test_accuracy,0.55985
trani_accuracy,1.0
val_accuracy,0.66436


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▂▂▅▅████████████████████████████████
best_trani_accuracy,▁▁▁▁▁▂▃▄▄▄▅▅▇▇▇▇████████████████████████
best_val_accuracy,▁▁▁▁▂▂▆▆████████████████████████████████
test_accuracy,▁▁▁▁▂▂▆▆▅▄▂▄▅▆▆█▇█▆▃▆▇▆▆▅▅▆▅▆▆▆▄▆▆▆▆▆▆▇▆
trani_accuracy,▁▁▁▁▁▂▃▄▄▄▃▅▇▇▇▇█▇█▆████████████████████
val_accuracy,▁▁▁▁▂▂▆▇▆▅▃▅▆▆▆█▇█▆▃▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆

0,1
best_test_accuracy,0.68147
best_trani_accuracy,1.0
best_val_accuracy,0.69081
test_accuracy,0.54633
trani_accuracy,1.0
val_accuracy,0.64043


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▃▃▄▄████████████████████████████████
best_trani_accuracy,▁▁▁▁▁▁▂▃▄▄▆▆▇▇██████████████████████████
best_val_accuracy,▁▁▁▁▃▃▄▄████████████████████████████████
test_accuracy,▁▁▁▁▃▁▂▄█▂▆█▆▇▅▄▄▅▇█▆▆▄▅▆▃▆▅▅▆▆▆▆▆▆▆▆▆▆▆
trani_accuracy,▁▁▁▁▁▁▁▃▄▂▆▄▇▇█▇▇██▇█████▇██████████████
val_accuracy,▁▁▁▁▃▁▂▄█▂▆█▆▇▆▄▅▅▇█▅▆▄▅▇▃▇▅▄▇▆▇▆▆▇▆▆▇▆▆

0,1
best_test_accuracy,0.62162
best_trani_accuracy,1.0
best_val_accuracy,0.67884
test_accuracy,0.54826
trani_accuracy,1.0
val_accuracy,0.64673


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▂▄▄▄▅▅▇▇▇▇▇▇▇███████████████████████
best_trani_accuracy,▁▁▁▁▁▂▃▃▃▄▄▄▆▆▇▇▇▇██████████████████████
best_val_accuracy,▁▁▁▁▂▅▅▅▆▆▇▇▇▇▇▇▇███████████████████████
test_accuracy,▁▁▁▁▂▄▃▂▁▅▂▃▅▂▅▇▂█▃▅▇▆▇▆▇▇██▆█▆▆▆▇▇▆▅▇▅▆
trani_accuracy,▁▁▁▁▁▂▃▂▁▄▃▄▆▂▇▇▅▇▆█████████████████████
val_accuracy,▁▁▁▁▂▅▄▃▂▆▃▄▆▂▅▇▃█▃▅▇▅▇▆▇▇▇▇▆▇▆▅▅▆▆▅▅▆▅▅

0,1
best_test_accuracy,0.59653
best_trani_accuracy,1.0
best_val_accuracy,0.67003
test_accuracy,0.53668
trani_accuracy,1.0
val_accuracy,0.6165


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▃▃▄▄▄▄▆▆▆▆▆▆████████████████████████
best_trani_accuracy,▁▁▁▁▂▂▄▄▅▆▆▇████████████████████████████
best_val_accuracy,▁▁▁▁▅▅▅▅▆▆▇▇▇▇▇▇████████████████████████
test_accuracy,▁▁▁▁▂▁▄▄▄▅▄▅▇▂▆▇▇▅█▄▇▅▆▆▅▄▅▇▆▆▆▆▆▆▅▅▅▆▆▆
trani_accuracy,▁▁▁▁▂▁▄▄▅▆▆▇█▄██████████████████████████
val_accuracy,▁▁▁▁▄▁▆▅▅▆▅▆█▂▇█▆▅█▅█▆▆▆▅▅▆▇▇▆▇▆▆▆▆▆▆▆▇▇

0,1
best_test_accuracy,0.66216
best_trani_accuracy,1.0
best_val_accuracy,0.66751
test_accuracy,0.52896
trani_accuracy,1.0
val_accuracy,0.62972


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▅▅██████████████████████████████
best_trani_accuracy,▁▆▆▆▆▆▆▆▇▇▇▇████████████████████████████
best_val_accuracy,▁▄▄▄▄▄▄▄▇███████████████████████████████
test_accuracy,▇▁▁▁▁▁▂▁▄▇█▄▆▃▇▆▇▅▆▆▅▄▄▇▅▅▅▄▅▄▅▅▅▆▅▅▅▅▅▅
trani_accuracy,▁▆▆▆▆▆▆▆▇▇▇▇█▇██████████████████████████
val_accuracy,▁▄▄▄▄▄▄▄▆██▆▇▅███▇▇▇▇▆▆█▇▇▇▆▇▆▇▇██▇▇▇▇▇▇

0,1
best_test_accuracy,0.67375
best_trani_accuracy,1.0
best_val_accuracy,0.67065
test_accuracy,0.53089
trani_accuracy,1.0
val_accuracy,0.63413


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▂▆▆▆▆▆████████████████████████████
best_trani_accuracy,▁▂▂▂▂▂▂▂▂▃▅▅▆▆▆▇████████████████████████
best_val_accuracy,▁▂▂▂▂▂▃▆▆▆▇▇████████████████████████████
test_accuracy,▁▁▁▁▁▁▁▆▁▃▆▆█▃▄▄▅▅▅▅▃▆▄▆▅▅▄▅▄▄▅▅▅▄▅▅▄▅▅▅
trani_accuracy,▁▂▂▂▂▂▂▂▂▃▅▅▃▅▆▇████▇█▇█████████████████
val_accuracy,▁▂▂▂▂▂▂▆▂▅▇▇█▅▅▆▆▆▆▆▅▇▅▇▆▆▆▆▆▅▆▆▆▆▆▆▅▆▆▆

0,1
best_test_accuracy,0.66795
best_trani_accuracy,1.0
best_val_accuracy,0.67821
test_accuracy,0.53668
trani_accuracy,1.0
val_accuracy,0.6335


data 51 too long length 148
data 132 too long length 141
data 133 too long length 145
data 155 too long length 128
data 156 too long length 147
data 222 too long length 145
data 285 too long length 131
data 286 too long length 156
data 375 too long length 147
data 384 too long length 127
data 431 too long length 162
data 53 too long length 153
data 54 too long length 148
data 65 too long length 137
data 66 too long length 200
data 74 too long length 137
data 75 too long length 127
data 386 too long length 155
data 387 too long length 156
data 401 too long length 139
data 461 too long length 127
data 462 too long length 149
data 463 too long length 144
data 640 too long length 131
data 671 too long length 137
data 672 too long length 174
data 773 too long length 137
data 836 too long length 160
data 837 too long length 200
data 876 too long length 138
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

  0%|          | 0/1200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_test_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁██████████████████████████
best_trani_accuracy,▁▆▆▆▆▆▇▇▇▇▇▇████████████████████████████
best_val_accuracy,▁▄▄▄▄▅▆▆▇▇▇▇▇▇██████████████████████████
test_accuracy,█▁▁▁▁▃▃▃▄▃▄▅▅▄▅▅▅▅▄▅▅▆▅▄▅▄▅▅▅▆▆▇▅▅▅▅▅▅▅▅
trani_accuracy,▁▆▆▆▆▆▇▇▇▇▇▇█▇██████████████████████████
val_accuracy,▁▄▄▄▄▅▅▅▆▅▆▇▇▆▇▇▇▆▇█▇█▇▆█▇▇▇█▇▇█▇▇▇▇▇█▇▇

0,1
best_test_accuracy,0.639
best_trani_accuracy,1.0
best_val_accuracy,0.66058
test_accuracy,0.51544
trani_accuracy,1.0
val_accuracy,0.64169
