In [None]:
from nltk.parse.corenlp import CoreNLPParser,CoreNLPDependencyParser
from tqdm.auto import trange, tqdm
from dataclasses import dataclass
import pandas as pd
import ast
import itertools
import wandb
import evaluate
from itertools import cycle
import numpy as np
import random
import time
from datetime import datetime

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, GATConv, GATv2Conv, TransformerConv, SAGEConv, GraphConv, ResGatedGraphConv, ChebConv, global_mean_pool
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

from transformers import AutoTokenizer, get_scheduler
from transformers.models.bert.modeling_bert import BertModel


In [None]:
import transformers
# Display the installed transformers version in the cell output.
transformers.__version__

'4.18.0'

In [None]:
import torch_geometric as pyg
# Display the installed torch_geometric version in the cell output.
pyg.__version__

'2.2.0'

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

# definitions

In [None]:
def set_seed(seed):
    '''
    Seed the Python `random`, NumPy and PyTorch random number generators
    with the same `seed`.
    '''
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

In [None]:
def freeze_model(model, freeze_bert):
    '''
    Freeze parameters of a BERT-style model in place (requires_grad = False).

    model: object exposing `parameters()`, `embeddings` and `encoder.layer`
        (the latter must support slicing).
    freeze_bert:
        - boolean True: freeze every parameter of `model`.
        - boolean False or None: freeze nothing.
        - integer n (Python int or any NumPy integer): freeze
          `model.embeddings` and `model.encoder.layer[:n]`. A negative n
          follows normal slice semantics; n == 0 freezes the embeddings only.

    Returns the same `model` object.
    Raises TypeError for any other type of `freeze_bert`.
    '''
    if freeze_bert is None:
        return model

    # Booleans must be tested before integers: bool is a subclass of int and
    # `1 == True`, so an equality test cannot tell "freeze everything" apart
    # from "freeze the bottom 1 layer".
    if isinstance(freeze_bert, (bool, np.bool_)):
        if freeze_bert:
            for param in model.parameters():
                param.requires_grad = False
        return model

    if isinstance(freeze_bert, (int, np.integer)):
        for param in model.embeddings.parameters():
            param.requires_grad = False
        for layer in model.encoder.layer[:int(freeze_bert)]:
            for param in layer.parameters():
                param.requires_grad = False
        return model

    raise TypeError(
        f"freeze_bert must be a bool, an integer or None, got {type(freeze_bert).__name__}"
    )

In [None]:
@dataclass
class myGNNoutput:
    """Container for the values returned by `myHomoGNN.forward`."""
    # NOTE(review): the `None` annotations are placeholders rather than real
    # types; the fields presumably hold torch tensors — confirm before tightening.
    loss: None   # value produced by the model's loss function
    logit: None  # output of the classifier layer
    emb: None    # representation fed to the classifier (after readout and dropout)

In [None]:
def get_loader(df, add_syllables=False, col='pos_seqs', limit=None, batch_size=32, shuffle=True, max_length=128):
    '''
    Build a torch_geometric DataLoader holding one graph per row of `df`.

    Relies on the module-level `tokenizer` and `bert` objects: the entries of
    `row[col]` are joined with spaces, tokenized, and the BERT
    `last_hidden_state` of that sequence becomes the node feature matrix `x`.

    df: DataFrame with columns 'homo_edges', 'author', `col` and, when
        `add_syllables` is set, 'num_syllables'.
    add_syllables: also attach `num_syllables` to every graph, with the value
        17 added at both ends (presumably for the special tokens the tokenizer
        adds — TODO confirm).
    col: name of the column holding the tag sequence fed to the tokenizer.
    limit: if not None, use a random sample of at most `limit` rows. The sample
        is drawn with `DataFrame.sample`, so seeding NumPy beforehand makes it
        reproducible.
    batch_size, shuffle: forwarded to the DataLoader.
    max_length: tokenizer truncation length; rows whose edge count exceeds
        `max_length - 1` are skipped with a printed message.

    Returns the DataLoader.
    '''
    if limit is not None:
        dfnew = df.sample(frac=1).reset_index(drop=True)[:limit]
    else:
        dfnew = df

    data_list = []
    for i in range(len(dfnew)):
        # Read from the (possibly sub-sampled) frame, not from the original
        # `df`; otherwise the first `limit` rows would always be selected.
        curr = dfnew.iloc[i]

        # assumes 'homo_edges' is a list of [source, target] pairs, so the
        # transpose has shape (2, num_edges) — TODO confirm
        edge_index = torch.tensor(curr['homo_edges']).T
        if edge_index.shape[1] > max_length-1:
            print(f"data {i} too long length {edge_index.shape[1]}")
            continue

        tokens = tokenizer(' '.join(curr[col]), padding=True, truncation=True, max_length=max_length, return_tensors='pt')
        # The encoder is only a fixed feature extractor here, so no autograd
        # graph is needed for its forward pass.
        with torch.no_grad():
            node_features = bert(**tokens).last_hidden_state.squeeze(0)

        data = Data()
        data.edge_index = edge_index
        data.x = node_features
        data.y = torch.tensor([curr['author']])
        if add_syllables:
            data.num_syllables = torch.tensor([17]+curr['num_syllables']+[17])

        data_list.append(data)

    loader = DataLoader(data_list, batch_size=batch_size, shuffle=shuffle)
    return loader


In [None]:
# Maps the `gnntype` argument of myHomoGNN to the torch_geometric layer class.
GNNtype2layer = {'GCNConv':GCNConv, 
                 'ChebConv':ChebConv, 
                 'SAGEConv':SAGEConv, 
                 'GraphConv':GraphConv,
                 'ResGatedGraphConv':ResGatedGraphConv, 
                 'GATConv':GATConv, 
                 'GATv2Conv':GATv2Conv}

class myHomoGNN(torch.nn.Module):
    '''
    Graph classifier: a stack of identical GNN layers (each followed by ReLU),
    a graph-level readout, dropout and a linear classifier.

    num_layers: number of GNN layers.
    num_classes: output size of the classifier.
    add_self_loops: forwarded to GCNConv / GATConv / GATv2Conv layers only.
    gnntype: key of `GNNtype2layer` (KeyError if unknown).
    add_syllables: if True, an embedding of the per-node syllable count is
        added to the node features before the first GNN layer.
    pos_emb_dim: width of the node features and of every hidden layer; must
        match the hidden size of the encoder that produced `x` (default 32).
    '''
    def __init__(self, num_layers, num_classes, add_self_loops=False, gnntype='GCNConv', add_syllables=False, pos_emb_dim=32):
        super().__init__()
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.add_self_loops = add_self_loops
        self.GNNlayer = GNNtype2layer[gnntype]
        self.pos_emb_dim = pos_emb_dim # this is determined by POS Bert
        
        self.add_syllables = add_syllables
        if add_syllables:
            self.num_syllables = 18 # the longest word has 17 syllables
            self.syllable_emb_layer = nn.Embedding(self.num_syllables, self.pos_emb_dim)
        
        # Only some layer types accept `add_self_loops`; ChebConv needs its
        # filter size K.
        if gnntype in ['GCNConv', 'GATConv', 'GATv2Conv']:
            layer_kwargs = {'add_self_loops': self.add_self_loops}
        elif gnntype == 'ChebConv':
            layer_kwargs = {'K': 2}
        else:
            layer_kwargs = {}

        self.gnns = nn.ModuleList(
            self.GNNlayer(self.pos_emb_dim, self.pos_emb_dim, **layer_kwargs)
            for _ in range(num_layers)
        )
                
        self.classifier = nn.Linear(self.pos_emb_dim, self.num_classes)
        self.lossfn = nn.CrossEntropyLoss()
        
    def forward(self, x, edge_index, batch, y, ptr, num_syllable=None, readout='pool'):
        '''
        x, edge_index, batch, ptr: attributes of a torch_geometric batch.
        y: class labels, one per graph; pass None to skip the loss (inference).
        num_syllable: per-node syllable counts; required when the model was
            built with add_syllables=True.
        readout: 'pool' averages the node embeddings of each graph; 'cls'
            takes the rows `x[ptr[:-1]]` (presumably the first node of each
            graph — TODO confirm against the batching convention).

        Returns a myGNNoutput(loss, logit, emb); `loss` is None when y is None.
        Raises ValueError for an unknown `readout` or a missing `num_syllable`.
        '''
        # Fail early: an unknown readout would otherwise skip pooling and
        # surface later as a batch-size mismatch inside the loss.
        if readout not in ('pool', 'cls'):
            raise ValueError(f"unknown readout {readout!r}; expected 'pool' or 'cls'")

        if self.add_syllables:
            if num_syllable is None:
                raise ValueError("num_syllable is required when add_syllables=True")
            x = x + self.syllable_emb_layer(num_syllable)
        
        for gnn in self.gnns:
            x = F.relu(gnn(x, edge_index))
        
        if readout == 'pool':
            x = global_mean_pool(x, batch) 
        else:
            x = x[ptr[:-1],:]
        
        x = F.dropout(x, training=self.training)
        logit = self.classifier(x)
        loss = self.lossfn(logit, y) if y is not None else None
        return myGNNoutput(loss=loss, logit=logit, emb=x)

In [None]:
# Load the tokenizer and encoder of the pretrained POS BERT from local files.
# The encoder is built without a pooling layer and put into eval mode.
checkpoint = '/scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, local_files_only=True)
bert = BertModel.from_pretrained(
    checkpoint,
    add_pooling_layer=False,
    local_files_only=True,
).eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


# CCAT50

In [None]:
# Columns stored in the CSV as strings holding Python literals; they are
# parsed back into Python objects after loading.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']

file = '../../data/CCAT50/processed/author_0,1_sent_2_train.csv'
df = pd.read_csv(file)

file = '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'
df_val = pd.read_csv(file)

# Parse the literal columns of both splits in one pass.
for frame in (df, df_val):
    for col in cols_to_eval:
        frame[col] = frame[col].map(ast.literal_eval)

## testing dataset size only

In [None]:
# Sweep over training-set size (and a few hyper-parameters) for the POS model.
preset_epochs = 60  # epoch budget for the largest training subset in LIMIT

warmup_ratio = 0.15  # share of all optimizer steps used for linear warm-up

valid_loader = get_loader(df_val)
num_valid_steps = len(valid_loader)

LIMIT = [1250, 1000, 750, 500, 250]
NUM_LAYERS = [3]
LR = [1e-3, 5e-4]
READOUT = ['pool']
GNNTYPE = ['GCNConv']
ADD_SELF_LOOPS = [False, True]

# Materialise the grid once so it can be both counted and iterated.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Load the metric once and reuse it for every evaluation instead of
# re-loading it each epoch.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops = args
    
    # Smaller subsets train for proportionally more epochs, so every run sees
    # roughly the same number of training examples.
    epochs = max(LIMIT)*preset_epochs//limit
    seed = np.random.randint(100)
    set_seed(seed)
    
    train_loader = get_loader(df, limit = limit)
    num_training_steps = len(train_loader)
    
    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2, 
                      add_self_loops=add_self_loops,
                      gnntype=gnntype
                     )
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)
    
    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
    }
    
    run = wandb.init(project="homo POS GNN (dataset size)", 
                     entity="fsu-dsc-cil", 
                     dir='/scratch/data_jz17d/wandb_tmp/', 
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)
    
    pbar = trange(epochs*num_training_steps, leave=False)
    for i_epoch in range(epochs):
        model.train()
        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
            output.loss.backward()
            optimizer.step()
            scheduler.step()
            pbar.update(1)

        model.eval()
        # Validation needs no gradients; skipping autograd saves memory and time.
        with torch.no_grad():
            for data in valid_loader:
                data = data.to(device)
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
                metric.add_batch(predictions=output.logit.argmax(axis=-1).cpu().numpy(), references=data.y.cpu().numpy())
        evaluation = metric.compute()
        evaluation.update({'global_step':pbar.n})
        wandb.log(evaluation, step=pbar.n)
    
    pbar.close()
    run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/20 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▂▂▄▄▄▅▆█▆▇█▇████████▇████████▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.66667
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▂▆▃▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇█▇▇█▇▇█▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73333
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▄▅▅▅▅▆▆▆▇▇▇▆████▇▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.64848
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▂▄▂▅▂▅▅▅▅▅▆▅▆▆▅▆▇▇▇▇▇▇▇▇▇▇█▇█
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.67273
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇█▇███▇███▇▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.55758
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▃▃▃▃▃▅▃▄▄▆▅▇▆▆▇▅▇▅▅█▇▇▇▇▆▆▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.6697
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▄▄▄▄▄▄▄▄▄▄▄▄▅▄▅▅▄▅▅▇▅▅▅█▇▆▇█▇▇▇█▇███
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.60303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▇██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▅██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333923021952311, max=1.0)…

  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▅██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


## testing upos

In [None]:
# Columns stored in the CSV as strings holding Python literals; they are
# parsed back into Python objects after loading.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']

file = '../../data/CCAT50/processed/author_0,1_sent_2_train.csv'
df = pd.read_csv(file)

file = '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'
df_val = pd.read_csv(file)

# Parse the literal columns of both splits in one pass.
for frame in (df, df_val):
    for col in cols_to_eval:
        frame[col] = frame[col].map(ast.literal_eval)

In [None]:
# Replace the module-level tokenizer/encoder with the pretrained UPOS BERT,
# loaded from local files without a pooling layer and put into eval mode.
checkpoint = '/scratch/data_jz17d/result/upos_mlm_corenlp/run_2/checkpoint-155000/'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, local_files_only=True)
bert = BertModel.from_pretrained(
    checkpoint,
    add_pooling_layer=False,
    local_files_only=True,
).eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/upos_mlm_corenlp/run_2/checkpoint-155000/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Sweep over training-set size (and a few hyper-parameters) for the UPOS model.
preset_epochs = 60  # epoch budget for the largest training subset in LIMIT

warmup_ratio = 0.15  # share of all optimizer steps used for linear warm-up

valid_loader = get_loader(df_val, col='upos_seqs')
num_valid_steps = len(valid_loader)

LIMIT = [1250, 1000, 750, 500, 250]
NUM_LAYERS = [3]
LR = [1e-3, 5e-4]
READOUT = ['pool']
GNNTYPE = ['GCNConv']
ADD_SELF_LOOPS = [False, True]

# Materialise the grid once so it can be both counted and iterated.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Load the metric once and reuse it for every evaluation instead of
# re-loading it each epoch.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops = args
    
    # Smaller subsets train for proportionally more epochs, so every run sees
    # roughly the same number of training examples.
    epochs = max(LIMIT)*preset_epochs//limit
    seed = np.random.randint(100)
    set_seed(seed)
    
    train_loader = get_loader(df, col = 'upos_seqs', limit = limit)
    num_training_steps = len(train_loader)
    
    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2, 
                      add_self_loops=add_self_loops,
                      gnntype=gnntype
                     )
    
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)
    
    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
    }
    
    run = wandb.init(project="homo UPOS GNN (dataset size)", 
                     entity="fsu-dsc-cil", 
                     dir='/scratch/data_jz17d/wandb_tmp/', 
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)
    
    pbar = trange(epochs*num_training_steps, leave=False)
    for i_epoch in range(epochs):
        model.train()
        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
            output.loss.backward()
            optimizer.step()
            scheduler.step()
            pbar.update(1)

        model.eval()
        # Validation needs no gradients; skipping autograd saves memory and time.
        with torch.no_grad():
            for data in valid_loader:
                data = data.to(device)
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
                logit = output.logit
                metric.add_batch(predictions=logit.argmax(axis=-1).cpu().numpy(), references=data.y.cpu().numpy())
        evaluation = metric.compute()
        evaluation.update({'global_step':pbar.n})
        wandb.log(evaluation, step=pbar.n)
    
    pbar.close()
    run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/20 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▅▇▇▇▇▇▇▇▇██▇▇▇▇▇▇▇▇▇██▇█▇████▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.6303
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▃▄▆▆▆▆▆▅▆▆▆▇▇▆▇▆▇▇▇▆███▇██▇██▇█████▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.62424
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▃▄▆▇▇▇▇▇▇▇█▇▇█████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.63333
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▅▆▆▆█▅▆▅▅▆▆▅▆▆▆▇▆▆▆▆▆▇▇▆▆▇▆▆▆▆█▆▇▆
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.59394
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▄▄▄▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇█████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.58788
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▃▅▅▆▅▅▅▆▅▇▆▆▆▆▇▆█▆▆█▇▆▇▇▇█▇▇█▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.64848
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▃▃▂▃▃▄▅▆▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.57879
global_step,2400.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▄▄▄▄▄▄▅▆▆▆▆▇▇█▇▇▇▇████▇█████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.61212
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▇█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁██████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁█████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2400 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁███████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5303
global_step,2400.0


## Testing GNN layer types (`gnntype`)

In [None]:
# Directory of the saved checkpoint; both loads below are restricted to local
# files (local_files_only=True), so nothing is fetched from the hub.
checkpoint = '/scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, local_files_only=True)

# Build the encoder without its pooling layer and put it in eval mode right away
# (nn.Module.eval() returns the module itself, so the call can be chained).
bert = BertModel.from_pretrained(
    checkpoint,
    local_files_only=True,
    add_pooling_layer=False,
).eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Columns that are stored in the CSVs as Python-literal strings and must be
# parsed back into Python objects after loading.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']


def _read_split(path, literal_cols):
    """Read one CSV split and parse its literal-encoded columns.

    Args:
        path: location of the CSV file to read.
        literal_cols: names of the columns whose cells are passed through
            ``ast.literal_eval``.

    Returns:
        The loaded DataFrame with every column in ``literal_cols`` replaced by
        its parsed values.
    """
    frame = pd.read_csv(path)
    for name in literal_cols:
        frame[name] = frame[name].apply(ast.literal_eval)
    return frame


file = '../../data/CCAT50/processed/author_0,1_sent_2_train.csv'
df = _read_split(file, cols_to_eval)

file = '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'
df_val = _read_split(file, cols_to_eval)

In [None]:
# Grid sweep over GNN layer types: one W&B run per hyper-parameter combination,
# training for `epochs` epochs and logging validation accuracy after each epoch.
epochs = 60

# Fraction of all optimisation steps used for the linear warm-up.
warmup_ratio = 0.15

LIMIT = [None]
NUM_LAYERS = [3]
LR = [1e-3, 5e-4]
READOUT = ['pool']
GNNTYPE = ['GCNConv', 'ChebConv', 'SAGEConv', 'GraphConv', 'ResGatedGraphConv', 'GATConv', 'GATv2Conv']
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [False]

# Materialise the grid once so it can be both counted and iterated
# (an itertools.product iterator is exhausted after a single pass).
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS, ADD_SYLLABLES))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Loading the metric is loop-invariant; compute() clears its accumulated state,
# so a single instance is reused for every epoch of every run.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops, add_syllables = args

    # Draw a fresh seed from NumPy's global RNG and record it in the run config
    # so the run can be reproduced.
    seed = np.random.randint(100)
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit=limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype,
                      add_syllables=add_syllables
                     )

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="homo POS GNN (gnntype)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                # The syllable counts are passed as an extra positional input
                # only when the model was built with add_syllables=True.
                extra_inputs = (data.num_syllables,) if add_syllables else ()
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr,
                               *extra_inputs, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            model.eval()
            # No gradients are needed for validation; skipping the autograd
            # graph saves memory and time without changing the predictions.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    extra_inputs = (data.num_syllables,) if add_syllables else ()
                    output = model(data.x, data.edge_index, data.batch, data.y, data.ptr,
                                   *extra_inputs, readout=readout)
                    metric.add_batch(predictions=output.logit.argmax(axis=-1).cpu().numpy(),
                                     references=data.y.cpu().numpy())
            evaluation = metric.compute()
            # pbar.n is the number of optimisation steps taken so far in this run.
            evaluation.update({'global_step':pbar.n})
            wandb.log(evaluation, step=pbar.n)
    except BaseException:
        # Close the W&B run as failed instead of leaving it dangling, then re-raise.
        run.finish(exit_code=1)
        raise
    else:
        run.finish()
        run_pbar.update(1)
    finally:
        pbar.close()

run_pbar.close()

  0%|          | 0/14 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▅▆▆▆▅▇▆▇▇▆▇▆▇▆█▇▇█▇▇▇▆█▇█▇██▇█████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76061
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▃▃▃▅▅▅▆▆▆▆▆▇▇▇█▇▇█▇▇▇▇▇████▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73939
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▅▄▅▅▇▅▇▇▆▇▇█▇███▇▇█▇█▇▇▇▇▇▇▇███▇▇▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72727
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▄▂▅▅▆▄▆▆▇▇▇▇▇▇▇▇▇▇█▇▇█▇▇▇████▇▇██▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▂▄▃▆▆▅▆▆▇▇▇▇▇▇▇▇██▇▇▇▇▇█▇▇▇█▇▇▇█▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75152
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▃▂▄▃▆▆▆▇▇▇█▇▇▇▇▇█▇██▇█▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▃▄▃▃▅▆▆▆▇▆▇▇▆▇▇▇▇█▇▇▇██▇▇████▇▇▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75152
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▃▃▃▃▃▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇█▇▇█▇▇██▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.69394
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▂▂▂▃▃▃▃▆▆▆▆▆▆▇▇▆▇▆▇▇█▇▇██▇▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76364
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▂▃▅▄▅▆▆▆▅▆▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇██▇█▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72121
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▄▅▆▆▆▆▇▆▆▇▇▆▇▇▇▇█▇▇▇█▇██▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70909
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▆▆▆▇▇▇▇▇▇▇▇▆▇█▇▇██▇███████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71818
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▂▄▅▅▅▄▆▅▅▆▆▆▆▆▇▇▇▇█▇██▇▇███▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71515
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▄▄▅▄▅▅▅▅▇▇▆▆▇█▇█▇▇█▇▇███▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70303
global_step,2460.0


## Adding the number of syllables

In [None]:
# Grid sweep comparing runs with and without the syllable-count input (and with
# and without self loops): one W&B run per hyper-parameter combination, logging
# validation accuracy after each epoch.
epochs = 60

# Fraction of all optimisation steps used for the linear warm-up.
warmup_ratio = 0.15

LIMIT = [None]
NUM_LAYERS = [3]
LR = [1e-3, 5e-4]
READOUT = ['pool']
GNNTYPE = ['GCNConv', 'GATConv', 'GATv2Conv']
ADD_SELF_LOOPS = [False, True]
ADD_SYLLABLES = [False, True]

# Materialise the grid once so it can be both counted and iterated
# (an itertools.product iterator is exhausted after a single pass).
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS, ADD_SYLLABLES))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Loading the metric is loop-invariant; compute() clears its accumulated state,
# so a single instance is reused for every epoch of every run.
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops, add_syllables = args

    # Draw a fresh seed from NumPy's global RNG and record it in the run config
    # so the run can be reproduced.
    seed = np.random.randint(100)
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit=limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype,
                      add_syllables=add_syllables
                     )

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="homo POS GNN (syllables)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                # The syllable counts are passed as an extra positional input
                # only when the model was built with add_syllables=True.
                extra_inputs = (data.num_syllables,) if add_syllables else ()
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr,
                               *extra_inputs, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            model.eval()
            # No gradients are needed for validation; skipping the autograd
            # graph saves memory and time without changing the predictions.
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    extra_inputs = (data.num_syllables,) if add_syllables else ()
                    output = model(data.x, data.edge_index, data.batch, data.y, data.ptr,
                                   *extra_inputs, readout=readout)
                    metric.add_batch(predictions=output.logit.argmax(axis=-1).cpu().numpy(),
                                     references=data.y.cpu().numpy())
            evaluation = metric.compute()
            # pbar.n is the number of optimisation steps taken so far in this run.
            evaluation.update({'global_step':pbar.n})
            wandb.log(evaluation, step=pbar.n)
    except BaseException:
        # Close the W&B run as failed instead of leaving it dangling, then re-raise.
        run.finish(exit_code=1)
        raise
    else:
        run.finish()
        run_pbar.update(1)
    finally:
        pbar.close()

run_pbar.close()

  0%|          | 0/24 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▃▃▃▃▄▃█▇▇▆███▇▇█▇▇▇▇█▇▇▇█▇▇▇▇█▇█▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.65758
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▃▃▃▃▄▅▇▇▆▇▇▇▇▇▇▇▇██▇▇████▇███▇██▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71818
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▃▃▃▃▅▄▆▄▇▇▇▇█▅▇▇▇▇▇█▇▇██▇██▇██████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71515
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▂▂▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▆▇▇▇▇███████▇██████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75152
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▃▄▆▆█▆▇▇▇▇▇▇█▇█▇██▇█▇██▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72121
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▃▅▇▆▇█▇█▇███▇███▇▇▇█▇▇▇▇▇▇█▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.69091
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▃▂▄▃▆▆▇▇▇▇█▇▇▇▇▇████▇█▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▂▂▅▅▅▅▆▆▇▆▇▇▇▇▇▇█▇█▇██████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76667
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▆▅▇█▆▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.6697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▄▅▆▆▇▆▇▇▇████████▇▇██▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70606
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▃▄▅▄▆▆▆▇▆▆█▇▇▇▇███▇██▇█▇██████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▃▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇█▇▇▇▇██▇▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73636
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▄▄▅▆▆▇▇▇▇█▇█████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.67879
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▄▄▅▅▅▅▆▆▆▆▆▆▇▇██▇▇▇█▇▇███▇██
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.67879
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333768049875895, max=1.0)…

  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▃▃▃▃▃▃▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇█▇▇█▇▇██▇████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.69091
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▄▂▃▄▄▄▅▅▅▆▅▆▇▇▆▆▆▇▇▇▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74242
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▃▅▆▆▇▇▇▇██████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73636
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▄▅▅▆▅▅▆▇▇▇▇▇▇▇▇▇▇▇▇▇█▇██████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.69697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▂▂▄▅▅▄▆▆▆▇▇▇▇█▇██▇██▇██▇█▇▇█▇██▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70303
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▃▅▅▄▆▅▆▅▆▅▆▆▆▆▆▆▇▇▇▇▇▇█▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70909
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▂▁▅▅▅▅▇▆██▇▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.6697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▃▃▃▃▆▅▇▇▇▆▆▇▇▇▇▇▇▇█▇██▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.68788
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▄▄▅▄▅▅▄▅▇▇▆▆▇█▇█▇▇█▇▇███▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.70303
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▃▃▃▃▄▃▄▅▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇█▇███████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7
global_step,2460.0


## 2 authors: repeated runs of the best configuration

In [None]:
# Repeated training runs for the two-author data: every combination in the
# grid below is trained from scratch, evaluated on `df_val` after each epoch,
# and the validation accuracy is logged to Weights & Biases.
epochs = 60
warmup_ratio = 0.15  # fraction of all optimizer steps used for warm-up
monitering_metric = 'accuracy'  # key of the metric dict used to track the best epoch

# Hyper-parameter grid: one list of candidate values per setting.
LIMIT = [None]
NUM_LAYERS = [3]
LR = [1e-3]
READOUT = ['pool']
GNNTYPE = ['GCNConv', 'ChebConv']
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [True, False]
REPEAT = list(range(5))  # only multiplies the number of runs per configuration

# Materialise the grid once so it can be both counted and iterated.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS, ADD_SYLLABLES, REPEAT))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Loop-invariant: load the metric once and reuse it for every run and epoch
# (each compute() call scores the batches added since the previous one).
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops, add_syllables, repeat = args

    # Wall-clock seed per run; it is stored in the W&B config below.
    seed = int(datetime.now().timestamp())
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit=limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)  # batches per epoch
    total_steps = epochs*num_training_steps

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype,
                      add_syllables=add_syllables
                     )

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # The scheduler is stepped once per training batch, so both counts are in batches.
    scheduler = get_scheduler("linear",
                              optimizer=optimizer,
                              num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                              num_training_steps=total_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="homo POS GNN (best)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    best_metric = 0.0
    pbar = trange(total_steps, leave=False)
    for i_epoch in range(epochs):
        model.train()
        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            # Syllable counts are passed as an extra positional input only when enabled.
            extra_inputs = (data.num_syllables,) if add_syllables else ()
            output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
            output.loss.backward()
            optimizer.step()
            scheduler.step()
            pbar.update(1)

        model.eval()
        # Validation needs no gradients; skipping autograd saves memory and time.
        with torch.no_grad():
            for data in valid_loader:
                data = data.to(device)
                extra_inputs = (data.num_syllables,) if add_syllables else ()
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
                metric.add_batch(predictions=output.logit.argmax(dim=-1).cpu().numpy(),
                                 references=data.y.cpu().numpy())
        evaluation = metric.compute()
        best_metric = max(best_metric, evaluation[monitering_metric])
        evaluation.update({'global_step': pbar.n,
                           f'best_{monitering_metric}': best_metric})
        # One explicit, committed log call per epoch keeps all values on the same step.
        wandb.log(evaluation, step=pbar.n, commit=True)

    pbar.close()  # remove the finished per-run bar instead of leaving it behind
    run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/20 [00:00<?, ?it/s]

data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▅▆▆▆▇▆▇▇▇▇▇█▇█▇██▇▇███████████████
best_accuracy,▁▁▂▂▃▃▅▆▆▆▇▇▇▇▇▇▇███████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75152
best_accuracy,0.76667
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▂▅▂▅▆▅▆▆▆▅▇▇▅▆█▇▇▆▇▆▇▇▇▇█▆█▇█▇▇▇▇█▇
best_accuracy,▁▁▁▁▁▂▅▅▅▆▇▇▇▇▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76061
best_accuracy,0.78485
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▄▁▃▄▅▅▅▆▆▆▆▆▆▇▆▆█▇███▇██▇▇█▇▇█▇████
best_accuracy,▁▁▁▁▁▄▄▄▄▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74242
best_accuracy,0.75758
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▂▂▅▄▅▅▆▆▆▅▇▇▇▆▇▇▇▆▇▇████▇▇████▇█████
best_accuracy,▁▂▂▂▂▂▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75152
best_accuracy,0.75152
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▂▁▅▄▅▄▆▇▇▅▆▇█▆▆▇▇▆█▇▇▇▇▇▇▇██▇████
best_accuracy,▁▁▁▁▁▁▁▂▂▅▅▅▅▆▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74545
best_accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▄▅▅▇▇▅▇▆▇▇▇▇▇▇████▇▇███████████████
best_accuracy,▁▁▁▁▁▄▅▅▇▇▇▇▇▇▇▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73939
best_accuracy,0.74242
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▃▃▅▄▇▇▇▆▇▇▇▇▇▇▇▇▇███▇█▇█████████████
best_accuracy,▁▁▃▃▃▃▅▅▇▇▇▇▇▇▇▇▇▇▇▇████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74242
best_accuracy,0.74545
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▃▃▃▄▅▅▆▆▇▇▇▇▇▇▇▇████▇▇▇████████▇█████
best_accuracy,▁▂▃▃▃▃▄▅▅▆▆▇▇▇▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73939
best_accuracy,0.75152
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▃▃▅▆▆▆▇▇▇▇▇▆▇▇▇██▇██▇▇███████▇████
best_accuracy,▁▃▃▃▃▃▃▃▅▆▆▆▇▇▇▇▇▇▇▇▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75455
best_accuracy,0.75758
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▃▃▅▄▆▅▆▆▆▆▇▇▇▇██▇▇▇██▇▇███▇████▇████
best_accuracy,▁▃▃▃▃▃▅▅▆▆▆▆▇▇▇▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71818
best_accuracy,0.7303
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▁▃▂▄▄▅▅▇▆▇▇▇▇▇▆▇▇▇▇█▆▇▇██▇▇████▇███
best_accuracy,▁▁▁▁▂▂▃▃▄▄▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75455
best_accuracy,0.76364
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▅▃▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇█▇█████████████████
best_accuracy,▁▁▁▁▄▄▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73939
best_accuracy,0.74848
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▂▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇██▇████████████
best_accuracy,▁▃▃▃▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇███████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.78182
best_accuracy,0.79697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▁▂▄▄▅▅▆▆▆▇▇▇▇█▇▇██▇████████▇███████
best_accuracy,▁▁▁▁▃▃▃▄▅▅▅▆▆▆▆▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74848
best_accuracy,0.75455
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▂▆▂▆▃▅▅▆▅▆▆▆▇▆▇▇▇▇▇▇███▇██▇████▇█████
best_accuracy,▁▁▂▂▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇█████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.76667
best_accuracy,0.7697
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▄▅▆▆▆▆▆▇▇▇▇▇▇▇▆█▇▇█▇▇██▇████▇▇██████
best_accuracy,▁▁▃▃▃▅▆▆▆▆▇▇▇▇▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72727
best_accuracy,0.74242
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▁▁▆▅▆▅▇▇▇▆▇▇▅▆▆▇▇█▇▇▇█▇▇██▇███▇▇▇███▇█
best_accuracy,▁▁▁▁▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.71212
best_accuracy,0.72424
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▁▁▁▂▅▄▆▅▆▆▇▇▇▇▇▇▇▇▇█████▇▇█▇██▇███████
best_accuracy,▁▁▁▁▁▁▄▄▆▆▆▆▇▇▇▇▇▇▇▇▇███████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74848
best_accuracy,0.76061
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033339587847391765, max=1.0…

  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▁▁▃▃▃▅▄▄▆▆▆▆▇▇▇▆▇▆▇▇▇▇▇▇▇██▇█▇██▇████▇
best_accuracy,▁▁▁▁▃▃▃▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73636
best_accuracy,0.75758
global_step,2460.0


data 908 too long length 134
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2460 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▂▂▂▃▅▅▆▆▇▇▇▇█▇▇▇█▇▇█▇▇▇▇██▇████▇██▇███
best_accuracy,▁▂▂▂▂▃▅▅▆▆▆▇▇▇██████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.72121
best_accuracy,0.7303
global_step,2460.0


# IMDb dataset

In [None]:
# Columns that are parsed back into Python objects with ast.literal_eval
# after the CSV has been read.
cols_to_eval = ['homo_edges', 'hetoro_edges', 'pos_seqs', 'upos_seqs', 'num_syllables']


def _read_processed_split(path):
    """Read one processed CSV split and literal-eval every column in `cols_to_eval`."""
    frame = pd.read_csv(path)
    for name in cols_to_eval:
        frame[name] = frame[name].apply(ast.literal_eval)
    return frame


# Training split.
file = '../../data/imdb/processed/author_0,1_sent_2_train.csv'
df = _read_processed_split(file)

# Validation split.
file = '../../data/imdb/processed/author_0,1_sent_2_val.csv'
df_val = _read_processed_split(file)

In [None]:
# Repeated training runs for the IMDb data: every combination in the grid
# below is trained from scratch, evaluated on `df_val` after each epoch, and
# the validation accuracy is logged to Weights & Biases.
epochs = 60
warmup_ratio = 0.15  # fraction of all optimizer steps used for warm-up
monitering_metric = 'accuracy'  # key of the metric dict used to track the best epoch

# Hyper-parameter grid: one list of candidate values per setting.
LIMIT = [None]
NUM_LAYERS = [4]
LR = [1e-3, 5e-3]
READOUT = ['pool']
GNNTYPE = ['GCNConv', 'ChebConv']
ADD_SELF_LOOPS = [True]
ADD_SYLLABLES = [True, False]
REPEAT = list(range(5))  # only multiplies the number of runs per configuration

# Materialise the grid once so it can be both counted and iterated.
ARGS = list(itertools.product(LIMIT, NUM_LAYERS, LR, READOUT, GNNTYPE, ADD_SELF_LOOPS, ADD_SYLLABLES, REPEAT))
num_runs = len(ARGS)
run_pbar = trange(num_runs, leave=False)

# Loop-invariant: load the metric once and reuse it for every run and epoch
# (each compute() call scores the batches added since the previous one).
metric = evaluate.load('accuracy')

for i_run, args in enumerate(ARGS):
    limit, num_layers, lr, readout, gnntype, add_self_loops, add_syllables, repeat = args

    # Wall-clock seed per run; it is stored in the W&B config below.
    seed = int(datetime.now().timestamp())
    set_seed(seed)

    valid_loader = get_loader(df_val, add_syllables=add_syllables)
    num_valid_steps = len(valid_loader)
    train_loader = get_loader(df, limit=limit, add_syllables=add_syllables)
    num_training_steps = len(train_loader)  # batches per epoch
    total_steps = epochs*num_training_steps

    model = myHomoGNN(num_layers=num_layers,
                      num_classes=2,
                      add_self_loops=add_self_loops,
                      gnntype=gnntype,
                      add_syllables=add_syllables
                     )

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # The scheduler is stepped once per training batch, so both counts are in batches.
    scheduler = get_scheduler("linear",
                              optimizer=optimizer,
                              num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                              num_training_steps=total_steps)

    wconfig = {
        'seed': seed,
        'limit': limit,
        'num_layers': num_layers,
        'lr': lr,
        'readout': readout,
        'GNNtype': gnntype,
        'add_self_loops': add_self_loops,
        'add_syllables': add_syllables,
    }

    run = wandb.init(project="homo POS GNN imdb (best)",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}',
                     reinit=True)

    best_metric = 0.0
    pbar = trange(total_steps, leave=False)
    for i_epoch in range(epochs):
        model.train()
        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            # Syllable counts are passed as an extra positional input only when enabled.
            extra_inputs = (data.num_syllables,) if add_syllables else ()
            output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
            output.loss.backward()
            optimizer.step()
            scheduler.step()
            pbar.update(1)

        model.eval()
        # Validation needs no gradients; skipping autograd saves memory and time.
        with torch.no_grad():
            for data in valid_loader:
                data = data.to(device)
                extra_inputs = (data.num_syllables,) if add_syllables else ()
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, *extra_inputs, readout=readout)
                metric.add_batch(predictions=output.logit.argmax(dim=-1).cpu().numpy(),
                                 references=data.y.cpu().numpy())
        evaluation = metric.compute()
        best_metric = max(best_metric, evaluation[monitering_metric])
        evaluation.update({'global_step': pbar.n,
                           f'best_{monitering_metric}': best_metric})
        # One explicit, committed log call per epoch keeps all values on the same step.
        wandb.log(evaluation, step=pbar.n, commit=True)

    pbar.close()  # remove the finished per-run bar instead of leaving it behind
    run.finish()
    run_pbar.update(1)

run_pbar.close()

  0%|          | 0/40 [00:00<?, ?it/s]

data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▃▅▆▆▃▇▇▇▅▇█▇█▇▇▇██▇█▇█▇▇█▇██▇████████
best_accuracy,▁▁▃▃▅▆▆▆▇▇▇▇▇███████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87602
best_accuracy,0.87798
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▂▅▄▅▆▄▇▇███▇████▇█▇▇▇█████▇██████████
best_accuracy,▁▁▂▂▅▅▅▆▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87243
best_accuracy,0.87537
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▄▆▆▆▆▇▇█▆▇▇█▆▇▆▇█▇▇████▇█████████████
best_accuracy,▁▁▂▄▆▆▆▆▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87113
best_accuracy,0.87374
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▃▅▆▆▇▇▇▇▇▇▇▇████▇▇▇█▇███▇███████████
best_accuracy,▁▁▂▃▃▄▆▆▇▇▇▇▇▇▇▇████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8721
best_accuracy,0.87439
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▅▅▆▇▇▇▇▇█▇▇▇█▇▇▇█▇█▇█▇█▇███▇████████
best_accuracy,▁▁▁▂▅▅▆▇▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87504
best_accuracy,0.87928
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▃▄▆▆▇▆▇▆▇▇▇▇▇▇███▇▇█▇██▇███████▇█████
best_accuracy,▁▁▁▃▅▆▆▇▇▇▇▇▇▇▇▇████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86591
best_accuracy,0.86819
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▄▅▅▆▆▇▆▆▇▇▇▇▇▇▇████▇▇▇█▇█████████████
best_accuracy,▁▁▂▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86688
best_accuracy,0.86884
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▄▆▆▇▆▆▆▇▇▇█▇▇▇▇▇▇███████▇██▇██▇▇▇████
best_accuracy,▁▁▂▄▆▆▇▇▇▇▇▇▇███████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86721
best_accuracy,0.87308
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▄▄▆▆▇▇▇▇▇▇▇▇▇▇█▇▆▇████████▇███▇█████
best_accuracy,▁▁▃▄▄▄▆▆▇▇▇▇▇▇▇▇▇▇██████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87276
best_accuracy,0.87537
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▅▆▇▇▇▇▇█▇████▇█▇█▇██████████████████
best_accuracy,▁▁▃▄▅▆▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87113
best_accuracy,0.87406
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▅▆▅▆▇▇█▇▆█▇▇███████████▇▇▇▇███▇▇█▇▇▇
best_accuracy,▁▁▃▄▅▆▇▇▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87015
best_accuracy,0.87896
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▆▆▇▄▇▇▇▇█▇███▇█▆▇██▇▇▇█▇▆▇▆▇▇▇▆▇▇▇▇▇
best_accuracy,▁▁▃▄▆▆▇▇▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85905
best_accuracy,0.88124
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▅▅▆▄▇▇███▆██▆███▇██▇▇▇▇▇▇▆▇▆█▇▇▇▇▇▇▇
best_accuracy,▁▁▃▄▅▅▆▆▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8633
best_accuracy,0.87798
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▃▅▆▅▆▇▇▆█████▇███▇█▇▇█▇█▇██▇▇▇▇▇▇█▇▇▇
best_accuracy,▁▁▄▄▅▆▆▆▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86591
best_accuracy,0.87504
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▅▆▅▅▆▇▇▇█████▇██▇▆████████▇▇█▇▇▇▇▇▇▇
best_accuracy,▁▁▂▃▅▆▆▆▆▇▇▇▇███████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86525
best_accuracy,0.87602
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▅▆▇▇▇▇█▇█▇▆▇█████▇█▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▄▄▅▆▇▇▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85481
best_accuracy,0.8708
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▃▄▄▆▅▆▅▇▇▄█▆█▇█▇▇▇█▇▇▇▇▇██▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▃▄▄▆▆▆▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85579
best_accuracy,0.86688
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▄▆▇▇▇▇█▆████▇▇▆▇██▆█▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▄▄▆▇▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.84666
best_accuracy,0.85808
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▄▅▆▇▇▇▇█▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▁▄▅▆▇▇▇▇███████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.846
best_accuracy,0.86069
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▄▃▆▇▇▇▇▇▇▆▇▇█▆█▇█▆▄█▆███▇█▆▇▇█▇▇██▇▇▇
best_accuracy,▁▁▂▄▄▆▇▇▇▇▇▇▇▇▇█████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8584
best_accuracy,0.86852
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▅▇▆▅▅▇▇▇█▇▇█▇▇███████▇██▇▇█▇█▇█▇▇▇▇▇▇
best_accuracy,▁▃▄▅▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86525
best_accuracy,0.87537
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▆▆▆▅▄▆▇███▇▇▇▇██▇█▇▇█▇▇██▇▇█▇██▇███▇▇▇
best_accuracy,▁▂▅▆▇▇▇▇▇███████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86362
best_accuracy,0.87471
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▅▆▆▇▅▇▇▇▃▇▇▇█▇▇▇▆███▇█▇█▇███▇█▇▇▇▇▇▇▇▇
best_accuracy,▁▃▅▆▆▇▇▇▇▇▇▇▇▇██████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87145
best_accuracy,0.87993
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▃▇▃▅▇▇▇▇█▇▇██▇█▇██▇▇▇▇▇██▇█▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▃▅▅▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87406
best_accuracy,0.88581
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▄▄▃▆▇▇▇▇▇▆███▇██▇█▇█▆██▇█▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▂▄▄▅▆▇▇▇▇▇▇▇███████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86395
best_accuracy,0.88026
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▆▅▇▆▇▆▅▇███▆█▆██▇██▇▇▇▇▆▇█▇▇▇▇▇█▇█▇▇▇▇
best_accuracy,▁▃▆▆▇▇▇▇▇▇██████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85905
best_accuracy,0.86884
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▅▆▇▇▅▇▆▇▇▇▇██▆███████▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▂▅▆▇▇▇▇████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86069
best_accuracy,0.87374
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033338705698649086, max=1.0…

  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▆▆▆▇▇▇▇▇▇█▇███▇█▇▇▇███▇█▇▇▇▇▇██▇▆▇▇▇▇▇
best_accuracy,▁▃▆▆▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86297
best_accuracy,0.87374
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▅▇▇▆▇▇▆▇▆█▇█▇▇██▇▆███████▇██▇▇▇██▇██▇▇
best_accuracy,▁▂▅▇▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86558
best_accuracy,0.87308
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▅▆▇▇▆▇▇▅▆█▇▇▇▇▇█▇██▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▂▅▆▇▇▇▇▇▇▇█████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86493
best_accuracy,0.87569
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▆▆▆▅▇▇▇▇▇▇█▇███▇██▇█▇█▇▆▇▇▇▇▇▇██▇█▇▇█▇
best_accuracy,▁▃▆▆▆▆▆▇▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86166
best_accuracy,0.87406
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▆▅▇▇█▇▇▇▇▇█▇▇▇█▆▇▇█▇█▇▇▇███▇██▇█▇██▇█
best_accuracy,▁▃▄▆▆▇▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.87667
best_accuracy,0.877
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▅▆█▇▇▆▇█████▇▅▇█▇▇▇▇▇▇▇█▇▇██▇▇▇▇▇█▇▇▇█
best_accuracy,▁▄▅▆████████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86884
best_accuracy,0.87471
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▅▅▅▄█▇▇██▇▇▇█▇▇▆▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇
best_accuracy,▁▃▅▅▅▅██████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86754
best_accuracy,0.8783
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▇▇▇▆█▇▇███▇▇███▇█▇▇▇▇██▇████▇▇▇▇▇▇▇▇▇
best_accuracy,▁▃▅▇▇▇▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86134
best_accuracy,0.87178
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▆▆▅█▇██▇▇▅▇▆▇▇▆▇▆▇▆▆▇▆▇▆▆▆▆▇▆▆▆▆▆▆▆▆▇▆
best_accuracy,▁▄▆▆▇███████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85188
best_accuracy,0.87569
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▆▇█████████▇▇▇▇▇██▇▇▇█▆▇▇▇█▆▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▄▆▇▇███████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85546
best_accuracy,0.86819
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▆▆▇█▇█▇█▇▇█▆██▇▇▇█▇▇▇▇▇█▇▆▇▇▇██▇▇▇▇▇▇▇
best_accuracy,▁▄▆▆▇███████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.8571
best_accuracy,0.86917
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▅▄▆▇▇▇▇▇▇██▇▇▇▇▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
best_accuracy,▁▃▄▅▅▆▇▇▇▇▇▇████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.86395
best_accuracy,0.87798
global_step,17280.0


data 2787 too long length 144
data 3027 too long length 128
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/17280 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▅▅▇▆▄██▇█▇██▇▇▇▇▇▇▆▇▅▇▇▇▇▇▇▆▇▇▆▇▇▆▇▇▇▇
best_accuracy,▁▄▅▅▇▇▇█████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.85905
best_accuracy,0.87504
global_step,17280.0


In [None]:
def count_parameters(model):
    '''
    Return the total number of scalar parameters held by `model`.

    Every tensor yielded by `model.parameters()` contributes its `numel()`,
    whether or not it requires gradients. Returns 0 if nothing is yielded.
    '''
    total = 0
    for tensor in model.parameters():
        total += tensor.numel()
    return total

In [None]:
def count_trainable_parameters(model):
    '''
    Return the number of scalar parameters in `model` that will be trained.

    Only tensors from `model.parameters()` whose `requires_grad` is truthy
    are counted; each contributes its `numel()`. Returns 0 if none qualify.
    '''
    trainable = filter(lambda tensor: tensor.requires_grad, model.parameters())
    return sum(map(lambda tensor: tensor.numel(), trainable))

In [None]:
# Display the total parameter count (trainable or not) of `model`.
# NOTE(review): `model` is not defined in this part of the notebook; presumably it is
# left over from an earlier training cell -- confirm it exists on a fresh kernel.
count_parameters(model)

8386

In [None]:
# Display the total parameter count of `bert`, counting frozen and trainable tensors alike.
count_parameters(bert)

56800

In [None]:
# Display how many of `bert`'s parameters currently have requires_grad set.
# The result depends on which freeze_model(...) call was applied to `bert` beforehand.
count_trainable_parameters(bert)

12704

In [None]:
# Display how many of `bert`'s parameters currently have requires_grad set.
# NOTE(review): this is the same call as the cell directly above, yet the recorded
# outputs differ (12704 vs 50816); presumably the two cells were executed under
# different freeze settings / out of order -- confirm and keep only one.
count_trainable_parameters(bert)

50816

In [None]:
# freeze_bert=0 takes the integer branch of freeze_model: the embedding parameters are
# frozen, and the slice `encoder.layer[:0]` is empty, so no encoder layer is frozen.
# freeze_model only ever sets requires_grad=False; it never re-enables gradients.
bert = freeze_model(bert, 0)

In [None]:
# Load the BERT encoder weights from the local `checkpoint` directory and put the
# module in evaluation mode. `local_files_only=True` keeps the load offline, and
# `add_pooling_layer=False` builds the model without the pooler head.
# NOTE(review): cells above this one already use `bert`; if it is not created earlier
# in the notebook, this cell has to run before them on a fresh kernel.
bert = BertModel.from_pretrained(
    checkpoint,
    local_files_only=True,
    add_pooling_layer=False,
).eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/pos_mlm_corenlp/retrained_pos_mlm_1/checkpoint-155000/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
