In [None]:
from torch_geometric.datasets import TUDataset, OGB_MAG
import torch_geometric.transforms as T
from nltk.parse.corenlp import CoreNLPParser,CoreNLPDependencyParser
from tqdm.auto import trange, tqdm
import pandas as pd
import ast
import itertools
from transformers import get_scheduler
import torch
import wandb
import evaluate
from itertools import cycle
import numpy as np

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# preprocessing 

In [None]:
def create_dataset(df_ccat, num_authors_to_pick = None, picked_author_ids = None, num_sent_per_text = None, save_folder = None, train=True):
    """Build a sliding-window authorship dataset from a per-document frame.

    Every document is split on newlines, each piece is stripped and empty
    pieces are dropped. Each run of ``num_sent_per_text`` consecutive pieces
    is joined with a single space and becomes one sample labelled with the
    document's author id.

    Args:
        df_ccat: DataFrame with an ``author_id`` column and a ``text`` column.
        num_authors_to_pick: number of authors to sample at random (without
            replacement) when ``picked_author_ids`` is not given.
        picked_author_ids: explicit author ids to keep; takes precedence over
            ``num_authors_to_pick``.
        num_sent_per_text: window size, i.e. how many lines form one sample.
        save_folder: if given, the dataset is also written there as CSV.
        train: only affects the file name suffix (``train`` vs ``val``).

    Returns:
        ``df`` with columns ``author`` and ``text``; or ``(df, file_name)``
        when ``save_folder`` is given.

    Raises:
        ValueError: if ``num_sent_per_text`` is missing or < 1, or if neither
            ``picked_author_ids`` nor ``num_authors_to_pick`` is given.
    """
    if num_sent_per_text is None or num_sent_per_text < 1:
        raise ValueError("num_sent_per_text must be a positive integer")

    if picked_author_ids is None or len(picked_author_ids) == 0:
        if num_authors_to_pick is None:
            raise ValueError("give either picked_author_ids or num_authors_to_pick")
        unique_authors = list(df_ccat['author_id'].unique())
        picked_author_ids = sorted(np.random.choice(unique_authors, replace=False, size=num_authors_to_pick).tolist())

    authors = []
    texts = []
    for author in picked_author_ids:
        for raw_text in df_ccat.loc[df_ccat['author_id'] == author, 'text']:
            # Drop *every* blank line. list.remove('') only removed the first
            # one and raised ValueError when the document had none.
            sentences = [s for s in (piece.strip() for piece in raw_text.split('\n')) if s]
            # +1 so the window ending on the last sentence is included; a
            # document shorter than the window yields an empty range.
            for start in range(len(sentences) - num_sent_per_text + 1):
                authors.append(author)
                texts.append(' '.join(sentences[start:start + num_sent_per_text]))

    df = pd.DataFrame({'author':authors, 'text':texts})
    if save_folder:
        str_author = ','.join(map(str, picked_author_ids))
        file_name = f"author_{str_author}_sent_{num_sent_per_text}_{'train' if train else 'val'}.csv"
        df.to_csv(f"{save_folder}/{file_name}", index=False)
        return df, file_name
    return df

In [None]:
# df_ccat = pd.read_csv('../../data/CCAT50/processed/CCAT50_train.csv')
# picked_author_ids = [0,1]
# num_sent_per_text = 2
# save_folder = '../../data/CCAT50/processed/'
# df, file_name = create_dataset(df_ccat, picked_author_ids = picked_author_ids, num_sent_per_text = num_sent_per_text, save_folder = save_folder)

In [None]:
# df_ccat = pd.read_csv('../../data/CCAT50/processed/CCAT50_AA_val.csv')
# picked_author_ids = [0,1]
# num_sent_per_text = 2
# save_folder = '../../data/CCAT50/processed/'
# df, file_name = create_dataset(df_ccat, picked_author_ids = picked_author_ids, num_sent_per_text = num_sent_per_text, save_folder = save_folder, train=False)

In [None]:
# Client for a CoreNLP dependency-parsing server. The server must already be
# listening on localhost:9000 before get_dep_edges is called.
depparser = CoreNLPDependencyParser(url='http://localhost:9000')

In [None]:
def get_dep_edges(texts):
    """Dependency-parse every text and return its edges and POS tags.

    Each text is sent to the module-level ``depparser`` and only the first
    parse it yields is used. The parse is read back as 4-column CoNLL rows;
    the code takes column 1 as the POS tag, column 2 as the head index and
    column 3 as the relation label. Tokens are numbered from 1 in row order.

    Returns three lists, each with one entry per text:
        homo_edges   -- ``[token_id, head_id]`` pairs
        hetoro_edges -- ``[token_id, head_id, relation]`` triples
        pos_seqs     -- POS tags in token order
    """
    all_homo, all_hetero, all_pos = [], [], []
    for text in tqdm(texts):
        graph = next(depparser.raw_parse(text))
        # The CoNLL dump ends with a newline, so the last split piece is dropped.
        rows = graph.to_conll(4).split('\n')[:-1]

        homo, hetero, tags = [], [], []
        for token_id, row in enumerate(rows, start=1):
            fields = row.split('\t')
            head_id = int(fields[2])
            homo.append([token_id, head_id])
            hetero.append([token_id, head_id, fields[3]])
            tags.append(fields[1])

        all_homo.append(homo)
        all_hetero.append(hetero)
        all_pos.append(tags)
    return all_homo, all_hetero, all_pos

In [None]:
# # processing train set
# file = '../../data/CCAT50/processed/author_0,1_sent_2_train.csv'
# df = pd.read_csv(file)
# homo_edges, hetoro_edges, pos_seqs = get_dep_edges(df['text'])
# df['homo_edges'] = homo_edges
# df['hetoro_edges'] = hetoro_edges
# df['pos_seqs'] = pos_seqs
# df.to_csv(file, index=False)

In [None]:
# # processing val set
# file = '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'
# df_val = pd.read_csv(file)
# homo_edges, hetoro_edges, pos_seqs = get_dep_edges(df_val['text'])
# df_val['homo_edges'] = homo_edges
# df_val['hetoro_edges'] = hetoro_edges
# df_val['pos_seqs'] = pos_seqs
# df_val.to_csv(file, index=False)

## load processed files

In [None]:
# Load the pre-parsed training split. The list-valued columns are stored in
# the CSV as Python literals, so they are parsed back with ast.literal_eval.
file = '../../data/CCAT50/processed/author_0,1_sent_2_train.csv'
df = pd.read_csv(file)
for column in ('homo_edges', 'hetoro_edges', 'pos_seqs'):
    df[column] = df[column].apply(ast.literal_eval)

In [None]:
# Load the pre-parsed validation split. The list-valued columns are stored in
# the CSV as Python literals, so they are parsed back with ast.literal_eval.
file = '../../data/CCAT50/processed/author_0,1_sent_2_val.csv'
df_val = pd.read_csv(file)
for column in ('homo_edges', 'hetoro_edges', 'pos_seqs'):
    df_val[column] = df_val[column].apply(ast.literal_eval)

In [None]:
# Base dependency relations that occur in the training parses; a subtype
# such as "nsubj:pass" is folded into its base relation "nsubj".
unique_relations = set()
for hetoro_edges in df['hetoro_edges']:
    unique_relations.update(edge[2].split(':')[0] for edge in hetoro_edges)

In [None]:
def freeze_model(model, freeze_bert):
    '''
    Freeze some or all parameters of a BERT-style model in place.

    freeze_bert:
      * True  -> every parameter of the model is frozen.
      * False -> nothing is frozen.
      * int n -> the embeddings and the slice ``model.encoder.layer[:n]`` are
        frozen. A positive n freezes the bottom n layers, a negative n
        freezes all but the top |n| layers, and 0 freezes only the embeddings.
      * anything else (e.g. None) -> nothing is frozen.

    Returns the same model object.
    '''
    # Booleans are handled first and by type: bool is a subclass of int and
    # ``1 == True``, so an equality test would treat "freeze 1 layer" as
    # "freeze everything" and would treat False as the integer 0.
    if isinstance(freeze_bert, (bool, np.bool_)):
        if freeze_bert:
            for param in model.parameters():
                param.requires_grad = False
        return model

    # np.integer covers all NumPy integer scalars. torch.int32/int64 are dtype
    # objects, not classes, and make isinstance() raise TypeError.
    if isinstance(freeze_bert, (int, np.integer)):
        for param in model.embeddings.parameters():
            param.requires_grad = False
        for layer in model.encoder.layer[:freeze_bert]:
            for param in layer.parameters():
                param.requires_grad = False
    return model

In [None]:
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from transformers import AutoTokenizer
from transformers.models.bert.modeling_bert import BertModel
import torch

In [None]:
# Local checkpoint directory shared by the tokenizer and the BERT encoder
# loaded below. NOTE(review): this is an absolute, machine-specific path;
# presumably a BERT pre-trained on POS-tag sequences, judging by the directory
# name -- confirm.
checkpoint = '/scratch/data_jz17d/result/pos_mlm_corenlp/pos_mlm_8/checkpoint-155000/'
# local_files_only=True: never try to download, read from disk only.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, local_files_only=True)

In [None]:
# Encoder used to produce node features; the pooling head is not created
# because only last_hidden_state is read further down.
bert = BertModel.from_pretrained(checkpoint, local_files_only=True, add_pooling_layer = False)
# bert = freeze_model(bert, True)
# eval() switches the module to inference mode; it does not by itself stop
# gradient tracking (freeze_model above is commented out).
bert = bert.eval()

Some weights of the model checkpoint at /scratch/data_jz17d/result/pos_mlm_corenlp/pos_mlm_8/checkpoint-155000/ were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


# homo edges

In [None]:
# loaders
# Build one PyG graph per training sample: node features are the BERT hidden
# states of the POS-tag sequence, edges are the (token, head) dependency pairs.
# NOTE(review): edge ids index rows of x, which assumes the tokenizer maps
# every POS tag to exactly one token, with the leading special token at row 0
# standing in for the parser's root id 0 -- confirm against the tokenizer.
max_length = 128
data_list = []
for i, (edges, pos_tags, author) in enumerate(zip(df['homo_edges'], df['pos_seqs'], df['author'])):
    # One edge per token, so more than max_length-1 edges would reference
    # positions beyond the truncated token sequence.
    if len(edges) > max_length-1:
        print(f"data {i} too long length {len(edges)}")
        continue
    data = Data()
    # reshape(-1, 2) keeps an empty edge list as a valid (2, 0) edge_index.
    data.edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).T
    tokens = tokenizer(' '.join(pos_tags), padding=True, truncation=True, max_length=max_length, return_tensors='pt')
    # Features are precomputed constants: skip building the autograd graph
    # instead of building it and detaching afterwards.
    with torch.no_grad():
        data.x = bert(**tokens).last_hidden_state.squeeze(0)
    data.y = torch.tensor([author])
    data_list.append(data)

train_loader = DataLoader(data_list, batch_size=32, shuffle=True)


data 511 too long length 134


In [None]:
# Build one PyG graph per validation sample, using the same recipe as the
# training loader (BERT hidden states as nodes, dependency pairs as edges).
# NOTE(review): edge ids index rows of x, which assumes the tokenizer maps
# every POS tag to exactly one token -- confirm against the tokenizer.
data_list = []
for i, (edges, pos_tags, author) in enumerate(zip(df_val['homo_edges'], df_val['pos_seqs'], df_val['author'])):
    # One edge per token, so more than max_length-1 edges would reference
    # positions beyond the truncated token sequence.
    if len(edges) > max_length-1:
        print(f"data {i} too long length {len(edges)}")
        continue
    data = Data()
    # reshape(-1, 2) keeps an empty edge list as a valid (2, 0) edge_index.
    data.edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).T
    tokens = tokenizer(' '.join(pos_tags), padding=True, truncation=True, max_length=max_length, return_tensors='pt')
    # Features are precomputed constants: no autograd graph is needed.
    with torch.no_grad():
        data.x = bert(**tokens).last_hidden_state.squeeze(0)
    data.y = torch.tensor([author])
    data_list.append(data)

# Accuracy does not depend on batch order, so validation is left unshuffled
# to keep evaluation deterministic.
valid_loader = DataLoader(data_list, batch_size=32, shuffle=False)


In [None]:
# Number of batches per epoch in each loader; the scheduler and progress bar
# below are sized from the training count.
num_training_steps, num_valid_steps = len(train_loader), len(valid_loader)

In [None]:
from dataclasses import dataclass
@dataclass
class myGCNoutput:
    """Result bundle returned by the GNN models' ``forward`` in this notebook."""
    # The ``None`` annotations only register the fields with @dataclass;
    # they are not the runtime types of the values stored.
    loss: None   # output of the model's loss function for the batch
    logit: None  # output of the model's classifier layer
    emb: None    # representation that was fed to the classifier

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

# Width of the node features and of every GCN layer.
# NOTE(review): must equal the hidden size of the BERT encoder that produces
# data.x -- confirm against the checkpoint config.
pos_emb_dim = 32
# Lookup from the GCNtype string accepted by myGCN to the layer class.
GCNtype2layer = {'GCNConv':GCNConv, }


class myGCN(torch.nn.Module):
    """Stack of graph-convolution layers followed by a graph-level classifier.

    Args:
        num_gcn: number of graph-convolution layers.
        num_class: number of output classes.
        GCNtype: key into ``GCNtype2layer`` selecting the layer class.
    """

    def __init__(self, num_gcn, num_class, GCNtype='GCNConv'):
        super().__init__()
        self.num_gcn = num_gcn
        self.num_class = num_class
        self.GCNlayer = GCNtype2layer[GCNtype]

        self.gcns = nn.ModuleList()
        for i in range(num_gcn):
            self.gcns.append(self.GCNlayer(pos_emb_dim, pos_emb_dim))

        self.classifier = nn.Linear(pos_emb_dim, num_class)
        self.lossfn = nn.CrossEntropyLoss()

    def forward(self, x, edge_index, batch, y=None, ptr=None, readout='pool'):
        """Classify every graph of a batch.

        Args:
            x: node features of the batched graphs.
            edge_index: edges of the batched graphs.
            batch: graph id of every node (used by the 'pool' readout).
            y: optional labels; when omitted, ``loss`` in the result is None.
            ptr: start offset of every graph in ``x`` plus the total node
                count; required by the 'cls' readout.
            readout: 'pool' averages the nodes of each graph, 'cls' takes
                the first node of each graph.

        Returns:
            myGCNoutput with ``loss``, ``logit`` and ``emb`` (the graph-level
            representation after dropout).

        Raises:
            ValueError: on an unknown ``readout`` or 'cls' without ``ptr``.
        """
        # Fail early: an unknown readout used to skip pooling silently and
        # only surfaced later as a shape mismatch inside the loss.
        if readout not in ('pool', 'cls'):
            raise ValueError(f"readout must be 'pool' or 'cls', got {readout!r}")
        if readout == 'cls' and ptr is None:
            raise ValueError("readout='cls' requires ptr")

        for conv in self.gcns:
            x = F.relu(conv(x, edge_index))

        if readout == 'pool':
            x = global_mean_pool(x, batch)
        else:
            # ptr[:-1] holds the index of the first node of every graph.
            x = x[ptr[:-1],:]

        x = F.dropout(x, training=self.training)
        logit = self.classifier(x)
        loss = self.lossfn(logit, y) if y is not None else None
        return myGCNoutput(loss=loss, logit=logit, emb=x)

In [None]:
# Grid search over GCN depth, learning rate and readout; every configuration
# is trained from scratch and logged as its own wandb run.
epochs = 30
warmup_ratio = 0.15

NUM_GCN = [1,2,3,4]
LR = [1e-3, 1e-4, 1e-5]
READOUT = ['pool', 'cls']

# Loaded once and reused: compute() resets the metric's accumulated state, so
# there is no need to reload it every epoch.
metric = evaluate.load('accuracy')

num_runs = len(NUM_GCN)*len(LR)*len(READOUT)
run_pbar = trange(num_runs, leave=False)
for i_run, (num_gcn, lr, readout) in enumerate(itertools.product(NUM_GCN, LR, READOUT)):
    # 2 output classes: the processed files loaded above hold two authors.
    model = myGCN(num_gcn, 2)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear warm-up over the first warmup_ratio of all steps, then linear decay.
    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)

    wconfig = {'num_gcn': num_gcn, 'lr': lr, 'readout': readout}

    run = wandb.init(project="POS GNN",
                     entity="fsu-dsc-cil",
                     dir='/scratch/data_jz17d/wandb_tmp/',
                     config=wconfig,
                     name=f'run_{i_run}_num_gcn_{num_gcn}_readout_{readout}',
                     reinit=True)

    pbar = trange(epochs*num_training_steps, leave=False)
    try:
        for i_epoch in range(epochs):
            model.train()
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
                output.loss.backward()
                optimizer.step()
                scheduler.step()
                pbar.update(1)

            # Validation after every epoch; no gradients are needed here.
            model.eval()
            with torch.no_grad():
                for data in valid_loader:
                    data = data.to(device)
                    output = model(data.x, data.edge_index, data.batch, data.y, data.ptr, readout=readout)
                    logit = output.logit
                    metric.add_batch(predictions=logit.argmax(axis=-1).cpu().numpy(), references=data.y.cpu().numpy())
            evaluation = metric.compute()
            evaluation.update({'global_step':pbar.n})
            wandb.log(evaluation, step=pbar.n)
    finally:
        # Close the bar and the wandb run even if training is interrupted.
        pbar.close()
        run.finish()
    run_pbar.update(1)
run_pbar.close()

  0%|          | 0/24 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[34m[1mwandb[0m: Currently logged in as: [33mcpuyyp[0m ([33mfsu-dsc-cil[0m). Use [1m`wandb login --relogin`[0m to force relogin


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▅▆▆█▇██████████████████████
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.72222
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▂▁▂▃▄▄▅▆▇▇▇██▇██▆██▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.66667
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033338459332784016, max=1.0…

  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▃▆██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.58333
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▂▃▃▄▃▃▄▃▄▅▅▆▆▆▆▇▇▇▇▇██████
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.58951
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.57407
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033337910970052086, max=1.0…

  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.55556
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▅▆▅██▇██▆█▇█████████▇▇█▇▇▇▇
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.70062
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▃▅▅▇▅▅▆▆█▆▆▇▇▇▆█▆▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.64198
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▇▇███████████████████████
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.57407
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▃▅▆▇▇▇▇▇▇▇▇▇▇▇▇██▇█████████
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.63272
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333993752797445, max=1.0)…

  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▂▁▃▂▂▄▄▃▂▃▃▃▄▃▄▄▅▆▇██▇▇▇███
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.49691
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.57099
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▆▇▇▇▇▇█▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.70679
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033337910970052086, max=1.0…

  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▄▄▄▇▇▄▆▇▆▆▆▅█▇█▆▆▆▆▆█▇▆▆█▇▇▇
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.6821
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▁▅▅▅█▅███
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.57716
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅███▅▅▅▅
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.57407
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▄▄▅▇▇███████████████████████
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.57099
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▂▂▃▅▆▇▆▆▆▅▅▆▆▆▇▆█████▇▆▆
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.44444
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▄▅▇▇▇▇████████████▇██████▇█▇
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.70679
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▅▆▅▇▆▆▅▄▇▇█▇▃▇▇▇▇█▇▆▇▇▇▇▇
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.66358
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333892822265625, max=1.0)…

  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.57099
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▅▆▇▆▇▇▇▇▇████████████████
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.66975
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.0333377202351888, max=1.0))…

  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.57099
global_step,1230.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/1230 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▂▂▄▅▆▅▆▆▆▇▆▆▆▇▇▇▇▇██████████
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
accuracy,0.57716
global_step,1230.0


# hetero edges

In [None]:
# English-specific dependency relations: https://universaldependencies.org/en/dep/
# The table below is pasted from that page; rows are whitespace/tab separated
# and subtypes are marked with a leading '↳'.
s = '''nsubj 	csubj
↳nsubj:pass 	↳csubj:pass
↳nsubj:outer 	↳csubj:outer
obj 	ccomp 	xcomp
iobj
obl 	advcl 	advmod
↳obl:npmod 	↳advcl:relcl
↳obl:tmod
vocative 	aux 	mark
discourse 	↳aux:pass
expl 	cop
nummod 	acl 	amod
  	↳acl:relcl
appos 	  	det
  	  	↳det:predet
nmod 	  	 
↳nmod:npmod
↳nmod:tmod
↳nmod:poss
compound 	flat
↳compound:prt 	↳flat:foreign
fixed 	goeswith
conj 	cc
  	↳cc:preconj
list 	parataxis 	orphan
dislocated 		reparandum
root 	punct 	dep'''

base_relations = set()
for line in s.split('\n'):
    # Filter per cell, not per row: a row such as "discourse  ↳aux:pass" mixes
    # a base relation with a subtype, and skipping the whole row loses the
    # base relation.
    for cell in line.split():
        if cell.startswith('↳'):
            continue
        base_relations.add(cell.split(':')[0])

# The parses stored in the data spell the root relation in upper case.
if 'root' in base_relations:
    base_relations.discard('root')
    base_relations.add('ROOT')
# 'case' does not appear in the pasted table, so it is added by hand.
base_relations.add('case')

# Sorted and duplicate-free so that relation ids derived from list positions
# are stable.
all_relations = sorted(base_relations)

In [None]:
# Print every relation seen in the data that has no entry in all_relations;
# no output means the relation vocabulary covers the training set.
uncovered = (rel for rel in unique_relations if rel not in all_relations)
for r in uncovered:
    print(r)

In [None]:
# Integer id of each relation = its position in all_relations.
relation2id = {relation: idx for idx, relation in enumerate(all_relations)}

In [None]:
# loaders
# Build one PyG graph per training sample, as in the homogeneous case, plus
# an integer relation id for every edge (edge_type_ids).
# NOTE(review): edge ids index rows of x, which assumes the tokenizer maps
# every POS tag to exactly one token -- confirm against the tokenizer.
max_length = 128
data_list = []
for i, (edges, typed_edges, pos_tags, author) in enumerate(
        zip(df['homo_edges'], df['hetoro_edges'], df['pos_seqs'], df['author'])):
    # Check the length first so nothing is computed for skipped samples.
    if len(edges) > max_length-1:
        print(f"data {i} too long length {len(edges)}")
        continue
    data = Data()
    # The relation label is the third item of every typed edge; subtypes
    # ("nsubj:pass") share the id of their base relation.
    data.edge_type_ids = torch.tensor([relation2id[edge[2].split(':')[0]] for edge in typed_edges], dtype=torch.long)
    # reshape(-1, 2) keeps an empty edge list as a valid (2, 0) edge_index.
    data.edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).T
    tokens = tokenizer(' '.join(pos_tags), padding=True, truncation=True, max_length=max_length, return_tensors='pt')
    # Features are precomputed constants: no autograd graph is needed.
    with torch.no_grad():
        data.x = bert(**tokens).last_hidden_state.squeeze(0)
    data.y = torch.tensor([author])
    data_list.append(data)

train_loader = DataLoader(data_list, batch_size=32, shuffle=True)


data 511 too long length 134


In [None]:
# Build one PyG graph per validation sample, including an integer relation id
# for every edge (edge_type_ids).
# NOTE(review): edge ids index rows of x, which assumes the tokenizer maps
# every POS tag to exactly one token -- confirm against the tokenizer.
data_list = []
for i, (edges, typed_edges, pos_tags, author) in enumerate(
        zip(df_val['homo_edges'], df_val['hetoro_edges'], df_val['pos_seqs'], df_val['author'])):
    # Check the length first so nothing is computed for skipped samples.
    if len(edges) > max_length-1:
        print(f"data {i} too long length {len(edges)}")
        continue
    data = Data()
    # The relation label is the third item of every typed edge; subtypes
    # ("nsubj:pass") share the id of their base relation.
    data.edge_type_ids = torch.tensor([relation2id[edge[2].split(':')[0]] for edge in typed_edges], dtype=torch.long)
    # reshape(-1, 2) keeps an empty edge list as a valid (2, 0) edge_index.
    data.edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).T
    tokens = tokenizer(' '.join(pos_tags), padding=True, truncation=True, max_length=max_length, return_tensors='pt')
    # Features are precomputed constants: no autograd graph is needed.
    with torch.no_grad():
        data.x = bert(**tokens).last_hidden_state.squeeze(0)
    data.y = torch.tensor([author])
    data_list.append(data)

# Accuracy does not depend on batch order, so validation is left unshuffled
# to keep evaluation deterministic.
valid_loader = DataLoader(data_list, batch_size=32, shuffle=False)


In [None]:
# Number of batches per epoch in the rebuilt loaders; the scheduler below is
# sized from the training count.
num_training_steps, num_valid_steps = len(train_loader), len(valid_loader)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GATConv, global_mean_pool

# Width of the node features, the edge-type embeddings and every GNN layer.
# NOTE(review): must equal the hidden size of the BERT encoder that produces
# data.x -- confirm against the checkpoint config.
pos_emb_dim = 32
# Lookup from the GNNtype string accepted by myHeteroGNN to the layer class.
GNNtype2layer = {'GATConv':GATConv, }

class myHeteroGNN(torch.nn.Module):
    """Attention GNN over typed dependency edges with a graph-level classifier.

    Every edge type id is embedded and passed to the GNN layers as an edge
    feature.

    Args:
        num_gnn: number of GNN layers.
        num_class: number of output classes.
        num_edge_type: size of the edge-type vocabulary.
        num_heads: attention heads per layer.
        GNNtype: key into ``GNNtype2layer`` selecting the layer class.
    """

    def __init__(self, num_gnn, num_class, num_edge_type, num_heads = 1, GNNtype='GATConv'):
        super().__init__()
        self.num_gnn = num_gnn
        self.num_class = num_class
        self.GNNlayer = GNNtype2layer[GNNtype]
        self.edge_emb_layer = nn.Embedding(num_edge_type, pos_emb_dim)

        self.gnns = nn.ModuleList()
        for i in range(num_gnn):
            # concat=False averages the heads, so every layer outputs
            # pos_emb_dim features whatever num_heads is. With the default
            # concat=True the output width is num_heads * pos_emb_dim, which
            # no longer fits the next layer or the classifier once
            # num_heads > 1. For num_heads == 1 both settings are equivalent.
            self.gnns.append(self.GNNlayer(pos_emb_dim, pos_emb_dim, heads = num_heads, concat=False, edge_dim=pos_emb_dim))

        self.classifier = nn.Linear(pos_emb_dim, num_class)
        self.lossfn = nn.CrossEntropyLoss()

    def forward(self, x, edge_index, edge_type_ids, batch, y=None, ptr=None, readout='pool'):
        """Classify every graph of a batch.

        Args:
            x: node features of the batched graphs.
            edge_index: edges of the batched graphs.
            edge_type_ids: one edge-type id per edge, aligned with edge_index.
            batch: graph id of every node (used by the 'pool' readout).
            y: optional labels; when omitted, ``loss`` in the result is None.
            ptr: start offset of every graph in ``x`` plus the total node
                count; required by the 'cls' readout.
            readout: 'pool' averages the nodes of each graph, 'cls' takes
                the first node of each graph.

        Returns:
            myGCNoutput with ``loss``, ``logit`` and ``emb`` (the graph-level
            representation after dropout).

        Raises:
            ValueError: on an unknown ``readout`` or 'cls' without ``ptr``.
        """
        # Fail early: an unknown readout used to skip pooling silently and
        # only surfaced later as a shape mismatch inside the loss.
        if readout not in ('pool', 'cls'):
            raise ValueError(f"readout must be 'pool' or 'cls', got {readout!r}")
        if readout == 'cls' and ptr is None:
            raise ValueError("readout='cls' requires ptr")

        edge_attr = self.edge_emb_layer(edge_type_ids)
        for gnn in self.gnns:
            x = F.relu(gnn(x, edge_index, edge_attr=edge_attr))

        if readout == 'pool':
            x = global_mean_pool(x, batch)
        else:
            # ptr[:-1] holds the index of the first node of every graph.
            x = x[ptr[:-1],:]

        x = F.dropout(x, training=self.training)
        logit = self.classifier(x)
        loss = self.lossfn(logit, y) if y is not None else None
        return myGCNoutput(loss=loss, logit=logit, emb=x)

In [None]:
epochs = 50
warmup_ratio = 0.15  # fraction of all optimizer steps used for learning-rate warm-up
NUM_CLASS = 2  # NOTE(review): hard-coded label count; must match the authors present in df -- confirm

# Hyper-parameter grid: every combination is trained from scratch and logged as its own wandb run.
NUM_GNN = [1,2,3,4]
LR = [1e-3, 1e-4, 1e-5]
READOUT = ['pool', 'cls']
GNNTYPE = ['GATConv']

# GNNTYPE is the last factor, so run indices keep their previous order while it has one entry.
grid = list(itertools.product(NUM_GNN, LR, READOUT, GNNTYPE))
num_runs = len(grid)

# Loaded once for the whole sweep instead of once per epoch; compute() scores only
# the batches added since the previous compute() call.
metric = evaluate.load('accuracy')

run_pbar = trange(num_runs, leave=False)
for i_run, (num_gnn, lr, readout, gnn_type) in enumerate(grid):
    model = myHeteroGNN(num_gnn, NUM_CLASS, num_edge_type=len(all_relations), GNNtype=gnn_type)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Linear warm-up followed by linear decay over the full run.
    scheduler = get_scheduler("linear",
                            optimizer=optimizer,
                            num_warmup_steps=int(warmup_ratio*epochs*num_training_steps),
                            num_training_steps=epochs*num_training_steps)

    wconfig = {}
    wconfig['num_gnn'] = num_gnn
    wconfig['lr'] = lr
    wconfig['readout'] = readout
    wconfig['edgetype'] = 'hetero'
    wconfig['GNNtype'] = gnn_type

    run = wandb.init(project="POS GNN", 
                     entity="fsu-dsc-cil", 
                     dir='/scratch/data_jz17d/wandb_tmp/', 
                     config=wconfig,
                     name=f'run_{i_run}_hetero_num_gnn_{num_gnn}_readout_{readout}',
                     reinit=True)

    # One tick per optimizer step; pbar.n doubles as the global step for wandb.
    pbar = trange(epochs*num_training_steps, leave=False)
    for i_epoch in range(epochs):
        model.train()
        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
            output.loss.backward()
            optimizer.step()
            scheduler.step()
            pbar.update(1)

        # Validation after every epoch. no_grad keeps the forward passes from
        # recording autograd graphs that are never used.
        model.eval()
        with torch.no_grad():
            for data in valid_loader:
                data = data.to(device)
                output = model(data.x, data.edge_index, data.edge_type_ids, data.batch, data.y, data.ptr, readout=readout)
                metric.add_batch(predictions=output.logit.argmax(dim=-1).cpu().numpy(), references=data.y.cpu().numpy())
        evaluation = metric.compute()
        evaluation.update({'global_step':pbar.n})
        wandb.log(evaluation, step=pbar.n)

    pbar.close()
    run.finish()
    run_pbar.update(1)
run_pbar.close()

  0%|          | 0/24 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▂▂▄▅▇▇▇█▇█▇██▇█▇█▇█▇█▇▇▇▇▇█▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73457
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▃▅▅▆▇▇▇▇▆█▇▇▇▆▇▇▆▆▇▇▇▇▆▇▇▆▆▇▇▇▇▇▇▇▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.63889
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333935737609863, max=1.0)…

  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▃▅▆████████████████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.57407
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▂▄▃▅▅▆▆▆▇█████▇█▇████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.59877
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03334000905354818, max=1.0)…

  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▁▁▂▂▃▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.54321
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▂▂▂▂▃▄▄▄▅▆▅▅▆▆▆▆▆▇▇▇▇▇▇███████▇██████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.54321
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▃▁▅▅▅▅▆▆▇█▇▇█▇██▇▇█▇████████████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.73457
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▄▅▆▆▆▇██▇██▇██▇▇▇▇▇▇▇▇█▇▇██▇█▇▇▇██▇███
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.66049
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▃▃▄▆▆▆▇▇▇█▇▇███████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.64198
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033338252703348795, max=1.0…

  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▂▂▂▃▃▃▄▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇██▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.6358
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333810965220133, max=1.0)…

  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,██████████████████████████▅▅▅▅▅▅▅▅▅▅▁▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.42284
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333850701649984, max=1.0)…

  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▄▄▄▄▁▄▄█▄▄▄▄▄▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.5679
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▃▅▄▄▆▆▆▆▇▇▆▇▇▇▆▇▇▇▇▇▇▇▇▇██▇▇▇█▇█▇███
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.75309
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▆▆▆▆▆▇▇█▇▇▇▇██▇█▇█▇▇█▇▇█▇▇█▇▇▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.64198
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▂▄▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.60802
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333799044291178, max=1.0)…

  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▂▂▂▃▄▄▅▆▆▇▇▇▇▇▇█▇██████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.67284
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.42901
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▂▁▂▂▄▃▄▄▃▄▄▅▇▇██████████▇▇▇▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.56173
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▅▅▆▆▆▇▇▇▇▇▆▇▇█▇▇████▇▇█▇█▇▇█▇▇▇▇█▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.74691
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▅▆▆▇▇▆▇▇▇▇▇█▇▇▇▇█▇▇▇█▇███▇████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.66358
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▄▆▆▆▇▇▇▇█████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.7037
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▄▆▅▅▆▆▇▇▇███▇▇▇▇▆▇▇▇▇
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.62963
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▄▆▆█▇▇▇▇▇▇█████████████
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.57099
global_step,2050.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2050 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
accuracy,0.42901
global_step,2050.0
