In [1]:
import os
import numpy as np

import torch
from torch import nn
import wandb

from datasets import load_from_disk
from transformers import BertModel, AutoModelForSequenceClassification
from transformers import Trainer, EvalPrediction, TrainingArguments, EarlyStoppingCallback

from transformers.integrations import WandbCallback
from utils import *

In [2]:
# Directory of the saved pipeline that training continues from.
# The location can be overridden without editing the notebook by setting the
# PARTISAN_PIPELINE_PATH environment variable; when it is unset, the original
# hard-coded location is used, so existing setups keep working unchanged.
path_to_pipeline = os.environ.get(
    'PARTISAN_PIPELINE_PATH',
    '/home/ubuntu/partisan_bias_detection/best_models/final_pipelines/final_mini_pipeline',
)

In [3]:
# Weights & Biases identifiers for this run.
# The project and the notebook name share the same label; the run name is
# specific to this continuation attempt.
WANDB_PROJECT = WANDB_NOTEBOOK_NAME = 'continue_train_mini_pipeline'
WANDB_NAME = 'continue_final_mini_pipeline_2'

In [15]:
# Step bookkeeping used to space out logging, evaluation and checkpointing.
train_size = 840000
val_size = 120000
batch_size = 16
num_epochs = 1

# Floor division keeps the step count an exact integer (no float round-trip).
num_steps_per_epoch = train_size // batch_size
total_steps = num_steps_per_epoch * num_epochs

# Aim for roughly 100 log points over the run and evaluate at every second one.
# Clamp to at least 1 so a small train_size can never yield a zero interval.
logging_steps = max(1, round(total_steps / 100))
eval_steps = logging_steps * 2

# NOTE(review): these numbers assume 840000 training examples and one optimizer
# step per batch. The training cell selects only the first 400000 examples and
# uses gradient_accumulation_steps=4, which gives about 6250 optimizer steps
# (assuming a single device), so the intervals above are much coarser than
# "100 logs per run". Confirm whether train_size should track the actual subset
# and accumulation factor before changing it: doing so alters how often
# evaluation (and therefore early stopping) runs.
print(f'total_steps: {total_steps}')
print(f'logging_steps: {logging_steps}')
print(f'eval_steps: {eval_steps}')

total_steps: 52500
logging_steps: 525
eval_steps: 1050


In [16]:
# Load the dataset from the 'data' folder under the current working directory.
# The notebook must therefore be started from the directory that contains
# 'data'. load_data_from_dir is not defined in this notebook (presumably it
# comes from the `from utils import *` star-import — verify); later cells index
# its result with the 'train' and 'test' keys.
CWD = os.getcwd()
DATASET_DIR = os.path.join(CWD, 'data')
dataset = load_data_from_dir(DATASET_DIR)



In [17]:
# Ask PyTorch to release cached, currently unused GPU memory before the model
# is loaded and training starts in the cells below.
torch.cuda.empty_cache()

In [18]:
# wandb logging
# SECURITY: never commit a W&B API key in the notebook. Supply it through the
# WANDB_API_KEY environment variable set outside the notebook, or through a
# prior `wandb login`. The key that used to be hard-coded in this cell has been
# exposed and should be revoked and rotated.
os.environ['WANDB_PROJECT'] = WANDB_PROJECT
# 'end' asks the Hugging Face W&B integration to upload the model when training
# finishes; 'false' turns off gradient/parameter watching.
os.environ['WANDB_LOG_MODEL'] = 'end'
os.environ['WANDB_WATCH'] = 'false'
os.environ['WANDB_NOTEBOOK_NAME'] = WANDB_NOTEBOOK_NAME

# Authenticate up front using the environment variable or stored credentials
# (prompting interactively if neither is available), so a missing key surfaces
# here rather than in the middle of the training cell.
wandb.login()

In [21]:
# Continue training the saved pipeline for one pass over a training subset,
# evaluating periodically, then save the resulting model and trainer state.

# Start the W&B run explicitly under the configured project and run name.
# A bare wandb.init() creates a run with an auto-generated name; because a run
# is then already active, the Trainer's W&B integration reuses it, so the
# run_name given to TrainingArguments would not show up in W&B.
wandb.init(project=WANDB_PROJECT, name=WANDB_NAME)

# initialize model
model = pipeline_init(path_to_pipeline)
save_path = os.path.join(WANDB_PROJECT, WANDB_NAME)
run_name = WANDB_NAME

# set hyperparams
lr = 1e-7
# Reuse the values the step schedule was computed from instead of repeating
# the literals, so the two cannot drift apart.
num_train_epochs = num_epochs

# NOTE(review): logging_steps / eval_steps were derived from train_size=840000
# with no gradient accumulation, but this cell trains on the first 400000
# examples with gradient_accumulation_steps=4 (about 6250 optimizer steps on a
# single device). Evaluation therefore happens only about five times per run;
# confirm that this is the intended cadence.
args = TrainingArguments(
        learning_rate=lr,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=256,
        num_train_epochs=num_train_epochs,
        gradient_accumulation_steps=4,
        adam_beta1=0.9,
        adam_beta2=0.999,
        adam_epsilon=1e-08,
        lr_scheduler_type='linear',
        logging_strategy='steps',
        logging_steps=logging_steps,
        evaluation_strategy='steps',
        eval_steps=eval_steps,
        save_strategy='steps',
        # Checkpoint at every fifth evaluation; the save interval stays a
        # multiple of eval_steps, which load_best_model_at_end relies on.
        save_steps=eval_steps * 5,
        load_best_model_at_end=True,
        report_to='wandb',
        logging_dir=os.path.join('logs', save_path),
        output_dir=os.path.join('models', save_path),
        save_total_limit=3,
        run_name=run_name,
    )

# freeze bert embeds
for param in model.hi_transformer.embeddings.embeds.parameters():
    param.requires_grad = False

# Only a prefix of each split is used for this continuation run.
train_subset = dataset['train'].select(range(400000))
eval_subset = dataset['test'].select(range(10000))

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_subset,
    eval_dataset=eval_subset,
    compute_metrics=compute_metrics,
    # Stop once the tracked metric fails to improve for three evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3, early_stopping_threshold=0.0)],
)

trainer.train()

# save best model
trainer.save_model(os.path.join('best_models', save_path))
trainer.save_state()

loading configuration file /home/ubuntu/partisan_bias_detection/best_models/final_pipelines/final_mini_pipeline/config.json
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
loading configuration file /home/ubuntu/partisan_bias_detection/best_models/final_pipelines/final_mini_pipeline/config.json
Model config HATConfig {
  "_name_or_path": "/home/ubuntu/partisan_bias_detection/best_models/final_pipelines/final_mini_pipeline",
  "architectures": [
    "HATForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "auto_map": {
    "AutoConfig": "configuration_hat.HATConfig",
    "AutoModel": "modelling_hat.HATModel",
    "AutoModelForMaskedLM": "modelling_hat.HATForMaskedLM",
    "AutoModelForMultipleChoice": "modelling_hat.HATForMultipleChoice",
    "AutoModelForQuestionAnswering": "modelling_hat.HATForQuestionAnswering",
    "AutoModelForSequenceClassification"

Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1050,0.1399,0.160149,0.9477,0.937332,0.935159,0.93623
2100,0.137,0.160058,0.9477,0.937397,0.934943,0.936152
3150,0.1343,0.160097,0.9477,0.937397,0.934943,0.936152
4200,0.1321,0.160104,0.9476,0.93714,0.934735,0.935922
5250,0.1342,0.160101,0.9476,0.93714,0.934735,0.935922


***** Running Evaluation *****
  Num examples = 10000
  Batch size = 256
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 256
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 256
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 256
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 256
Saving model checkpoint to models/continue_train_mini_pipeline/continue_final_mini_pipeline/checkpoint-5250
Configuration saved in models/continue_train_mini_pipeline/continue_final_mini_pipeline/checkpoint-5250/config.json
Model weights saved in models/continue_train_mini_pipeline/continue_final_mini_pipeline/checkpoint-5250/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from models/continue_train_mini_pipeline/continue_final_mini_pipeline/checkpoint-5250 (score: 0.16010068356990814).
Saving model checkpoint to /tmp/tmpsb2w4v75
Configuration saved in /

In [22]:
# Store the embeddings module as a whole pickled object (not a state dict) in
# the same folder the trained model was saved to.
embeds_file = os.path.join('best_models', save_path, 'embeds')
torch.save(model.hi_transformer.embeddings, embeds_file)

print(f'saved to best_models/{save_path}')

# Close the active W&B run now that all artifacts have been written.
wandb.finish()

saved to best_models/continue_train_mini_pipeline/continue_final_mini_pipeline


0,1
eval/accuracy,███▁▁
eval/f1,█▆▆▁▁
eval/loss,█▁▄▅▄
eval/precision,▆██▁▁
eval/recall,█▄▄▁▁
eval/runtime,█▁▁▁▁
eval/samples_per_second,▁████
eval/steps_per_second,▁████
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▅▆▇▇▇█
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▅▆▇▇▇█

0,1
eval/accuracy,0.9476
eval/f1,0.93592
eval/loss,0.1601
eval/precision,0.93714
eval/recall,0.93473
eval/runtime,11.0342
eval/samples_per_second,906.27
eval/steps_per_second,3.625
train/epoch,1.0
train/global_step,6250.0
