# Train a multi-label BERT phenotype classifier

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import torch
import apex
import os
import logging

from pytorch_transformers import BertTokenizer
from fast_bert.data_cls import BertDataBunch
from fast_bert.learner_cls import BertLearner
from fast_bert.metrics import accuracy_multilabel, accuracy_thresh, fbeta, roc_auc
from fast_bert.prediction import BertClassificationPredictor

Let's first:

1. Assign our paths
2. Check GPU status
3. Import our labels
4. Create a logger object
5. Decide the metrics we want to report

In [2]:
# Root of the phenotype-classification data; every other path hangs off it.
BASE = Path('data/phenotype_classification/')

DATA_PATH = BASE/'transformer/original/' # change this as necessary
LABEL_PATH = BASE  # labels.csv is read from the dataset root

# Derived from DATA_PATH so that changing the data variant moves the outputs too.
OUTPUT_DIR = DATA_PATH/'output' # change this as necessary

# parents=True: a fresh checkout may not have the intermediate directories yet,
# in which case a plain mkdir() raises FileNotFoundError.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [3]:
# Select the compute device and record whether more than one GPU is visible.
# `multi_gpu` is only ever True when CUDA is available AND reports >1 device.
if torch.cuda.is_available():
    device_cuda = torch.device("cuda")
    multi_gpu = torch.cuda.device_count() > 1
else:
    # No CUDA: fall back to the CPU (the variable keeps its name for later cells).
    device_cuda = torch.device("cpu")
    multi_gpu = False

print(multi_gpu)

True


In [4]:
# labels.csv has no header row: one label name per line, read into a single
# column called "name".
categories = pd.read_csv(
    LABEL_PATH / 'labels.csv',
    sep=',',
    header=None,
    names=["name"],
)

# Plain Python list of label names, in file order.
labels = categories['name'].tolist()
labels

['Advanced.Cancer',
 'Advanced.Heart.Disease',
 'Advanced.Lung.Disease',
 'Alcohol.Abuse',
 'Chronic.Neurological.Dystrophies',
 'Chronic.Pain.Fibromyalgia',
 'Dementia',
 'Depression',
 'Developmental.Delay.Retardation',
 'Non.Adherence',
 'Obesity',
 'Other.Substance.Abuse',
 'Schizophrenia.and.other.Psychiatric.Disorders',
 'Unsure']

In [5]:
# Configure the root logger that is handed to the learner.
# INFO keeps the training/evaluation messages but drops DEBUG chatter from
# third-party libraries (e.g. urllib3 connection logs).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# basicConfig() is a no-op when the root logger already has handlers, so set
# the level explicitly to make the cell behave the same on every run.
logger.setLevel(logging.INFO)

In [6]:
# Metrics reported at every evaluation; each entry pairs a display name with
# the fast_bert metric function that computes it. Order is preserved.
metrics = [
    {'name': 'accuracy_multilabel', 'function': accuracy_multilabel},
    {'name': 'accuracy_thresh', 'function': accuracy_thresh},
    {'name': 'roc_auc', 'function': roc_auc},
    {'name': 'fbeta', 'function': fbeta},
]

## Pipeline

#### Create a DataBunch object

In [9]:
# Build the data bunch from train.csv / val.csv under DATA_PATH, with the label
# list taken from labels.csv under LABEL_PATH.
databunch = BertDataBunch(
    DATA_PATH,
    LABEL_PATH,
    tokenizer='bert-base-uncased',
    train_file='train.csv',
    val_file='val.csv',
    text_col='text',
    label_file='labels.csv',
    label_col=labels,        # one target column per label name loaded above
    batch_size_per_gpu=8,    # the recorded run logged a total train batch size of 32
    max_seq_length=512,      # equals max_position_embeddings in the logged model config
    multi_gpu=multi_gpu,
    multi_label=True,
    model_type='bert',
    clear_cache=True,        # presumably rebuilds the cached features instead of reusing them — confirm in fast_bert docs
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): s3.amazonaws.com:443
DEBUG:urllib3.connectionpool:https://s3.amazonaws.com:443 "HEAD /models.huggingface.co/bert/bert-base-uncased-vocab.txt HTTP/1.1" 200 0
INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/aa5118/.cache/torch/pytorch_transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
INFO:root:Writing example 0 of 1501
INFO:root:Saving features into cached file data/phenotype_classification/transformer/original/cache/cached_train_multi_label_512
INFO:root:Writing example 0 of 188
INFO:root:Saving features into cached file data/phenotype_classification/transformer/original/cache/cached_dev_multi_label_512


#### Create a Learner object

In [9]:
# Create the learner around the pretrained 'bert-base-uncased' weights.
learner = BertLearner.from_pretrained_model(databunch,
                                            pretrained_path='bert-base-uncased',
                                            metrics=metrics,
                                            device=device_cuda,
                                            logger=logger,
                                            output_dir=OUTPUT_DIR,
                                            finetuned_wgts_path=None,
                                            # NOTE(review): the recorded log shows the lr peaking at step 500;
                                            # keep this below the total number of optimisation steps.
                                            warmup_steps=500,
                                            multi_gpu=multi_gpu,
                                            # Mixed precision goes through apex, which needs CUDA; the device
                                            # cell can fall back to CPU, so only enable fp16 on a GPU.
                                            is_fp16=(device_cuda.type == "cuda"),
                                            multi_label=True,
                                            logging_steps=20)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): s3.amazonaws.com:443
DEBUG:urllib3.connectionpool:https://s3.amazonaws.com:443 "HEAD /models.huggingface.co/bert/bert-base-uncased-config.json HTTP/1.1" 200 0
INFO:pytorch_transformers.modeling_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/aa5118/.cache/torch/pytorch_transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.bf3b9ea126d8c0001ee8a1e8b92229871d06d36d8808208cc2449280da87785c
INFO:pytorch_transformers.modeling_utils:Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 14,
  "output_attentions": false,
  "output_hid

#### Train the model

In [10]:
# Fine-tune with a warm-up followed by cosine learning-rate decay.
# NOTE(review): the log recorded under this cell reports "Num Epochs = 20", so it
# comes from an earlier run with a different `epochs` value — re-run to refresh it.
# NOTE(review): that 20-epoch run had 940 optimisation steps, so 10 epochs on the
# same hardware is roughly 470 steps, fewer than the warmup_steps=500 given to the
# learner; confirm the warm-up length is still intended.
learner.fit(
    epochs=10,
    lr=6e-5,
    validate=True,  # Evaluate the model after each epoch
    schedule_type="warmup_cosine",
)

INFO:root:***** Running training *****
INFO:root:  Num examples = 1501
INFO:root:  Num Epochs = 20
INFO:root:  Total train batch size (w. parallel, distributed & accumulation) = 32
INFO:root:  Gradient Accumulation steps = 1
INFO:root:  Total optimization steps = 940


data/phenotype_classification/transformer/original/output/tensorboard
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 20: 0.6848235229651133: 
INFO:root:eval_accuracy_multilabel after step 20: 0.026595744680851064: 
INFO:root:eval_accuracy_thresh after step 20: 0.5167173147201538: 
INFO:root:eval_roc_auc after step 20: 0.41002714468924506: 
INFO:root:eval_fbeta after step 20: 0.32542797923088074: 
INFO:root:lr after step 20: 2.4000000000000003e-06
INFO:root:train_loss after step 20: 0.715276351571083
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 40: 0.5737837851047516: 
INFO:root:eval_accuracy_multilabel after step 40: 0.0797872340425532: 
INFO:root:eval_accuracy_thresh after step 40: 0.822568416595459: 
INFO:root:eval_roc_auc after step 40: 0.5991413766728202: 
INFO:root:eval_fbeta after step 40: 0.3410872220993042: 
INFO:root:lr after step 40: 4.800000000000001e-06
INFO:root:train_loss after step 40: 0.6381511867046357
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 1: 0.5580857396125793: 
INFO:root:eval_accuracy_multilabel after epoch 1: 0.0797872340425532: 
INFO:root:eval_accuracy_thresh after epoch 1: 0.8237082362174988: 
INFO:root:eval_roc_auc after epoch 1: 0.5791796782894245: 
INFO:root:eval_fbeta after epoch 1: 0.32421910762786865: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 60: 0.4910062899192174: 
INFO:root:eval_accuracy_multilabel after step 60: 0.0851063829787234: 
INFO:root:eval_accuracy_thresh after step 60: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 60: 0.6083242755185959: 
INFO:root:eval_fbeta after step 60: 0.341076523065567: 
INFO:root:lr after step 60: 7.2e-06
INFO:root:train_loss after step 60: 0.5493393182754517
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 80: 0.42475944260756177: 
INFO:root:eval_accuracy_multilabel after step 80: 0.10638297872340426: 
INFO:root:eval_accuracy_thresh after step 80: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 80: 0.643564925840939: 
INFO:root:eval_fbeta after step 80: 0.2973541021347046: 
INFO:root:lr after step 80: 9.600000000000001e-06
INFO:root:train_loss after step 80: 0.47869707494974134
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 2: 0.39617985983689624: 
INFO:root:eval_accuracy_multilabel after epoch 2: 0.05319148936170213: 
INFO:root:eval_accuracy_thresh after epoch 2: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 2: 0.6615748808106601: 
INFO:root:eval_fbeta after epoch 2: 0.24539975821971893: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 100: 0.3839411735534668: 
INFO:root:eval_accuracy_multilabel after step 100: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after step 100: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 100: 0.6532445667489059: 
INFO:root:eval_fbeta after step 100: 0.19284003973007202: 
INFO:root:lr after step 100: 1.2e-05
INFO:root:train_loss after step 100: 0.43761933147907256
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 120: 0.36205873390038806: 
INFO:root:eval_accuracy_multilabel after step 120: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 120: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 120: 0.6707347756824991: 
INFO:root:eval_fbeta after step 120: 0.12755055725574493: 
INFO:root:lr after step 120: 1.44e-05
INFO:root:train_loss after step 120: 0.40733384191989896
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 140: 0.34749147295951843: 
INFO:root:eval_accuracy_multilabel after step 140: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 140: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 140: 0.6568080260865066: 
INFO:root:eval_fbeta after step 140: 0.002045826520770788: 
INFO:root:lr after step 140: 1.6800000000000002e-05
INFO:root:train_loss after step 140: 0.38489552289247514
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 3: 0.3469454348087311: 
INFO:root:eval_accuracy_multilabel after epoch 3: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 3: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 3: 0.6577936025948483: 
INFO:root:eval_fbeta after epoch 3: 0.010001818649470806: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 160: 0.3416449874639511: 
INFO:root:eval_accuracy_multilabel after step 160: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 160: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 160: 0.6565772674729559: 
INFO:root:eval_fbeta after step 160: 0.0: 
INFO:root:lr after step 160: 1.9200000000000003e-05
INFO:root:train_loss after step 160: 0.3810590222477913
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 180: 0.3432370076576869: 
INFO:root:eval_accuracy_multilabel after step 180: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 180: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 180: 0.6674049504551911: 
INFO:root:eval_fbeta after step 180: 0.12017826735973358: 
INFO:root:lr after step 180: 2.16e-05
INFO:root:train_loss after step 180: 0.37624176442623136
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 4: 0.33995052178700763: 
INFO:root:eval_accuracy_multilabel after epoch 4: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 4: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 4: 0.6546844717424936: 
INFO:root:eval_fbeta after epoch 4: 0.0: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 200: 0.33830442031224567: 
INFO:root:eval_accuracy_multilabel after step 200: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 200: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 200: 0.667814708740935: 
INFO:root:eval_fbeta after step 200: 0.027354096993803978: 
INFO:root:lr after step 200: 2.4e-05
INFO:root:train_loss after step 200: 0.3689279228448868
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 220: 0.33772335946559906: 
INFO:root:eval_accuracy_multilabel after step 220: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 220: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 220: 0.6688678594227153: 
INFO:root:eval_fbeta after step 220: 0.04393598809838295: 
INFO:root:lr after step 220: 2.64e-05
INFO:root:train_loss after step 220: 0.37574382275342944
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 5: 0.3348299413919449: 
INFO:root:eval_accuracy_multilabel after epoch 5: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 5: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 5: 0.6738194648125464: 
INFO:root:eval_fbeta after epoch 5: 0.0667455717921257: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 240: 0.33370732764403027: 
INFO:root:eval_accuracy_multilabel after step 240: 0.0851063829787234: 
INFO:root:eval_accuracy_thresh after step 240: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 240: 0.6775576105772273: 
INFO:root:eval_fbeta after step 240: 0.023428820073604584: 
INFO:root:lr after step 240: 2.88e-05
INFO:root:train_loss after step 240: 0.37144800275564194
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 260: 0.3330939511458079: 
INFO:root:eval_accuracy_multilabel after step 260: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 260: 0.886398196220398: 
INFO:root:eval_roc_auc after step 260: 0.6885585422381717: 
INFO:root:eval_fbeta after step 260: 0.12998396158218384: 
INFO:root:lr after step 260: 3.12e-05
INFO:root:train_loss after step 260: 0.3620541974902153
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 280: 0.33705511192480725: 
INFO:root:eval_accuracy_multilabel after step 280: 0.10638297872340426: 
INFO:root:eval_accuracy_thresh after step 280: 0.886398196220398: 
INFO:root:eval_roc_auc after step 280: 0.7085418068471329: 
INFO:root:eval_fbeta after step 280: 0.1493101567029953: 
INFO:root:lr after step 280: 3.3600000000000004e-05
INFO:root:train_loss after step 280: 0.3609879553318024
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 6: 0.3302903175354004: 
INFO:root:eval_accuracy_multilabel after epoch 6: 0.1276595744680851: 
INFO:root:eval_accuracy_thresh after epoch 6: 0.8879179358482361: 
INFO:root:eval_roc_auc after epoch 6: 0.7130326138840484: 
INFO:root:eval_fbeta after epoch 6: 0.14225047826766968: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 300: 0.3231101334095001: 
INFO:root:eval_accuracy_multilabel after step 300: 0.22340425531914893: 
INFO:root:eval_accuracy_thresh after step 300: 0.8848784565925598: 
INFO:root:eval_roc_auc after step 300: 0.729054881730819: 
INFO:root:eval_fbeta after step 300: 0.1427101194858551: 
INFO:root:lr after step 300: 3.6e-05
INFO:root:train_loss after step 300: 0.35896016359329225
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 320: 0.3176308472951253: 
INFO:root:eval_accuracy_multilabel after step 320: 0.19148936170212766: 
INFO:root:eval_accuracy_thresh after step 320: 0.8860182762145996: 
INFO:root:eval_roc_auc after step 320: 0.7503860354376217: 
INFO:root:eval_fbeta after step 320: 0.15726444125175476: 
INFO:root:lr after step 320: 3.8400000000000005e-05
INFO:root:train_loss after step 320: 0.35287884473800657
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 7: 0.3151118556658427: 
INFO:root:eval_accuracy_multilabel after epoch 7: 0.10106382978723404: 
INFO:root:eval_accuracy_thresh after epoch 7: 0.8871580362319946: 
INFO:root:eval_roc_auc after epoch 7: 0.7428206035092562: 
INFO:root:eval_fbeta after epoch 7: 0.1157122254371643: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 340: 0.31109300752480823: 
INFO:root:eval_accuracy_multilabel after step 340: 0.21808510638297873: 
INFO:root:eval_accuracy_thresh after step 340: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 340: 0.7638577376741832: 
INFO:root:eval_fbeta after step 340: 0.19610191881656647: 
INFO:root:lr after step 340: 4.08e-05
INFO:root:train_loss after step 340: 0.3385552644729614
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 360: 0.3131638914346695: 
INFO:root:eval_accuracy_multilabel after step 360: 0.20212765957446807: 
INFO:root:eval_accuracy_thresh after step 360: 0.8890577554702759: 
INFO:root:eval_roc_auc after step 360: 0.7588385581109136: 
INFO:root:eval_fbeta after step 360: 0.17900320887565613: 
INFO:root:lr after step 360: 4.32e-05
INFO:root:train_loss after step 360: 0.3260939374566078
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 8: 0.304619421561559: 
INFO:root:eval_accuracy_multilabel after epoch 8: 0.26595744680851063: 
INFO:root:eval_accuracy_thresh after epoch 8: 0.8886778354644775: 
INFO:root:eval_roc_auc after epoch 8: 0.7830049516053899: 
INFO:root:eval_fbeta after epoch 8: 0.2869456112384796: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 380: 0.3034232209126155: 
INFO:root:eval_accuracy_multilabel after step 380: 0.2393617021276596: 
INFO:root:eval_accuracy_thresh after step 380: 0.8882978558540344: 
INFO:root:eval_roc_auc after step 380: 0.7820064353617088: 
INFO:root:eval_fbeta after step 380: 0.2739352285861969: 
INFO:root:lr after step 380: 4.5600000000000004e-05
INFO:root:train_loss after step 380: 0.31867584586143494
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 400: 0.303069402774175: 
INFO:root:eval_accuracy_multilabel after step 400: 0.2925531914893617: 
INFO:root:eval_accuracy_thresh after step 400: 0.8909574747085571: 
INFO:root:eval_roc_auc after step 400: 0.7923287497915265: 
INFO:root:eval_fbeta after step 400: 0.33577778935432434: 
INFO:root:lr after step 400: 4.8e-05
INFO:root:train_loss after step 400: 0.30358903780579566
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 420: 0.29074664910634357: 
INFO:root:eval_accuracy_multilabel after step 420: 0.2872340425531915: 
INFO:root:eval_accuracy_thresh after step 420: 0.8955167531967163: 
INFO:root:eval_roc_auc after step 420: 0.797557121742781: 
INFO:root:eval_fbeta after step 420: 0.30010008811950684: 
INFO:root:lr after step 420: 5.04e-05
INFO:root:train_loss after step 420: 0.3109830230474472
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 9: 0.2991564820210139: 
INFO:root:eval_accuracy_multilabel after epoch 9: 0.2765957446808511: 
INFO:root:eval_accuracy_thresh after epoch 9: 0.8920972943305969: 
INFO:root:eval_roc_auc after epoch 9: 0.7909226318846581: 
INFO:root:eval_fbeta after epoch 9: 0.3271926939487457: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 440: 0.2910275459289551: 
INFO:root:eval_accuracy_multilabel after step 440: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 440: 0.8936170339584351: 
INFO:root:eval_roc_auc after step 440: 0.7925070305895343: 
INFO:root:eval_fbeta after step 440: 0.32390812039375305: 
INFO:root:lr after step 440: 5.28e-05
INFO:root:train_loss after step 440: 0.2904456347227097
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 460: 0.27908288439114887: 
INFO:root:eval_accuracy_multilabel after step 460: 0.32978723404255317: 
INFO:root:eval_accuracy_thresh after step 460: 0.9000759720802307: 
INFO:root:eval_roc_auc after step 460: 0.813783549858238: 
INFO:root:eval_fbeta after step 460: 0.3275734484195709: 
INFO:root:lr after step 460: 5.520000000000001e-05
INFO:root:train_loss after step 460: 0.27265576720237733
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 10: 0.28789974252382916: 
INFO:root:eval_accuracy_multilabel after epoch 10: 0.3351063829787234: 
INFO:root:eval_accuracy_thresh after epoch 10: 0.8928571343421936: 
INFO:root:eval_roc_auc after epoch 10: 0.7978676753909237: 
INFO:root:eval_fbeta after epoch 10: 0.3106161653995514: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 480: 0.2851452479759852: 
INFO:root:eval_accuracy_multilabel after step 480: 0.3351063829787234: 
INFO:root:eval_accuracy_thresh after step 480: 0.8943769335746765: 
INFO:root:eval_roc_auc after step 480: 0.8077284725936404: 
INFO:root:eval_fbeta after step 480: 0.3337230682373047: 
INFO:root:lr after step 480: 5.76e-05
INFO:root:train_loss after step 480: 0.26688227504491807
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 500: 0.28906971712907154: 
INFO:root:eval_accuracy_multilabel after step 500: 0.34574468085106386: 
INFO:root:eval_accuracy_thresh after step 500: 0.8974164128303528: 
INFO:root:eval_roc_auc after step 500: 0.7924595848932903: 
INFO:root:eval_fbeta after step 500: 0.34432560205459595: 
INFO:root:lr after step 500: 6e-05
INFO:root:train_loss after step 500: 0.249484633654356
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 11: 0.29163864254951477: 
INFO:root:eval_accuracy_multilabel after epoch 11: 0.3351063829787234: 
INFO:root:eval_accuracy_thresh after epoch 11: 0.8955167531967163: 
INFO:root:eval_roc_auc after epoch 11: 0.7975650293588219: 
INFO:root:eval_fbeta after epoch 11: 0.3508912920951843: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 520: 0.2824757918715477: 
INFO:root:eval_accuracy_multilabel after step 520: 0.34574468085106386: 
INFO:root:eval_accuracy_thresh after step 520: 0.901975691318512: 
INFO:root:eval_roc_auc after step 520: 0.7957549041596936: 
INFO:root:eval_fbeta after step 520: 0.3211449384689331: 
INFO:root:lr after step 520: 5.969464325642798e-05
INFO:root:train_loss after step 520: 0.2494668409228325
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 540: 0.28792832295099896: 
INFO:root:eval_accuracy_multilabel after step 540: 0.35638297872340424: 
INFO:root:eval_accuracy_thresh after step 540: 0.9000759720802307: 
INFO:root:eval_roc_auc after step 540: 0.7924969663509371: 
INFO:root:eval_fbeta after step 540: 0.31474319100379944: 
INFO:root:lr after step 540: 5.878478920843492e-05
INFO:root:train_loss after step 540: 0.2359548933804035
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 560: 0.2909298737843831: 
INFO:root:eval_accuracy_multilabel after step 560: 0.3191489361702128: 
INFO:root:eval_accuracy_thresh after step 560: 0.896276593208313: 
INFO:root:eval_roc_auc after step 560: 0.7894496586785368: 
INFO:root:eval_fbeta after step 560: 0.32101941108703613: 
INFO:root:lr after step 560: 5.728895986063555e-05
INFO:root:train_loss after step 560: 0.2096931032836437
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 12: 0.2915497422218323: 
INFO:root:eval_accuracy_multilabel after epoch 12: 0.30851063829787234: 
INFO:root:eval_accuracy_thresh after epoch 12: 0.890577495098114: 
INFO:root:eval_roc_auc after epoch 12: 0.7978238240656074: 
INFO:root:eval_fbeta after epoch 12: 0.3726266324520111: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 580: 0.29025957981745404: 
INFO:root:eval_accuracy_multilabel after step 580: 0.3351063829787234: 
INFO:root:eval_accuracy_thresh after step 580: 0.8917173147201538: 
INFO:root:eval_roc_auc after step 580: 0.7990516611744679: 
INFO:root:eval_fbeta after step 580: 0.3325578272342682: 
INFO:root:lr after step 580: 5.5237605984935435e-05
INFO:root:train_loss after step 580: 0.19909504130482675
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 600: 0.29506443440914154: 
INFO:root:eval_accuracy_multilabel after step 600: 0.3191489361702128: 
INFO:root:eval_accuracy_thresh after step 600: 0.8955167531967163: 
INFO:root:eval_roc_auc after step 600: 0.7927442590707544: 
INFO:root:eval_fbeta after step 600: 0.31072166562080383: 
INFO:root:lr after step 600: 5.267248723062775e-05
INFO:root:train_loss after step 600: 0.19564335495233537
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 13: 0.2966028501590093: 
INFO:root:eval_accuracy_multilabel after epoch 13: 0.30851063829787234: 
INFO:root:eval_accuracy_thresh after epoch 13: 0.8951367735862732: 
INFO:root:eval_roc_auc after epoch 13: 0.7927715762898041: 
INFO:root:eval_fbeta after epoch 13: 0.34465381503105164: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 620: 0.29643559952576953: 
INFO:root:eval_accuracy_multilabel after step 620: 0.31382978723404253: 
INFO:root:eval_accuracy_thresh after step 620: 0.8939969539642334: 
INFO:root:eval_roc_auc after step 620: 0.792023947136868: 
INFO:root:eval_fbeta after step 620: 0.33540377020835876: 
INFO:root:lr after step 620: 4.964582201835856e-05
INFO:root:train_loss after step 620: 0.17977626100182534
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 640: 0.30015023549397785: 
INFO:root:eval_accuracy_multilabel after step 640: 0.30851063829787234: 
INFO:root:eval_accuracy_thresh after step 640: 0.8951367735862732: 
INFO:root:eval_roc_auc after step 640: 0.7947434481806733: 
INFO:root:eval_fbeta after step 640: 0.35188600420951843: 
INFO:root:lr after step 640: 4.6219224523667933e-05
INFO:root:train_loss after step 640: 0.1594530776143074
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 14: 0.2986389845609665: 
INFO:root:eval_accuracy_multilabel after epoch 14: 0.3404255319148936: 
INFO:root:eval_accuracy_thresh after epoch 14: 0.8932371139526367: 
INFO:root:eval_roc_auc after epoch 14: 0.792654399797565: 
INFO:root:eval_fbeta after epoch 14: 0.34893208742141724: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 660: 0.29680036505063373: 
INFO:root:eval_accuracy_multilabel after step 660: 0.3191489361702128: 
INFO:root:eval_accuracy_thresh after step 660: 0.8958966732025146: 
INFO:root:eval_roc_auc after step 660: 0.7980912452626192: 
INFO:root:eval_fbeta after step 660: 0.33045706152915955: 
INFO:root:lr after step 660: 4.24624503900566e-05
INFO:root:train_loss after step 660: 0.16190194338560104
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 680: 0.2984897444645564: 
INFO:root:eval_accuracy_multilabel after step 680: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 680: 0.8985562324523926: 
INFO:root:eval_roc_auc after step 680: 0.8001278158301847: 
INFO:root:eval_fbeta after step 680: 0.3398211896419525: 
INFO:root:lr after step 680: 3.845197670524289e-05
INFO:root:train_loss after step 680: 0.13759037293493748
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 700: 0.3061739305655162: 
INFO:root:eval_accuracy_multilabel after step 700: 0.30851063829787234: 
INFO:root:eval_accuracy_thresh after step 700: 0.8920972943305969: 
INFO:root:eval_roc_auc after step 700: 0.7960388594629724: 
INFO:root:eval_fbeta after step 700: 0.3867374360561371: 
INFO:root:lr after step 700: 3.426944514819856e-05
INFO:root:train_loss after step 700: 0.138852896168828
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 15: 0.30573462943236035: 
INFO:root:eval_accuracy_multilabel after epoch 15: 0.30319148936170215: 
INFO:root:eval_accuracy_thresh after epoch 15: 0.8939969539642334: 
INFO:root:eval_roc_auc after epoch 15: 0.7946169263240225: 
INFO:root:eval_fbeta after epoch 15: 0.3592858612537384: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 720: 0.31149208545684814: 
INFO:root:eval_accuracy_multilabel after step 720: 0.30319148936170215: 
INFO:root:eval_accuracy_thresh after step 720: 0.8932371139526367: 
INFO:root:eval_roc_auc after step 720: 0.7891189765531996: 
INFO:root:eval_fbeta after step 720: 0.34024834632873535: 
INFO:root:lr after step 720: 3e-05
INFO:root:train_loss after step 720: 0.13071989603340625
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 740: 0.31572124858697254: 
INFO:root:eval_accuracy_multilabel after step 740: 0.31382978723404253: 
INFO:root:eval_accuracy_thresh after step 740: 0.8909574747085571: 
INFO:root:eval_roc_auc after step 740: 0.7871657953911537: 
INFO:root:eval_fbeta after step 740: 0.370752215385437: 
INFO:root:lr after step 740: 2.573055485180145e-05
INFO:root:train_loss after step 740: 0.11908238343894481
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 16: 0.3170862744251887: 
INFO:root:eval_accuracy_multilabel after epoch 16: 0.31382978723404253: 
INFO:root:eval_accuracy_thresh after epoch 16: 0.8924772143363953: 
INFO:root:eval_roc_auc after epoch 16: 0.7850041407153086: 
INFO:root:eval_fbeta after epoch 16: 0.3538195788860321: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 760: 0.3135870099067688: 
INFO:root:eval_accuracy_multilabel after step 760: 0.2925531914893617: 
INFO:root:eval_accuracy_thresh after step 760: 0.8966565728187561: 
INFO:root:eval_roc_auc after step 760: 0.7910124911578474: 
INFO:root:eval_fbeta after step 760: 0.3488994240760803: 
INFO:root:lr after step 760: 2.154802329475711e-05
INFO:root:train_loss after step 760: 0.11136390678584576
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 780: 0.31330008308092755: 
INFO:root:eval_accuracy_multilabel after step 780: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 780: 0.896276593208313: 
INFO:root:eval_roc_auc after step 780: 0.7917141123629108: 
INFO:root:eval_fbeta after step 780: 0.3533134460449219: 
INFO:root:lr after step 780: 1.753754960994341e-05
INFO:root:train_loss after step 780: 0.10177420675754548
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 17: 0.316981961329778: 
INFO:root:eval_accuracy_multilabel after epoch 17: 0.30319148936170215: 
INFO:root:eval_accuracy_thresh after epoch 17: 0.890577495098114: 
INFO:root:eval_roc_auc after epoch 17: 0.7933934024602749: 
INFO:root:eval_fbeta after epoch 17: 0.3549114763736725: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 800: 0.31700797379016876: 
INFO:root:eval_accuracy_multilabel after step 800: 0.30851063829787234: 
INFO:root:eval_accuracy_thresh after step 800: 0.890577495098114: 
INFO:root:eval_roc_auc after step 800: 0.793481105110908: 
INFO:root:eval_fbeta after step 800: 0.34991779923439026: 
INFO:root:lr after step 800: 1.3780775476332083e-05
INFO:root:train_loss after step 800: 0.10468153320252896
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 820: 0.3181575685739517: 
INFO:root:eval_accuracy_multilabel after step 820: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 820: 0.8920972943305969: 
INFO:root:eval_roc_auc after step 820: 0.7904510504189599: 
INFO:root:eval_fbeta after step 820: 0.35852667689323425: 
INFO:root:lr after step 820: 1.035417798164145e-05
INFO:root:train_loss after step 820: 0.09751340597867966
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 840: 0.317247877518336: 
INFO:root:eval_accuracy_multilabel after step 840: 0.3191489361702128: 
INFO:root:eval_accuracy_thresh after step 840: 0.8943769335746765: 
INFO:root:eval_roc_auc after step 840: 0.7905380341954072: 
INFO:root:eval_fbeta after step 840: 0.35032030940055847: 
INFO:root:lr after step 840: 7.3275127693722555e-06
INFO:root:train_loss after step 840: 0.09247569218277932
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 18: 0.3161810388167699: 
INFO:root:eval_accuracy_multilabel after epoch 18: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after epoch 18: 0.8920972943305969: 
INFO:root:eval_roc_auc after epoch 18: 0.7918262567358512: 
INFO:root:eval_fbeta after epoch 18: 0.35417118668556213: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 860: 0.3168712208668391: 
INFO:root:eval_accuracy_multilabel after step 860: 0.31382978723404253: 
INFO:root:eval_accuracy_thresh after step 860: 0.8924772143363953: 
INFO:root:eval_roc_auc after step 860: 0.7934545067660439: 
INFO:root:eval_fbeta after step 860: 0.3499216139316559: 
INFO:root:lr after step 860: 4.76239401506456e-06
INFO:root:train_loss after step 860: 0.09203909933567048
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 880: 0.3165478656689326: 
INFO:root:eval_accuracy_multilabel after step 880: 0.32978723404255317: 
INFO:root:eval_accuracy_thresh after step 880: 0.8936170339584351: 
INFO:root:eval_roc_auc after step 880: 0.790755134199433: 
INFO:root:eval_fbeta after step 880: 0.35140088200569153: 
INFO:root:lr after step 880: 2.711040139364447e-06
INFO:root:train_loss after step 880: 0.09014642089605332
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 19: 0.31725341578324634: 
INFO:root:eval_accuracy_multilabel after epoch 19: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after epoch 19: 0.8936170339584351: 
INFO:root:eval_roc_auc after epoch 19: 0.7911864587107422: 
INFO:root:eval_fbeta after epoch 19: 0.35212621092796326: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 900: 0.3183351506789525: 
INFO:root:eval_accuracy_multilabel after step 900: 0.3191489361702128: 
INFO:root:eval_accuracy_thresh after step 900: 0.8920972943305969: 
INFO:root:eval_roc_auc after step 900: 0.791103788179408: 
INFO:root:eval_fbeta after step 900: 0.35263797640800476: 
INFO:root:lr after step 900: 1.2152107915650823e-06
INFO:root:train_loss after step 900: 0.08820373676717282
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 920: 0.3184267332156499: 
INFO:root:eval_accuracy_multilabel after step 920: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 920: 0.8920972943305969: 
INFO:root:eval_roc_auc after step 920: 0.7912561895067373: 
INFO:root:eval_fbeta after step 920: 0.35287976264953613: 
INFO:root:lr after step 920: 3.053567435720195e-07
INFO:root:train_loss after step 920: 0.08838593736290931
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 940: 0.31818415721257526: 
INFO:root:eval_accuracy_multilabel after step 940: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 940: 0.8928571343421936: 
INFO:root:eval_roc_auc after step 940: 0.7913374222897006: 
INFO:root:eval_fbeta after step 940: 0.35287976264953613: 
INFO:root:lr after step 940: 0.0
INFO:root:train_loss after step 940: 0.08576637730002404
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 20: 0.31818415721257526: 
INFO:root:eval_accuracy_multilabel after epoch 20: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after epoch 20: 0.8928571343421936: 
INFO:root:eval_roc_auc after epoch 20: 0.7913374222897006: 
INFO:root:eval_fbeta after epoch 20: 0.35287976264953613: 


(940, 0.26950127923900774)

#### Save the model

In [11]:
# Persist the fine-tuned model held by `learner` (created in an earlier cell).
# NOTE(review): presumably written under the learner's output_dir — confirm.
learner.save_model()

## Wrap into one function

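Let's wrap all of the steps above into a single function so that we can run them repeatedly, one dataset/model combination after another. Note that the function relies on the labels, metrics, logger and GPU settings defined in the cells at the top of this notebook, so run those first.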

In [7]:
# Root directory of the phenotype-classification data; dataset variants are
# resolved relative to it inside train().
BASE = Path('data/phenotype_classification/')
# labels.csv is read from the same root directory.
LABEL_PATH = BASE
# Local directory passed to from_pretrained() when model == "biobert".
BIOBERT_PATH = Path('biobert/')

def train(path_to_directory, model, epochs=10, lr=6e-5, warmup_steps=500):
    """Fine-tune a multi-label BERT classifier on one dataset variant and save it.

    Relies on names defined in earlier notebook cells: ``labels``, ``metrics``,
    ``multi_gpu``, ``device_cuda`` and ``logger``, plus ``BASE``, ``LABEL_PATH``
    and ``BIOBERT_PATH`` defined alongside this function.

    Parameters
    ----------
    path_to_directory : str or Path
        Location of the dataset variant relative to ``BASE``; it must contain
        ``train.csv`` and ``val.csv``. Outputs go to ``<dir>/output/<model>``.
    model : str
        Either ``'bert'`` (``bert-base-uncased``) or ``'biobert'`` (weights and
        vocabulary loaded from ``BIOBERT_PATH``).
    epochs : int, optional
        Number of training epochs (default 10).
    lr : float, optional
        Peak learning rate (default 6e-5).
    warmup_steps : int, optional
        Number of linear warm-up steps before the cosine decay (default 500).

    Raises
    ------
    ValueError
        If ``model`` is neither ``'bert'`` nor ``'biobert'``. Raised before any
        directory is created or any data is loaded.
    """
    # Validate first so that a typo cannot leave an empty output directory
    # behind or be silently skipped in the middle of a training sweep.
    if model not in ("bert", "biobert"):
        raise ValueError("Model parameter must be either 'bert' or 'biobert'")

    DATA_PATH = BASE/path_to_directory
    OUTPUT_DIR = DATA_PATH/'output'/model
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    if model == "biobert":
        # BioBERT is loaded from a local directory, so the tokenizer is built
        # explicitly from the vocabulary stored there.
        tokenizer = BertTokenizer.from_pretrained(BIOBERT_PATH,
                                                  do_lower_case=True)
        pretrained_path = BIOBERT_PATH
    else:
        # For the stock model the shortcut name is passed through unchanged.
        tokenizer = "bert-base-uncased"
        pretrained_path = "bert-base-uncased"

    databunch = BertDataBunch(DATA_PATH,
                              LABEL_PATH,
                              tokenizer=tokenizer,
                              train_file='train.csv',
                              val_file='val.csv',
                              text_col='text',
                              label_file='labels.csv',
                              label_col=labels,
                              batch_size_per_gpu=8,
                              max_seq_length=512,
                              multi_gpu=multi_gpu,
                              multi_label=True,
                              model_type='bert',
                              clear_cache=True)

    # With the defaults the warm-up (500 steps) is longer than a 10-epoch run
    # on the original data (~470 steps), so the learning rate never reaches
    # `lr` and the cosine decay never starts. Surface that instead of hiding it.
    total_steps = len(databunch.train_dl) * epochs
    if warmup_steps >= total_steps:
        logger.warning(
            "warmup_steps (%d) >= total training steps (%d): the learning rate "
            "will never reach %g and the cosine decay will not run",
            warmup_steps, total_steps, lr)

    learner = BertLearner.from_pretrained_model(databunch,
                                                pretrained_path=pretrained_path,
                                                metrics=metrics,
                                                device=device_cuda,
                                                logger=logger,
                                                output_dir=OUTPUT_DIR,
                                                finetuned_wgts_path=None,
                                                warmup_steps=warmup_steps,
                                                multi_gpu=multi_gpu,
                                                is_fp16=True,
                                                multi_label=True,
                                                logging_steps=20)

    learner.fit(epochs=epochs,
                lr=lr,
                validate=True, # Evaluate the model after each epoch
                schedule_type="warmup_cosine")

    learner.save_model()

Let's get training!

In [8]:
# Train every (dataset variant, pretrained model) combination, one after another.
dataset_variants = ['original', 'original_2x', 'synthetic', 'combined']
model_names = ['biobert', 'bert']

for directory in dataset_variants:
    for model in model_names:
        train('transformer/' + directory, model)

INFO:pytorch_transformers.tokenization_utils:Model name 'biobert' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc). Assuming 'biobert' is a path or url to a directory containing tokenizer files.
INFO:pytorch_transformers.tokenization_utils:Didn't find file biobert/added_tokens.json. We won't load it.
INFO:pytorch_transformers.tokenization_utils:Didn't find file biobert/special_tokens_map.json. We won't load it.
INFO:pytorch_transformers.tokenization_utils:loading file None
INFO:pytorch_transformers.tokenization_utils:loading file None
INFO:pytorch_transformers.tokenization_utils:loading file biobert/vocab.tx

INFO:root:eval_loss after step 20: 0.6245470146338145: 
INFO:root:eval_accuracy_multilabel after step 20: 0.09574468085106383: 
INFO:root:eval_accuracy_thresh after step 20: 0.7481003403663635: 
INFO:root:eval_roc_auc after step 20: 0.570418039716361: 
INFO:root:eval_fbeta after step 20: 0.34148284792900085: 
INFO:root:lr after step 20: 2.4000000000000003e-06
INFO:root:train_loss after step 20: 0.654225566983223
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 40: 0.5243857502937317: 
INFO:root:eval_accuracy_multilabel after step 40: 0.09574468085106383: 
INFO:root:eval_accuracy_thresh after step 40: 0.8461246490478516: 
INFO:root:eval_roc_auc after step 40: 0.5837567502286021: 
INFO:root:eval_fbeta after step 40: 0.315360426902771: 
INFO:root:lr after step 40: 4.800000000000001e-06
INFO:root:train_loss after step 40: 0.5891541808843612
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 1: 0.48875992000102997: 
INFO:root:eval_accuracy_multilabel after epoch 1: 0.07446808510638298: 
INFO:root:eval_accuracy_thresh after epoch 1: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 1: 0.5992463323039054: 
INFO:root:eval_fbeta after epoch 1: 0.3081028163433075: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 60: 0.4463123132785161: 
INFO:root:eval_accuracy_multilabel after step 60: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 60: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 60: 0.6106404881443269: 
INFO:root:eval_fbeta after step 60: 0.3013382852077484: 
INFO:root:lr after step 60: 7.2e-06
INFO:root:train_loss after step 60: 0.5029118552803993
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 80: 0.396735817193985: 
INFO:root:eval_accuracy_multilabel after step 80: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 80: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 80: 0.63033332758234: 
INFO:root:eval_fbeta after step 80: 0.11696664988994598: 
INFO:root:lr after step 80: 9.600000000000001e-06
INFO:root:train_loss after step 80: 0.4432420089840889
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 2: 0.37374845643838245: 
INFO:root:eval_accuracy_multilabel after epoch 2: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 2: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 2: 0.643541202992817: 
INFO:root:eval_fbeta after epoch 2: 0.11696664988994598: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 100: 0.36633211374282837: 
INFO:root:eval_accuracy_multilabel after step 100: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 100: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 100: 0.6527046922355837: 
INFO:root:eval_fbeta after step 100: 0.11696664988994598: 
INFO:root:lr after step 100: 1.2e-05
INFO:root:train_loss after step 100: 0.4148416742682457
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 120: 0.35101109743118286: 
INFO:root:eval_accuracy_multilabel after step 120: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 120: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 120: 0.6547743310156829: 
INFO:root:eval_fbeta after step 120: 0.11696664988994598: 
INFO:root:lr after step 120: 1.44e-05
INFO:root:train_loss after step 120: 0.3915459156036377
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 140: 0.3431871434052785: 
INFO:root:eval_accuracy_multilabel after step 140: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 140: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 140: 0.6631830023636583: 
INFO:root:eval_fbeta after step 140: 0.0: 
INFO:root:lr after step 140: 1.6800000000000002e-05
INFO:root:train_loss after step 140: 0.3747246503829956
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 3: 0.3429449399312337: 
INFO:root:eval_accuracy_multilabel after epoch 3: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 3: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 3: 0.6595030854080042: 
INFO:root:eval_fbeta after epoch 3: 0.0: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 160: 0.34257545073827106: 
INFO:root:eval_accuracy_multilabel after step 160: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 160: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 160: 0.6664826348751748: 
INFO:root:eval_fbeta after step 160: 0.11696664988994598: 
INFO:root:lr after step 160: 1.9200000000000003e-05
INFO:root:train_loss after step 160: 0.37271821647882464
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 180: 0.3373168458541234: 
INFO:root:eval_accuracy_multilabel after step 180: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 180: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 180: 0.6728482657879149: 
INFO:root:eval_fbeta after step 180: 0.0: 
INFO:root:lr after step 180: 2.16e-05
INFO:root:train_loss after step 180: 0.37464589178562163
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 4: 0.33789828419685364: 
INFO:root:eval_accuracy_multilabel after epoch 4: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 4: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 4: 0.6628293162643848: 
INFO:root:eval_fbeta after epoch 4: 0.0: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 200: 0.3385683496793111: 
INFO:root:eval_accuracy_multilabel after step 200: 0.06914893617021277: 
INFO:root:eval_accuracy_thresh after step 200: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 200: 0.6637624149571838: 
INFO:root:eval_fbeta after step 200: 0.10615451633930206: 
INFO:root:lr after step 200: 2.4e-05
INFO:root:train_loss after step 200: 0.3785696804523468
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 220: 0.3316543896993001: 
INFO:root:eval_accuracy_multilabel after step 220: 0.18617021276595744: 
INFO:root:eval_accuracy_thresh after step 220: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 220: 0.6867318829327766: 
INFO:root:eval_fbeta after step 220: 0.0: 
INFO:root:lr after step 220: 2.64e-05
INFO:root:train_loss after step 220: 0.37278774231672285
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 5: 0.3237042923768361: 
INFO:root:eval_accuracy_multilabel after epoch 5: 0.1276595744680851: 
INFO:root:eval_accuracy_thresh after epoch 5: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 5: 0.7240378587901061: 
INFO:root:eval_fbeta after epoch 5: 0.07753541320562363: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 240: 0.33139658470948535: 
INFO:root:eval_accuracy_multilabel after step 240: 0.18617021276595744: 
INFO:root:eval_accuracy_thresh after step 240: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 240: 0.6898970859716016: 
INFO:root:eval_fbeta after step 240: 0.12625467777252197: 
INFO:root:lr after step 240: 2.88e-05
INFO:root:train_loss after step 240: 0.36111079901456833
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 260: 0.31256988147894543: 
INFO:root:eval_accuracy_multilabel after step 260: 0.20212765957446807: 
INFO:root:eval_accuracy_thresh after step 260: 0.8860182762145996: 
INFO:root:eval_roc_auc after step 260: 0.7551040067171604: 
INFO:root:eval_fbeta after step 260: 0.1497781127691269: 
INFO:root:lr after step 260: 3.12e-05
INFO:root:train_loss after step 260: 0.3514014944434166
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 280: 0.3062266657749812: 
INFO:root:eval_accuracy_multilabel after step 280: 0.2765957446808511: 
INFO:root:eval_accuracy_thresh after step 280: 0.8871580362319946: 
INFO:root:eval_roc_auc after step 280: 0.7752360782825234: 
INFO:root:eval_fbeta after step 280: 0.23648862540721893: 
INFO:root:lr after step 280: 3.3600000000000004e-05
INFO:root:train_loss after step 280: 0.34849668145179746
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 6: 0.3058999677499135: 
INFO:root:eval_accuracy_multilabel after epoch 6: 0.2978723404255319: 
INFO:root:eval_accuracy_thresh after epoch 6: 0.8901975750923157: 
INFO:root:eval_roc_auc after epoch 6: 0.7823277721226342: 
INFO:root:eval_fbeta after epoch 6: 0.22636562585830688: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 300: 0.30291029810905457: 
INFO:root:eval_accuracy_multilabel after step 300: 0.22340425531914893: 
INFO:root:eval_accuracy_thresh after step 300: 0.890577495098114: 
INFO:root:eval_roc_auc after step 300: 0.7756997521321808: 
INFO:root:eval_fbeta after step 300: 0.18562127649784088: 
INFO:root:lr after step 300: 3.6e-05
INFO:root:train_loss after step 300: 0.33296217024326324
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 320: 0.3014759471019109: 
INFO:root:eval_accuracy_multilabel after step 320: 0.2978723404255319: 
INFO:root:eval_accuracy_thresh after step 320: 0.8920972943305969: 
INFO:root:eval_roc_auc after step 320: 0.7861356486833101: 
INFO:root:eval_fbeta after step 320: 0.268721342086792: 
INFO:root:lr after step 320: 3.8400000000000005e-05
INFO:root:train_loss after step 320: 0.3221500277519226
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 7: 0.2927006234725316: 
INFO:root:eval_accuracy_multilabel after epoch 7: 0.30851063829787234: 
INFO:root:eval_accuracy_thresh after epoch 7: 0.8917173147201538: 
INFO:root:eval_roc_auc after epoch 7: 0.8014217893641127: 
INFO:root:eval_fbeta after epoch 7: 0.2410414218902588: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 340: 0.29008324444293976: 
INFO:root:eval_accuracy_multilabel after step 340: 0.39361702127659576: 
INFO:root:eval_accuracy_thresh after step 340: 0.8977963924407959: 
INFO:root:eval_roc_auc after step 340: 0.7986275254050137: 
INFO:root:eval_fbeta after step 340: 0.2397884577512741: 
INFO:root:lr after step 340: 4.08e-05
INFO:root:train_loss after step 340: 0.3169857904314995
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 360: 0.2862514356772105: 
INFO:root:eval_accuracy_multilabel after step 360: 0.3723404255319149: 
INFO:root:eval_accuracy_thresh after step 360: 0.896276593208313: 
INFO:root:eval_roc_auc after step 360: 0.8076328623269668: 
INFO:root:eval_fbeta after step 360: 0.3266519010066986: 
INFO:root:lr after step 360: 4.32e-05
INFO:root:train_loss after step 360: 0.29434763491153715
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 8: 0.2912153551975886: 
INFO:root:eval_accuracy_multilabel after epoch 8: 0.3829787234042553: 
INFO:root:eval_accuracy_thresh after epoch 8: 0.8939969539642334: 
INFO:root:eval_roc_auc after epoch 8: 0.7966614045076287: 
INFO:root:eval_fbeta after epoch 8: 0.2910662889480591: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 380: 0.28818843762079877: 
INFO:root:eval_accuracy_multilabel after step 380: 0.3351063829787234: 
INFO:root:eval_accuracy_thresh after step 380: 0.8943769335746765: 
INFO:root:eval_roc_auc after step 380: 0.8044978520039338: 
INFO:root:eval_fbeta after step 380: 0.28878846764564514: 
INFO:root:lr after step 380: 4.5600000000000004e-05
INFO:root:train_loss after step 380: 0.3002920925617218
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 400: 0.2753410389026006: 
INFO:root:eval_accuracy_multilabel after step 400: 0.40425531914893614: 
INFO:root:eval_accuracy_thresh after step 400: 0.8985562324523926: 
INFO:root:eval_roc_auc after step 400: 0.8231612636082883: 
INFO:root:eval_fbeta after step 400: 0.30285683274269104: 
INFO:root:lr after step 400: 4.8e-05
INFO:root:train_loss after step 400: 0.2672307625412941
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 420: 0.2738150010506312: 
INFO:root:eval_accuracy_multilabel after step 420: 0.3617021276595745: 
INFO:root:eval_accuracy_thresh after step 420: 0.9000759720802307: 
INFO:root:eval_roc_auc after step 420: 0.8228557420794442: 
INFO:root:eval_fbeta after step 420: 0.3532481789588928: 
INFO:root:lr after step 420: 5.04e-05
INFO:root:train_loss after step 420: 0.28234087526798246
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 9: 0.27347853283087414: 
INFO:root:eval_accuracy_multilabel after epoch 9: 0.3617021276595745: 
INFO:root:eval_accuracy_thresh after epoch 9: 0.9012157917022705: 
INFO:root:eval_roc_auc after epoch 9: 0.8181658068931408: 
INFO:root:eval_fbeta after epoch 9: 0.3270616829395294: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 440: 0.2730424602826436: 
INFO:root:eval_accuracy_multilabel after step 440: 0.40425531914893614: 
INFO:root:eval_accuracy_thresh after step 440: 0.8996960520744324: 
INFO:root:eval_roc_auc after step 440: 0.8268814375183312: 
INFO:root:eval_fbeta after step 440: 0.3144906461238861: 
INFO:root:lr after step 440: 5.28e-05
INFO:root:train_loss after step 440: 0.2593979112803936
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 460: 0.27865376075108844: 
INFO:root:eval_accuracy_multilabel after step 460: 0.3776595744680851: 
INFO:root:eval_accuracy_thresh after step 460: 0.9004559516906738: 
INFO:root:eval_roc_auc after step 460: 0.8138029594612469: 
INFO:root:eval_fbeta after step 460: 0.3257046937942505: 
INFO:root:lr after step 460: 5.520000000000001e-05
INFO:root:train_loss after step 460: 0.24971631839871405
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 10: 0.2874804387489955: 
INFO:root:eval_accuracy_multilabel after epoch 10: 0.3351063829787234: 
INFO:root:eval_accuracy_thresh after epoch 10: 0.8909574747085571: 
INFO:root:eval_roc_auc after epoch 10: 0.8127764071243306: 
INFO:root:eval_fbeta after epoch 10: 0.3293944299221039: 
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): s3.amazonaws.com:443
DEBUG:urllib3.connectionpool:https://s3.amazonaws.com:443 "HEAD /models.huggingface.co/bert/bert-base-uncased-vocab.txt HTTP/1.1" 200 0
INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/aa5118/.cache/torch/pytorch_transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
INFO:root:Writing example 0 of 1501
INFO:root:Saving features into cached file data/phenotype_classification/transformer/original/cache

data/phenotype_classification/transformer/original/output/bert/tensorboard
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 20: 0.6168104112148285: 
INFO:root:eval_accuracy_multilabel after step 20: 0.0425531914893617: 
INFO:root:eval_accuracy_thresh after step 20: 0.7302431464195251: 
INFO:root:eval_roc_auc after step 20: 0.5351256304526607: 
INFO:root:eval_fbeta after step 20: 0.3417436182498932: 
INFO:root:lr after step 20: 2.4000000000000003e-06
INFO:root:train_loss after step 20: 0.6386248201131821
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 40: 0.5410069127877554: 
INFO:root:eval_accuracy_multilabel after step 40: 0.09574468085106383: 
INFO:root:eval_accuracy_thresh after step 40: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 40: 0.5134623568721496: 
INFO:root:eval_fbeta after step 40: 0.34148284792900085: 
INFO:root:lr after step 40: 4.800000000000001e-06
INFO:root:train_loss after step 40: 0.5882206469774246
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 1: 0.512977808713913: 
INFO:root:eval_accuracy_multilabel after epoch 1: 0.09574468085106383: 
INFO:root:eval_accuracy_thresh after epoch 1: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 1: 0.5623321428776821: 
INFO:root:eval_fbeta after epoch 1: 0.3413017988204956: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 60: 0.46905215084552765: 
INFO:root:eval_accuracy_multilabel after step 60: 0.13829787234042554: 
INFO:root:eval_accuracy_thresh after step 60: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 60: 0.6113104788852274: 
INFO:root:eval_fbeta after step 60: 0.3259706497192383: 
INFO:root:lr after step 60: 7.2e-06
INFO:root:train_loss after step 60: 0.5218498587608338
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 80: 0.421964759627978: 
INFO:root:eval_accuracy_multilabel after step 80: 0.09574468085106383: 
INFO:root:eval_accuracy_thresh after step 80: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 80: 0.6283794275461085: 
INFO:root:eval_fbeta after step 80: 0.23773682117462158: 
INFO:root:lr after step 80: 9.600000000000001e-06
INFO:root:train_loss after step 80: 0.4707561761140823
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 2: 0.39552480975786847: 
INFO:root:eval_accuracy_multilabel after epoch 2: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 2: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 2: 0.6351145597902038: 
INFO:root:eval_fbeta after epoch 2: 0.14909055829048157: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 100: 0.38514823218186695: 
INFO:root:eval_accuracy_multilabel after step 100: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 100: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 100: 0.6534666988722301: 
INFO:root:eval_fbeta after step 100: 0.16125187277793884: 
INFO:root:lr after step 100: 1.2e-05
INFO:root:train_loss after step 100: 0.42956418395042417
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 120: 0.3619111080964406: 
INFO:root:eval_accuracy_multilabel after step 120: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 120: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 120: 0.660760396358471: 
INFO:root:eval_fbeta after step 120: 0.11164750158786774: 
INFO:root:lr after step 120: 1.44e-05
INFO:root:train_loss after step 120: 0.40612967908382414
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 140: 0.35002829631169635: 
INFO:root:eval_accuracy_multilabel after step 140: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 140: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 140: 0.6538196660973183: 
INFO:root:eval_fbeta after step 140: 0.0: 
INFO:root:lr after step 140: 1.6800000000000002e-05
INFO:root:train_loss after step 140: 0.3820280969142914
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 3: 0.34962592522303265: 
INFO:root:eval_accuracy_multilabel after epoch 3: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 3: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 3: 0.6542596170988537: 
INFO:root:eval_fbeta after epoch 3: 0.0: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 160: 0.3452150970697403: 
INFO:root:eval_accuracy_multilabel after step 160: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 160: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 160: 0.6496882961531605: 
INFO:root:eval_fbeta after step 160: 0.0: 
INFO:root:lr after step 160: 1.9200000000000003e-05
INFO:root:train_loss after step 160: 0.3818462908267975
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 180: 0.34006396929423016: 
INFO:root:eval_accuracy_multilabel after step 180: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 180: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 180: 0.6577389681567491: 
INFO:root:eval_fbeta after step 180: 0.0: 
INFO:root:lr after step 180: 2.16e-05
INFO:root:train_loss after step 180: 0.37557544112205504
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 4: 0.3392099638779958: 
INFO:root:eval_accuracy_multilabel after epoch 4: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 4: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 4: 0.6693545372463092: 
INFO:root:eval_fbeta after epoch 4: 0.0: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 200: 0.34335148831208545: 
INFO:root:eval_accuracy_multilabel after step 200: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 200: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 200: 0.6589143274500671: 
INFO:root:eval_fbeta after step 200: 0.11696664988994598: 
INFO:root:lr after step 200: 2.4e-05
INFO:root:train_loss after step 200: 0.37502686232328414
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 220: 0.33814631899197894: 
INFO:root:eval_accuracy_multilabel after step 220: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 220: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 220: 0.6526263349493625: 
INFO:root:eval_fbeta after step 220: 0.0: 
INFO:root:lr after step 220: 2.64e-05
INFO:root:train_loss after step 220: 0.3697801113128662
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 5: 0.34092337886492413: 
INFO:root:eval_accuracy_multilabel after epoch 5: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 5: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 5: 0.6527054111097692: 
INFO:root:eval_fbeta after epoch 5: 0.0: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 240: 0.34072432418664295: 
INFO:root:eval_accuracy_multilabel after step 240: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 240: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 240: 0.6559151843480961: 
INFO:root:eval_fbeta after step 240: 0.045640841126441956: 
INFO:root:lr after step 240: 2.88e-05
INFO:root:train_loss after step 240: 0.3791113406419754
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 260: 0.335010493795077: 
INFO:root:eval_accuracy_multilabel after step 260: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 260: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 260: 0.6754549035845944: 
INFO:root:eval_fbeta after step 260: 0.09485907852649689: 
INFO:root:lr after step 260: 3.12e-05
INFO:root:train_loss after step 260: 0.3706378787755966
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 280: 0.33040114243825275: 
INFO:root:eval_accuracy_multilabel after step 280: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 280: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 280: 0.6979118142659145: 
INFO:root:eval_fbeta after step 280: 0.035767462104558945: 
INFO:root:lr after step 280: 3.3600000000000004e-05
INFO:root:train_loss after step 280: 0.36604301631450653
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 6: 0.32816416521867114: 
INFO:root:eval_accuracy_multilabel after epoch 6: 0.05319148936170213: 
INFO:root:eval_accuracy_thresh after epoch 6: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 6: 0.6983417010288527: 
INFO:root:eval_fbeta after epoch 6: 0.05439967289566994: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 300: 0.32681674261887866: 
INFO:root:eval_accuracy_multilabel after step 300: 0.10106382978723404: 
INFO:root:eval_accuracy_thresh after step 300: 0.8871580362319946: 
INFO:root:eval_roc_auc after step 300: 0.7074735598074569: 
INFO:root:eval_fbeta after step 300: 0.07586817443370819: 
INFO:root:lr after step 300: 3.6e-05
INFO:root:train_loss after step 300: 0.34912184774875643
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 320: 0.32455016175905865: 
INFO:root:eval_accuracy_multilabel after step 320: 0.0851063829787234: 
INFO:root:eval_accuracy_thresh after step 320: 0.8875380158424377: 
INFO:root:eval_roc_auc after step 320: 0.7143955993397859: 
INFO:root:eval_fbeta after step 320: 0.07962185889482498: 
INFO:root:lr after step 320: 3.8400000000000005e-05
INFO:root:train_loss after step 320: 0.3686760440468788
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 7: 0.3193390717109044: 
INFO:root:eval_accuracy_multilabel after epoch 7: 0.12234042553191489: 
INFO:root:eval_accuracy_thresh after epoch 7: 0.8875380158424377: 
INFO:root:eval_roc_auc after epoch 7: 0.7288119022561146: 
INFO:root:eval_fbeta after epoch 7: 0.0734366849064827: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 340: 0.3181483596563339: 
INFO:root:eval_accuracy_multilabel after step 340: 0.1702127659574468: 
INFO:root:eval_accuracy_thresh after step 340: 0.8875380158424377: 
INFO:root:eval_roc_auc after step 340: 0.7306622844096318: 
INFO:root:eval_fbeta after step 340: 0.1375420242547989: 
INFO:root:lr after step 340: 4.08e-05
INFO:root:train_loss after step 340: 0.33757384568452836
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 360: 0.31512369215488434: 
INFO:root:eval_accuracy_multilabel after step 360: 0.19148936170212766: 
INFO:root:eval_accuracy_thresh after step 360: 0.8871580362319946: 
INFO:root:eval_roc_auc after step 360: 0.751589430824175: 
INFO:root:eval_fbeta after step 360: 0.17472992837429047: 
INFO:root:lr after step 360: 4.32e-05
INFO:root:train_loss after step 360: 0.3513926163315773
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 8: 0.31198690334955853: 
INFO:root:eval_accuracy_multilabel after epoch 8: 0.17553191489361702: 
INFO:root:eval_accuracy_thresh after epoch 8: 0.8879179358482361: 
INFO:root:eval_roc_auc after epoch 8: 0.7562053219693701: 
INFO:root:eval_fbeta after epoch 8: 0.14650824666023254: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 380: 0.3155854294697444: 
INFO:root:eval_accuracy_multilabel after step 380: 0.1595744680851064: 
INFO:root:eval_accuracy_thresh after step 380: 0.8882978558540344: 
INFO:root:eval_roc_auc after step 380: 0.7573921832496564: 
INFO:root:eval_fbeta after step 380: 0.2105766385793686: 
INFO:root:lr after step 380: 4.5600000000000004e-05
INFO:root:train_loss after step 380: 0.3373250290751457
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 400: 0.30437900125980377: 
INFO:root:eval_accuracy_multilabel after step 400: 0.23404255319148937: 
INFO:root:eval_accuracy_thresh after step 400: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 400: 0.7782388157554216: 
INFO:root:eval_fbeta after step 400: 0.22171999514102936: 
INFO:root:lr after step 400: 4.8e-05
INFO:root:train_loss after step 400: 0.3225798398256302
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 420: 0.29876377681891125: 
INFO:root:eval_accuracy_multilabel after step 420: 0.22872340425531915: 
INFO:root:eval_accuracy_thresh after step 420: 0.8901975750923157: 
INFO:root:eval_roc_auc after step 420: 0.7817720823772307: 
INFO:root:eval_fbeta after step 420: 0.16558991372585297: 
INFO:root:lr after step 420: 5.04e-05
INFO:root:train_loss after step 420: 0.32201103270053866
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 9: 0.29697540899117786: 
INFO:root:eval_accuracy_multilabel after epoch 9: 0.14893617021276595: 
INFO:root:eval_accuracy_thresh after epoch 9: 0.8928571343421936: 
INFO:root:eval_roc_auc after epoch 9: 0.7894000563597363: 
INFO:root:eval_fbeta after epoch 9: 0.19081667065620422: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 440: 0.2948802461226781: 
INFO:root:eval_accuracy_multilabel after step 440: 0.22872340425531915: 
INFO:root:eval_accuracy_thresh after step 440: 0.8917173147201538: 
INFO:root:eval_roc_auc after step 440: 0.786523121869303: 
INFO:root:eval_fbeta after step 440: 0.23403243720531464: 
INFO:root:lr after step 440: 5.28e-05
INFO:root:train_loss after step 440: 0.32089960426092146
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 460: 0.30404085914293927: 
INFO:root:eval_accuracy_multilabel after step 460: 0.21808510638297873: 
INFO:root:eval_accuracy_thresh after step 460: 0.8917173147201538: 
INFO:root:eval_roc_auc after step 460: 0.7824686714629954: 
INFO:root:eval_fbeta after step 460: 0.30213189125061035: 
INFO:root:lr after step 460: 5.520000000000001e-05
INFO:root:train_loss after step 460: 0.29058709293603896
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 10: 0.28648675481478375: 
INFO:root:eval_accuracy_multilabel after epoch 10: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after epoch 10: 0.896276593208313: 
INFO:root:eval_roc_auc after epoch 10: 0.7986433406370951: 
INFO:root:eval_fbeta after epoch 10: 0.2850111722946167: 
INFO:pytorch_transformers.tokenization_utils:Model name 'biobert' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc). Assuming 'biobert' is a path or url to a directory containing tokenizer files.
INFO:pytorch_transformers.tokenization_utils:Didn't find file biobert/added_tokens.json. We won't load it.

data/phenotype_classification/transformer/original_2x/output/biobert/tensorboard
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 20: 0.6117053925991058: 
INFO:root:eval_accuracy_multilabel after step 20: 0.15691489361702127: 
INFO:root:eval_accuracy_thresh after step 20: 0.6635638475418091: 
INFO:root:eval_roc_auc after step 20: 0.6064565266148325: 
INFO:root:eval_fbeta after step 20: 0.38101062178611755: 
INFO:root:lr after step 20: 2.4000000000000003e-06
INFO:root:train_loss after step 20: 0.6329127281904221
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 40: 0.5178529669841131: 
INFO:root:eval_accuracy_multilabel after step 40: 0.16489361702127658: 
INFO:root:eval_accuracy_thresh after step 40: 0.8655015230178833: 
INFO:root:eval_roc_auc after step 40: 0.6059238561602871: 
INFO:root:eval_fbeta after step 40: 0.3642561435699463: 
INFO:root:lr after step 40: 4.800000000000001e-06
INFO:root:train_loss after step 40: 0.5677183508872986
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 60: 0.44537948817014694: 
INFO:root:eval_accuracy_multilabel after step 60: 0.15425531914893617: 
INFO:root:eval_accuracy_thresh after step 60: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 60: 0.6132740854266348: 
INFO:root:eval_fbeta after step 60: 0.26868048310279846: 
INFO:root:lr after step 60: 7.2e-06
INFO:root:train_loss after step 60: 0.4848367065191269
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 80: 0.41125082472960156: 
INFO:root:eval_accuracy_multilabel after step 80: 0.1595744680851064: 
INFO:root:eval_accuracy_thresh after step 80: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 80: 0.6343469647129186: 
INFO:root:eval_fbeta after step 80: 0.22035013139247894: 
INFO:root:lr after step 80: 9.600000000000001e-06
INFO:root:train_loss after step 80: 0.43723011314868926
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 1: 0.39624155809481937: 
INFO:root:eval_accuracy_multilabel after epoch 1: 0.10106382978723404: 
INFO:root:eval_accuracy_thresh after epoch 1: 0.8662614226341248: 
INFO:root:eval_roc_auc after epoch 1: 0.6334540408193781: 
INFO:root:eval_fbeta after epoch 1: 0.22035013139247894: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 100: 0.3931389773885409: 
INFO:root:eval_accuracy_multilabel after step 100: 0.14627659574468085: 
INFO:root:eval_accuracy_thresh after step 100: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 100: 0.6248998517244817: 
INFO:root:eval_fbeta after step 100: 0.016984200105071068: 
INFO:root:lr after step 100: 1.2e-05
INFO:root:train_loss after step 100: 0.39646550863981245
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 120: 0.3832535296678543: 
INFO:root:eval_accuracy_multilabel after step 120: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 120: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 120: 0.6430421090011962: 
INFO:root:eval_fbeta after step 120: 0.1166997030377388: 
INFO:root:lr after step 120: 1.44e-05
INFO:root:train_loss after step 120: 0.3866190627217293
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 140: 0.3791631609201431: 
INFO:root:eval_accuracy_multilabel after step 140: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 140: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 140: 0.6449257687898724: 
INFO:root:eval_fbeta after step 140: 0.08297102898359299: 
INFO:root:lr after step 140: 1.6800000000000002e-05
INFO:root:train_loss after step 140: 0.37441820502281187
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 160: 0.37737539907296497: 
INFO:root:eval_accuracy_multilabel after step 160: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 160: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 160: 0.6474358614932217: 
INFO:root:eval_fbeta after step 160: 0.0: 
INFO:root:lr after step 160: 1.9200000000000003e-05
INFO:root:train_loss after step 160: 0.3791820675134659
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 180: 0.3772873009244601: 
INFO:root:eval_accuracy_multilabel after step 180: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 180: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 180: 0.6581110758074162: 
INFO:root:eval_fbeta after step 180: 0.11817725002765656: 
INFO:root:lr after step 180: 2.16e-05
INFO:root:train_loss after step 180: 0.3710113033652306
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 2: 0.37725691745678586: 
INFO:root:eval_accuracy_multilabel after epoch 2: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after epoch 2: 0.8662614226341248: 
INFO:root:eval_roc_auc after epoch 2: 0.6527005769039074: 
INFO:root:eval_fbeta after epoch 2: 0.025229323655366898: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 200: 0.3789601648847262: 
INFO:root:eval_accuracy_multilabel after step 200: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 200: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 200: 0.6397699237440191: 
INFO:root:eval_fbeta after step 200: 0.0: 
INFO:root:lr after step 200: 2.4e-05
INFO:root:train_loss after step 200: 0.3676414221525192
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 220: 0.3776593630512555: 
INFO:root:eval_accuracy_multilabel after step 220: 0.09308510638297872: 
INFO:root:eval_accuracy_thresh after step 220: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 220: 0.6622423868620415: 
INFO:root:eval_fbeta after step 220: 0.17862741649150848: 
INFO:root:lr after step 220: 2.64e-05
INFO:root:train_loss after step 220: 0.36884785145521165
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 240: 0.37400661905606586: 
INFO:root:eval_accuracy_multilabel after step 240: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 240: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 240: 0.6636385441586923: 
INFO:root:eval_fbeta after step 240: 0.08207060396671295: 
INFO:root:lr after step 240: 2.88e-05
INFO:root:train_loss after step 240: 0.3724533304572105
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 260: 0.36860495805740356: 
INFO:root:eval_accuracy_multilabel after step 260: 0.10638297872340426: 
INFO:root:eval_accuracy_thresh after step 260: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 260: 0.6862253289473684: 
INFO:root:eval_fbeta after step 260: 0.10631096363067627: 
INFO:root:lr after step 260: 3.12e-05
INFO:root:train_loss after step 260: 0.3609752967953682
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 280: 0.35963888466358185: 
INFO:root:eval_accuracy_multilabel after step 280: 0.15691489361702127: 
INFO:root:eval_accuracy_thresh after step 280: 0.8675912022590637: 
INFO:root:eval_roc_auc after step 280: 0.7123500112141149: 
INFO:root:eval_fbeta after step 280: 0.11089780181646347: 
INFO:root:lr after step 280: 3.3600000000000004e-05
INFO:root:train_loss after step 280: 0.3672733336687088
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 3: 0.35747497032086056: 
INFO:root:eval_accuracy_multilabel after epoch 3: 0.14627659574468085: 
INFO:root:eval_accuracy_thresh after epoch 3: 0.8660714626312256: 
INFO:root:eval_roc_auc after epoch 3: 0.7177759295255184: 
INFO:root:eval_fbeta after epoch 3: 0.1401984840631485: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 300: 0.3440324018398921: 
INFO:root:eval_accuracy_multilabel after step 300: 0.25: 
INFO:root:eval_accuracy_thresh after step 300: 0.8687310218811035: 
INFO:root:eval_roc_auc after step 300: 0.7521842603668261: 
INFO:root:eval_fbeta after step 300: 0.23185665905475616: 
INFO:root:lr after step 300: 3.6e-05
INFO:root:train_loss after step 300: 0.34352619796991346
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 320: 0.3323151369889577: 
INFO:root:eval_accuracy_multilabel after step 320: 0.4095744680851064: 
INFO:root:eval_accuracy_thresh after step 320: 0.8774695992469788: 
INFO:root:eval_roc_auc after step 320: 0.769785125099681: 
INFO:root:eval_fbeta after step 320: 0.24921715259552002: 
INFO:root:lr after step 320: 3.8400000000000005e-05
INFO:root:train_loss after step 320: 0.3284214407205582
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 340: 0.32126639783382416: 
INFO:root:eval_accuracy_multilabel after step 340: 0.3776595744680851: 
INFO:root:eval_accuracy_thresh after step 340: 0.8778495788574219: 
INFO:root:eval_roc_auc after step 340: 0.8047895172946572: 
INFO:root:eval_fbeta after step 340: 0.35956689715385437: 
INFO:root:lr after step 340: 4.08e-05
INFO:root:train_loss after step 340: 0.33399702310562135
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 360: 0.30647679915030795: 
INFO:root:eval_accuracy_multilabel after step 360: 0.43882978723404253: 
INFO:root:eval_accuracy_thresh after step 360: 0.8805091381072998: 
INFO:root:eval_roc_auc after step 360: 0.823003576056619: 
INFO:root:eval_fbeta after step 360: 0.33599400520324707: 
INFO:root:lr after step 360: 4.32e-05
INFO:root:train_loss after step 360: 0.3036912217736244
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 4: 0.2997166191538175: 
INFO:root:eval_accuracy_multilabel after epoch 4: 0.4601063829787234: 
INFO:root:eval_accuracy_thresh after epoch 4: 0.8844984769821167: 
INFO:root:eval_roc_auc after epoch 4: 0.8364066238038277: 
INFO:root:eval_fbeta after epoch 4: 0.4042868912220001: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 380: 0.29767608394225437: 
INFO:root:eval_accuracy_multilabel after step 380: 0.4521276595744681: 
INFO:root:eval_accuracy_thresh after step 380: 0.8848784565925598: 
INFO:root:eval_roc_auc after step 380: 0.8363729814593301: 
INFO:root:eval_fbeta after step 380: 0.35141047835350037: 
INFO:root:lr after step 380: 4.5600000000000004e-05
INFO:root:train_loss after step 380: 0.30166812241077423
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 400: 0.28226334353288013: 
INFO:root:eval_accuracy_multilabel after step 400: 0.48404255319148937: 
INFO:root:eval_accuracy_thresh after step 400: 0.8928571343421936: 
INFO:root:eval_roc_auc after step 400: 0.8582294470195375: 
INFO:root:eval_fbeta after step 400: 0.40067845582962036: 
INFO:root:lr after step 400: 4.8e-05
INFO:root:train_loss after step 400: 0.29417201578617097
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 420: 0.27204101408521336: 
INFO:root:eval_accuracy_multilabel after step 420: 0.5106382978723404: 
INFO:root:eval_accuracy_thresh after step 420: 0.9008358716964722: 
INFO:root:eval_roc_auc after step 420: 0.8677393279007176: 
INFO:root:eval_fbeta after step 420: 0.4545802175998688: 
INFO:root:lr after step 420: 5.04e-05
INFO:root:train_loss after step 420: 0.2729350499808788
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 440: 0.2695640077193578: 
INFO:root:eval_accuracy_multilabel after step 440: 0.4946808510638298: 
INFO:root:eval_accuracy_thresh after step 440: 0.900645911693573: 
INFO:root:eval_roc_auc after step 440: 0.8767603045255183: 
INFO:root:eval_fbeta after step 440: 0.4969664216041565: 
INFO:root:lr after step 440: 5.28e-05
INFO:root:train_loss after step 440: 0.2778170481324196
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 460: 0.255771250774463: 
INFO:root:eval_accuracy_multilabel after step 460: 0.523936170212766: 
INFO:root:eval_accuracy_thresh after step 460: 0.9069148898124695: 
INFO:root:eval_roc_auc after step 460: 0.8825199985047848: 
INFO:root:eval_fbeta after step 460: 0.46928906440734863: 
INFO:root:lr after step 460: 5.520000000000001e-05
INFO:root:train_loss after step 460: 0.2631070278584957
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 5: 0.2542271427810192: 
INFO:root:eval_accuracy_multilabel after epoch 5: 0.5452127659574468: 
INFO:root:eval_accuracy_thresh after epoch 5: 0.9002659916877747: 
INFO:root:eval_roc_auc after epoch 5: 0.8902976101475278: 
INFO:root:eval_fbeta after epoch 5: 0.44934558868408203: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 480: 0.2451170695324739: 
INFO:root:eval_accuracy_multilabel after step 480: 0.5186170212765957: 
INFO:root:eval_accuracy_thresh after step 480: 0.9065349698066711: 
INFO:root:eval_roc_auc after step 480: 0.8984223920952951: 
INFO:root:eval_fbeta after step 480: 0.5041243433952332: 
INFO:root:lr after step 480: 5.76e-05
INFO:root:train_loss after step 480: 0.2474733769893646
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 500: 0.23641017576058707: 
INFO:root:eval_accuracy_multilabel after step 500: 0.526595744680851: 
INFO:root:eval_accuracy_thresh after step 500: 0.9116641283035278: 
INFO:root:eval_roc_auc after step 500: 0.907116913377193: 
INFO:root:eval_fbeta after step 500: 0.507437527179718: 
INFO:root:lr after step 500: 6e-05
INFO:root:train_loss after step 500: 0.231788931787014
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 520: 0.22922664135694504: 
INFO:root:eval_accuracy_multilabel after step 520: 0.5132978723404256: 
INFO:root:eval_accuracy_thresh after step 520: 0.9133738875389099: 
INFO:root:eval_roc_auc after step 520: 0.9103445536782295: 
INFO:root:eval_fbeta after step 520: 0.5520377159118652: 
INFO:root:lr after step 520: 5.969464325642798e-05
INFO:root:train_loss after step 520: 0.22897428199648856
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 540: 0.21765092263619104: 
INFO:root:eval_accuracy_multilabel after step 540: 0.5585106382978723: 
INFO:root:eval_accuracy_thresh after step 540: 0.9207826852798462: 
INFO:root:eval_roc_auc after step 540: 0.9213412392842903: 
INFO:root:eval_fbeta after step 540: 0.5563710331916809: 
INFO:root:lr after step 540: 5.878478920843492e-05
INFO:root:train_loss after step 540: 0.22326066568493844
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 560: 0.20840253805120787: 
INFO:root:eval_accuracy_multilabel after step 560: 0.5851063829787234: 
INFO:root:eval_accuracy_thresh after step 560: 0.9192629456520081: 
INFO:root:eval_roc_auc after step 560: 0.9322932241826156: 
INFO:root:eval_fbeta after step 560: 0.5527920722961426: 
INFO:root:lr after step 560: 5.728895986063555e-05
INFO:root:train_loss after step 560: 0.2187669172883034
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 6: 0.20608860378464064: 
INFO:root:eval_accuracy_multilabel after epoch 6: 0.5691489361702128: 
INFO:root:eval_accuracy_thresh after epoch 6: 0.9230623245239258: 
INFO:root:eval_roc_auc after epoch 6: 0.9375210264653111: 
INFO:root:eval_fbeta after epoch 6: 0.6121448278427124: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 580: 0.2018375446399053: 
INFO:root:eval_accuracy_multilabel after step 580: 0.5372340425531915: 
INFO:root:eval_accuracy_thresh after step 580: 0.9217325448989868: 
INFO:root:eval_roc_auc after step 580: 0.9355289635665869: 
INFO:root:eval_fbeta after step 580: 0.587344765663147: 
INFO:root:lr after step 580: 5.5237605984935435e-05
INFO:root:train_loss after step 580: 0.2051673337817192
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 600: 0.19405078887939453: 
INFO:root:eval_accuracy_multilabel after step 600: 0.5718085106382979: 
INFO:root:eval_accuracy_thresh after step 600: 0.9287614226341248: 
INFO:root:eval_roc_auc after step 600: 0.9415472051933812: 
INFO:root:eval_fbeta after step 600: 0.598660409450531: 
INFO:root:lr after step 600: 5.267248723062775e-05
INFO:root:train_loss after step 600: 0.1794436551630497
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 620: 0.18538517132401466: 
INFO:root:eval_accuracy_multilabel after step 620: 0.5398936170212766: 
INFO:root:eval_accuracy_thresh after step 620: 0.9302811622619629: 
INFO:root:eval_roc_auc after step 620: 0.9498168361244019: 
INFO:root:eval_fbeta after step 620: 0.6269500255584717: 
INFO:root:lr after step 620: 4.964582201835856e-05
INFO:root:train_loss after step 620: 0.18093932867050172
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 640: 0.1783585473895073: 
INFO:root:eval_accuracy_multilabel after step 640: 0.5718085106382979: 
INFO:root:eval_accuracy_thresh after step 640: 0.9340805411338806: 
INFO:root:eval_roc_auc after step 640: 0.9513484973086124: 
INFO:root:eval_fbeta after step 640: 0.6506778597831726: 
INFO:root:lr after step 640: 4.6219224523667933e-05
INFO:root:train_loss after step 640: 0.17800946310162544
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 7: 0.16877854615449905: 
INFO:root:eval_accuracy_multilabel after epoch 7: 0.598404255319149: 
INFO:root:eval_accuracy_thresh after epoch 7: 0.9403495788574219: 
INFO:root:eval_roc_auc after epoch 7: 0.9600439531000797: 
INFO:root:eval_fbeta after epoch 7: 0.6662832498550415: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 660: 0.16976625844836235: 
INFO:root:eval_accuracy_multilabel after step 660: 0.5824468085106383: 
INFO:root:eval_accuracy_thresh after step 660: 0.9399695992469788: 
INFO:root:eval_roc_auc after step 660: 0.9593910424142742: 
INFO:root:eval_fbeta after step 660: 0.6639363765716553: 
INFO:root:lr after step 660: 4.24624503900566e-05
INFO:root:train_loss after step 660: 0.1677791118621826
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 680: 0.16051511963208517: 
INFO:root:eval_accuracy_multilabel after step 680: 0.5824468085106383: 
INFO:root:eval_accuracy_thresh after step 680: 0.9443389177322388: 
INFO:root:eval_roc_auc after step 680: 0.9659098696670654: 
INFO:root:eval_fbeta after step 680: 0.6872113347053528: 
INFO:root:lr after step 680: 3.845197670524289e-05
INFO:root:train_loss after step 680: 0.15294451154768468
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 700: 0.15405994902054468: 
INFO:root:eval_accuracy_multilabel after step 700: 0.625: 
INFO:root:eval_accuracy_thresh after step 700: 0.9473784565925598: 
INFO:root:eval_roc_auc after step 700: 0.9700167277212918: 
INFO:root:eval_fbeta after step 700: 0.7006576061248779: 
INFO:root:lr after step 700: 3.426944514819856e-05
INFO:root:train_loss after step 700: 0.14644106552004815
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 720: 0.14486787716547647: 
INFO:root:eval_accuracy_multilabel after step 720: 0.6196808510638298: 
INFO:root:eval_accuracy_thresh after step 720: 0.9479483366012573: 
INFO:root:eval_roc_auc after step 720: 0.9735702003588516: 
INFO:root:eval_fbeta after step 720: 0.7091067433357239: 
INFO:root:lr after step 720: 3e-05
INFO:root:train_loss after step 720: 0.14544934555888175
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 740: 0.13898373395204544: 
INFO:root:eval_accuracy_multilabel after step 740: 0.6117021276595744: 
INFO:root:eval_accuracy_thresh after step 740: 0.9540273547172546: 
INFO:root:eval_roc_auc after step 740: 0.9776499576355662: 
INFO:root:eval_fbeta after step 740: 0.7233090996742249: 
INFO:root:lr after step 740: 2.573055485180145e-05
INFO:root:train_loss after step 740: 0.14054662585258484
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 8: 0.13368158787488937: 
INFO:root:eval_accuracy_multilabel after epoch 8: 0.601063829787234: 
INFO:root:eval_accuracy_thresh after epoch 8: 0.9545972943305969: 
INFO:root:eval_roc_auc after epoch 8: 0.9791372296152312: 
INFO:root:eval_fbeta after epoch 8: 0.728154182434082: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 760: 0.1310586972783009: 
INFO:root:eval_accuracy_multilabel after step 760: 0.6170212765957447: 
INFO:root:eval_accuracy_thresh after step 760: 0.9566869139671326: 
INFO:root:eval_roc_auc after step 760: 0.980832118470893: 
INFO:root:eval_fbeta after step 760: 0.7279695272445679: 
INFO:root:lr after step 760: 2.154802329475711e-05
INFO:root:train_loss after step 760: 0.12491403259336949
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 780: 0.12864878649512926: 
INFO:root:eval_accuracy_multilabel after step 780: 0.6170212765957447: 
INFO:root:eval_accuracy_thresh after step 780: 0.9582067131996155: 
INFO:root:eval_roc_auc after step 780: 0.981233334579346: 
INFO:root:eval_fbeta after step 780: 0.7360183596611023: 
INFO:root:lr after step 780: 1.753754960994341e-05
INFO:root:train_loss after step 780: 0.11912533976137638
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 820: 0.12006076487402122: 
INFO:root:eval_accuracy_multilabel after step 820: 0.625: 
INFO:root:eval_accuracy_thresh after step 820: 0.9621960520744324: 
INFO:root:eval_roc_auc after step 820: 0.9842158218700159: 
INFO:root:eval_fbeta after step 820: 0.7492263317108154: 
INFO:root:lr after step 820: 1.035417798164145e-05
INFO:root:train_loss after step 820: 0.11239382810890675
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 840: 0.11673032057782014: 
INFO:root:eval_accuracy_multilabel after step 840: 0.6223404255319149: 
INFO:root:eval_accuracy_thresh after step 840: 0.9637157917022705: 
INFO:root:eval_roc_auc after step 840: 0.9852797610147528: 
INFO:root:eval_fbeta after step 840: 0.7502970099449158: 
INFO:root:lr after step 840: 7.3275127693722555e-06
INFO:root:train_loss after step 840: 0.11365534737706184
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 9: 0.11660859609643619: 
INFO:root:eval_accuracy_multilabel after epoch 9: 0.6276595744680851: 
INFO:root:eval_accuracy_thresh after epoch 9: 0.9646656513214111: 
INFO:root:eval_roc_auc after epoch 9: 0.9859118009868421: 
INFO:root:eval_fbeta after epoch 9: 0.7564111948013306: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 860: 0.11517772637307644: 
INFO:root:eval_accuracy_multilabel after step 860: 0.6303191489361702: 
INFO:root:eval_accuracy_thresh after step 860: 0.9652355909347534: 
INFO:root:eval_roc_auc after step 860: 0.9865050588118023: 
INFO:root:eval_fbeta after step 860: 0.7565292119979858: 
INFO:root:lr after step 860: 4.76239401506456e-06
INFO:root:train_loss after step 860: 0.10962763614952564
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 880: 0.11390307048956554: 
INFO:root:eval_accuracy_multilabel after step 880: 0.6276595744680851: 
INFO:root:eval_accuracy_thresh after step 880: 0.9659954309463501: 
INFO:root:eval_roc_auc after step 880: 0.9870424018141946: 
INFO:root:eval_fbeta after step 880: 0.7563086748123169: 
INFO:root:lr after step 880: 2.711040139364447e-06
INFO:root:train_loss after step 880: 0.10665244571864604
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 900: 0.11328119970858097: 
INFO:root:eval_accuracy_multilabel after step 900: 0.625: 
INFO:root:eval_accuracy_thresh after step 900: 0.9665653705596924: 
INFO:root:eval_roc_auc after step 900: 0.9871237041467305: 
INFO:root:eval_fbeta after step 900: 0.7603403925895691: 
INFO:root:lr after step 900: 1.2152107915650823e-06
INFO:root:train_loss after step 900: 0.10137793645262719
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 920: 0.11315881895522277: 
INFO:root:eval_accuracy_multilabel after step 920: 0.6329787234042553: 
INFO:root:eval_accuracy_thresh after step 920: 0.9663754105567932: 
INFO:root:eval_roc_auc after step 920: 0.9872201143839713: 
INFO:root:eval_fbeta after step 920: 0.7591171264648438: 
INFO:root:lr after step 920: 3.053567435720195e-07
INFO:root:train_loss after step 920: 0.10774710439145566
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 940: 0.11314999560515086: 
INFO:root:eval_accuracy_multilabel after step 940: 0.6329787234042553: 
INFO:root:eval_accuracy_thresh after step 940: 0.9663754105567932: 
INFO:root:eval_roc_auc after step 940: 0.9872453461423445: 
INFO:root:eval_fbeta after step 940: 0.7584326267242432: 
INFO:root:lr after step 940: 0.0
INFO:root:train_loss after step 940: 0.10382835976779461
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 10: 0.11314999560515086: 
INFO:root:eval_accuracy_multilabel after epoch 10: 0.6329787234042553: 
INFO:root:eval_accuracy_thresh after epoch 10: 0.9663754105567932: 
INFO:root:eval_roc_auc after epoch 10: 0.9872453461423445: 
INFO:root:eval_fbeta after epoch 10: 0.7584326267242432: 
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): s3.amazonaws.com:443
DEBUG:urllib3.connectionpool:https://s3.amazonaws.com:443 "HEAD /models.huggingface.co/bert/bert-base-uncased-vocab.txt HTTP/1.1" 200 0
INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/aa5118/.cache/torch/pytorch_transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
INFO:root:Writing example 0 of 3001
INFO:root:Saving features into cached file data/phenotype_classification/transformer/original_2x/c

data/phenotype_classification/transformer/original_2x/output/bert/tensorboard
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 20: 0.6382280538479487: 
INFO:root:eval_accuracy_multilabel after step 20: 0.0398936170212766: 
INFO:root:eval_accuracy_thresh after step 20: 0.6451367735862732: 
INFO:root:eval_roc_auc after step 20: 0.5223978581040669: 
INFO:root:eval_fbeta after step 20: 0.38101062178611755: 
INFO:root:lr after step 20: 2.4000000000000003e-06
INFO:root:train_loss after step 20: 0.6651126950979233
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 40: 0.558442085981369: 
INFO:root:eval_accuracy_multilabel after step 40: 0.0851063829787234: 
INFO:root:eval_accuracy_thresh after step 40: 0.8444148898124695: 
INFO:root:eval_roc_auc after step 40: 0.5980942234848485: 
INFO:root:eval_fbeta after step 40: 0.37516698241233826: 
INFO:root:lr after step 40: 4.800000000000001e-06
INFO:root:train_loss after step 40: 0.6039296388626099
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 60: 0.49541763961315155: 
INFO:root:eval_accuracy_multilabel after step 60: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 60: 0.8656914830207825: 
INFO:root:eval_roc_auc after step 60: 0.6079531436901915: 
INFO:root:eval_fbeta after step 60: 0.3738793730735779: 
INFO:root:lr after step 60: 7.2e-06
INFO:root:train_loss after step 60: 0.5332295387983322
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 80: 0.4502706900238991: 
INFO:root:eval_accuracy_multilabel after step 80: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 80: 0.8643617033958435: 
INFO:root:eval_roc_auc after step 80: 0.6313510827850877: 
INFO:root:eval_fbeta after step 80: 0.3434670567512512: 
INFO:root:lr after step 80: 9.600000000000001e-06
INFO:root:train_loss after step 80: 0.4756103068590164
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 1: 0.42403187851111096: 
INFO:root:eval_accuracy_multilabel after epoch 1: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after epoch 1: 0.8662614226341248: 
INFO:root:eval_roc_auc after epoch 1: 0.6321623928429028: 
INFO:root:eval_fbeta after epoch 1: 0.26637011766433716: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 100: 0.4135235051314036: 
INFO:root:eval_accuracy_multilabel after step 100: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 100: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 100: 0.6406075558213716: 
INFO:root:eval_fbeta after step 100: 0.14560067653656006: 
INFO:root:lr after step 100: 1.2e-05
INFO:root:train_loss after step 100: 0.4320788010954857
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 120: 0.3916874478260676: 
INFO:root:eval_accuracy_multilabel after step 120: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 120: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 120: 0.6414761824661085: 
INFO:root:eval_fbeta after step 120: 0.11148484796285629: 
INFO:root:lr after step 120: 1.44e-05
INFO:root:train_loss after step 120: 0.399710938334465
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 140: 0.3842996930082639: 
INFO:root:eval_accuracy_multilabel after step 140: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 140: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 140: 0.6336530913576556: 
INFO:root:eval_fbeta after step 140: 0.11817725002765656: 
INFO:root:lr after step 140: 1.6800000000000002e-05
INFO:root:train_loss after step 140: 0.3842389896512032
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 160: 0.37951523313919705: 
INFO:root:eval_accuracy_multilabel after step 160: 0.07712765957446809: 
INFO:root:eval_accuracy_thresh after step 160: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 160: 0.6392406798245613: 
INFO:root:eval_fbeta after step 160: 0.022531012073159218: 
INFO:root:lr after step 160: 1.9200000000000003e-05
INFO:root:train_loss after step 160: 0.3780908152461052
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 180: 0.3774275754888852: 
INFO:root:eval_accuracy_multilabel after step 180: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 180: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 180: 0.6419345594098884: 
INFO:root:eval_fbeta after step 180: 0.033911921083927155: 
INFO:root:lr after step 180: 2.16e-05
INFO:root:train_loss after step 180: 0.38038247674703596
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 2: 0.38004377981026966: 
INFO:root:eval_accuracy_multilabel after epoch 2: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after epoch 2: 0.8662614226341248: 
INFO:root:eval_roc_auc after epoch 2: 0.6414744691985645: 
INFO:root:eval_fbeta after epoch 2: 0.12063871324062347: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 200: 0.37656106303135556: 
INFO:root:eval_accuracy_multilabel after step 200: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 200: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 200: 0.647435394238437: 
INFO:root:eval_fbeta after step 200: 0.1050063967704773: 
INFO:root:lr after step 200: 2.4e-05
INFO:root:train_loss after step 200: 0.3727929949760437
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 220: 0.3737833723425865: 
INFO:root:eval_accuracy_multilabel after step 220: 0.10372340425531915: 
INFO:root:eval_accuracy_thresh after step 220: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 220: 0.653194153708134: 
INFO:root:eval_fbeta after step 220: 0.0450885146856308: 
INFO:root:lr after step 220: 2.64e-05
INFO:root:train_loss after step 220: 0.37551104128360746
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 240: 0.373253936568896: 
INFO:root:eval_accuracy_multilabel after step 240: 0.09308510638297872: 
INFO:root:eval_accuracy_thresh after step 240: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 240: 0.6616642369417863: 
INFO:root:eval_fbeta after step 240: 0.08099286258220673: 
INFO:root:lr after step 240: 2.88e-05
INFO:root:train_loss after step 240: 0.37662182450294496
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 260: 0.374094953139623: 
INFO:root:eval_accuracy_multilabel after step 260: 0.09308510638297872: 
INFO:root:eval_accuracy_thresh after step 260: 0.8662614226341248: 
INFO:root:eval_roc_auc after step 260: 0.6537233976275917: 
INFO:root:eval_fbeta after step 260: 0.08432884514331818: 
INFO:root:lr after step 260: 3.12e-05
INFO:root:train_loss after step 260: 0.37142002284526826
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 280: 0.3724478781223297: 
INFO:root:eval_accuracy_multilabel after step 280: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after step 280: 0.8687310218811035: 
INFO:root:eval_roc_auc after step 280: 0.669334379984051: 
INFO:root:eval_fbeta after step 280: 0.15822471678256989: 
INFO:root:lr after step 280: 3.3600000000000004e-05
INFO:root:train_loss after step 280: 0.3572603762149811
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 3: 0.3725009361902873: 
INFO:root:eval_accuracy_multilabel after epoch 3: 0.08776595744680851: 
INFO:root:eval_accuracy_thresh after epoch 3: 0.8666413426399231: 
INFO:root:eval_roc_auc after epoch 3: 0.6663078149920255: 
INFO:root:eval_fbeta after epoch 3: 0.13528220355510712: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 300: 0.36995947857697803: 
INFO:root:eval_accuracy_multilabel after step 300: 0.10372340425531915: 
INFO:root:eval_accuracy_thresh after step 300: 0.8675912022590637: 
INFO:root:eval_roc_auc after step 300: 0.6685875510865232: 
INFO:root:eval_fbeta after step 300: 0.16539353132247925: 
INFO:root:lr after step 300: 3.6e-05
INFO:root:train_loss after step 300: 0.3520405650138855
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 320: 0.36087414373954135: 
INFO:root:eval_accuracy_multilabel after step 320: 0.1622340425531915: 
INFO:root:eval_accuracy_thresh after step 320: 0.8666413426399231: 
INFO:root:eval_roc_auc after step 320: 0.6967303067683412: 
INFO:root:eval_fbeta after step 320: 0.13312549889087677: 
INFO:root:lr after step 320: 3.8400000000000005e-05
INFO:root:train_loss after step 320: 0.361592598259449
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 340: 0.34977229684591293: 
INFO:root:eval_accuracy_multilabel after step 340: 0.2047872340425532: 
INFO:root:eval_accuracy_thresh after step 340: 0.8712006211280823: 
INFO:root:eval_roc_auc after step 340: 0.7260662754186603: 
INFO:root:eval_fbeta after step 340: 0.2370711714029312: 
INFO:root:lr after step 340: 4.08e-05
INFO:root:train_loss after step 340: 0.3539153814315796
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 360: 0.3475448365012805: 
INFO:root:eval_accuracy_multilabel after step 360: 0.2632978723404255: 
INFO:root:eval_accuracy_thresh after step 360: 0.8732903003692627: 
INFO:root:eval_roc_auc after step 360: 0.7488164436303827: 
INFO:root:eval_fbeta after step 360: 0.264405220746994: 
INFO:root:lr after step 360: 4.32e-05
INFO:root:train_loss after step 360: 0.34329123646020887
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 4: 0.3350159327189128: 
INFO:root:eval_accuracy_multilabel after epoch 4: 0.27393617021276595: 
INFO:root:eval_accuracy_thresh after epoch 4: 0.8778495788574219: 
INFO:root:eval_roc_auc after epoch 4: 0.7587523051236045: 
INFO:root:eval_fbeta after epoch 4: 0.2589394152164459: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 380: 0.33235084762175876: 
INFO:root:eval_accuracy_multilabel after step 380: 0.3271276595744681: 
INFO:root:eval_accuracy_thresh after step 380: 0.8768997192382812: 
INFO:root:eval_roc_auc after step 380: 0.7642657558313397: 
INFO:root:eval_fbeta after step 380: 0.23586556315422058: 
INFO:root:lr after step 380: 4.5600000000000004e-05
INFO:root:train_loss after step 380: 0.3361831307411194
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 400: 0.32970546931028366: 
INFO:root:eval_accuracy_multilabel after step 400: 0.26595744680851063: 
INFO:root:eval_accuracy_thresh after step 400: 0.8770896792411804: 
INFO:root:eval_roc_auc after step 400: 0.7936585738137959: 
INFO:root:eval_fbeta after step 400: 0.38554590940475464: 
INFO:root:lr after step 400: 4.8e-05
INFO:root:train_loss after step 400: 0.32306510508060454
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 420: 0.31578996529181796: 
INFO:root:eval_accuracy_multilabel after step 420: 0.2872340425531915: 
INFO:root:eval_accuracy_thresh after step 420: 0.8787993788719177: 
INFO:root:eval_roc_auc after step 420: 0.8000183786881978: 
INFO:root:eval_fbeta after step 420: 0.3766002058982849: 
INFO:root:lr after step 420: 5.04e-05
INFO:root:train_loss after step 420: 0.3049935385584831
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 440: 0.3062935769557953: 
INFO:root:eval_accuracy_multilabel after step 440: 0.43351063829787234: 
INFO:root:eval_accuracy_thresh after step 440: 0.8951367735862732: 
INFO:root:eval_roc_auc after step 440: 0.8153522789573364: 
INFO:root:eval_fbeta after step 440: 0.3981267809867859: 
INFO:root:lr after step 440: 5.28e-05
INFO:root:train_loss after step 440: 0.3105488896369934
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 460: 0.29407331099112827: 
INFO:root:eval_accuracy_multilabel after step 460: 0.4521276595744681: 
INFO:root:eval_accuracy_thresh after step 460: 0.8939969539642334: 
INFO:root:eval_roc_auc after step 460: 0.8392023649322169: 
INFO:root:eval_fbeta after step 460: 0.4717917740345001: 
INFO:root:lr after step 460: 5.520000000000001e-05
INFO:root:train_loss after step 460: 0.2973130434751511
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 5: 0.2849983622630437: 
INFO:root:eval_accuracy_multilabel after epoch 5: 0.4308510638297872: 
INFO:root:eval_accuracy_thresh after epoch 5: 0.8941869139671326: 
INFO:root:eval_roc_auc after epoch 5: 0.8439805123604466: 
INFO:root:eval_fbeta after epoch 5: 0.412905216217041: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 480: 0.2717057565848033: 
INFO:root:eval_accuracy_multilabel after step 480: 0.4920212765957447: 
INFO:root:eval_accuracy_thresh after step 480: 0.8958966732025146: 
INFO:root:eval_roc_auc after step 480: 0.8685900431120414: 
INFO:root:eval_fbeta after step 480: 0.445234477519989: 
INFO:root:lr after step 480: 5.76e-05
INFO:root:train_loss after step 480: 0.2895387999713421
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 500: 0.27295153588056564: 
INFO:root:eval_accuracy_multilabel after step 500: 0.46808510638297873: 
INFO:root:eval_accuracy_thresh after step 500: 0.8991261720657349: 
INFO:root:eval_roc_auc after step 500: 0.8538185618520734: 
INFO:root:eval_fbeta after step 500: 0.4392823278903961: 
INFO:root:lr after step 500: 6e-05
INFO:root:train_loss after step 500: 0.258198457211256
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 520: 0.2733690030872822: 
INFO:root:eval_accuracy_multilabel after step 520: 0.4308510638297872: 
INFO:root:eval_accuracy_thresh after step 520: 0.8970364928245544: 
INFO:root:eval_roc_auc after step 520: 0.8636464874900318: 
INFO:root:eval_fbeta after step 520: 0.5067996382713318: 
INFO:root:lr after step 520: 5.969464325642798e-05
INFO:root:train_loss after step 520: 0.25242322236299514
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 540: 0.2527858739097913: 
INFO:root:eval_accuracy_multilabel after step 540: 0.42819148936170215: 
INFO:root:eval_accuracy_thresh after step 540: 0.9029255509376526: 
INFO:root:eval_roc_auc after step 540: 0.8835619766746411: 
INFO:root:eval_fbeta after step 540: 0.4866953492164612: 
INFO:root:lr after step 540: 5.878478920843492e-05
INFO:root:train_loss after step 540: 0.2601075701415539
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 560: 0.24693730721871057: 
INFO:root:eval_accuracy_multilabel after step 560: 0.4787234042553192: 
INFO:root:eval_accuracy_thresh after step 560: 0.912044107913971: 
INFO:root:eval_roc_auc after step 560: 0.8986997856858054: 
INFO:root:eval_fbeta after step 560: 0.5509953498840332: 
INFO:root:lr after step 560: 5.728895986063555e-05
INFO:root:train_loss after step 560: 0.2596549466252327
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 6: 0.2383527768154939: 
INFO:root:eval_accuracy_multilabel after epoch 6: 0.46808510638297873: 
INFO:root:eval_accuracy_thresh after epoch 6: 0.9093844890594482: 
INFO:root:eval_roc_auc after epoch 6: 0.901318904505582: 
INFO:root:eval_fbeta after epoch 6: 0.5027181506156921: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 580: 0.22806462521354356: 
INFO:root:eval_accuracy_multilabel after step 580: 0.4973404255319149: 
INFO:root:eval_accuracy_thresh after step 580: 0.9154635071754456: 
INFO:root:eval_roc_auc after step 580: 0.9103121573464913: 
INFO:root:eval_fbeta after step 580: 0.5339313745498657: 
INFO:root:lr after step 580: 5.5237605984935435e-05
INFO:root:train_loss after step 580: 0.2217001847922802
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 600: 0.2182245266934236: 
INFO:root:eval_accuracy_multilabel after step 600: 0.5292553191489362: 
INFO:root:eval_accuracy_thresh after step 600: 0.9211626052856445: 
INFO:root:eval_roc_auc after step 600: 0.916445343650319: 
INFO:root:eval_fbeta after step 600: 0.5545096397399902: 
INFO:root:lr after step 600: 5.267248723062775e-05
INFO:root:train_loss after step 600: 0.22282858341932296
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 620: 0.21146428088347116: 
INFO:root:eval_accuracy_multilabel after step 620: 0.526595744680851: 
INFO:root:eval_accuracy_thresh after step 620: 0.924962043762207: 
INFO:root:eval_roc_auc after step 620: 0.9223737166068582: 
INFO:root:eval_fbeta after step 620: 0.5950745344161987: 
INFO:root:lr after step 620: 4.964582201835856e-05
INFO:root:train_loss after step 620: 0.20323487147688865
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 640: 0.19696818167964616: 
INFO:root:eval_accuracy_multilabel after step 640: 0.5292553191489362: 
INFO:root:eval_accuracy_thresh after step 640: 0.9280015230178833: 
INFO:root:eval_roc_auc after step 640: 0.9350333619916267: 
INFO:root:eval_fbeta after step 640: 0.5809414982795715: 
INFO:root:lr after step 640: 4.6219224523667933e-05
INFO:root:train_loss after step 640: 0.20382243543863296
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 7: 0.19158807645241419: 
INFO:root:eval_accuracy_multilabel after epoch 7: 0.5398936170212766: 
INFO:root:eval_accuracy_thresh after epoch 7: 0.9327507615089417: 
INFO:root:eval_roc_auc after epoch 7: 0.9365239047547846: 
INFO:root:eval_fbeta after epoch 7: 0.6177889704704285: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 660: 0.19023054465651512: 
INFO:root:eval_accuracy_multilabel after step 660: 0.5345744680851063: 
INFO:root:eval_accuracy_thresh after step 660: 0.9348404407501221: 
INFO:root:eval_roc_auc after step 660: 0.9387665719696969: 
INFO:root:eval_fbeta after step 660: 0.6275178790092468: 
INFO:root:lr after step 660: 4.24624503900566e-05
INFO:root:train_loss after step 660: 0.19277693554759026
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 680: 0.18656699359416962: 
INFO:root:eval_accuracy_multilabel after step 680: 0.574468085106383: 
INFO:root:eval_accuracy_thresh after step 680: 0.9411094188690186: 
INFO:root:eval_roc_auc after step 680: 0.9504504336124402: 
INFO:root:eval_fbeta after step 680: 0.6550090312957764: 
INFO:root:lr after step 680: 3.845197670524289e-05
INFO:root:train_loss after step 680: 0.17208859249949454
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 700: 0.1698215069870154: 
INFO:root:eval_accuracy_multilabel after step 700: 0.5425531914893617: 
INFO:root:eval_accuracy_thresh after step 700: 0.9439589977264404: 
INFO:root:eval_roc_auc after step 700: 0.9541570100677831: 
INFO:root:eval_fbeta after step 700: 0.6556284427642822: 
INFO:root:lr after step 700: 3.426944514819856e-05
INFO:root:train_loss after step 700: 0.16643343046307563
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 720: 0.1624588780105114: 
INFO:root:eval_accuracy_multilabel after step 720: 0.5425531914893617: 
INFO:root:eval_accuracy_thresh after step 720: 0.9473784565925598: 
INFO:root:eval_roc_auc after step 720: 0.9605309883373205: 
INFO:root:eval_fbeta after step 720: 0.6774908900260925: 
INFO:root:lr after step 720: 3e-05
INFO:root:train_loss after step 720: 0.16628207489848137
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 740: 0.15306006806592146: 
INFO:root:eval_accuracy_multilabel after step 740: 0.5452127659574468: 
INFO:root:eval_accuracy_thresh after step 740: 0.9523176550865173: 
INFO:root:eval_roc_auc after step 740: 0.9626169694477671: 
INFO:root:eval_fbeta after step 740: 0.6934199929237366: 
INFO:root:lr after step 740: 2.573055485180145e-05
INFO:root:train_loss after step 740: 0.15547022074460984
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 8: 0.14780312528212866: 
INFO:root:eval_accuracy_multilabel after epoch 8: 0.5531914893617021: 
INFO:root:eval_accuracy_thresh after epoch 8: 0.9547872543334961: 
INFO:root:eval_roc_auc after epoch 8: 0.9675608365729664: 
INFO:root:eval_fbeta after epoch 8: 0.7022637724876404: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 760: 0.1448595511416594: 
INFO:root:eval_accuracy_multilabel after step 760: 0.5797872340425532: 
INFO:root:eval_accuracy_thresh after step 760: 0.9551671743392944: 
INFO:root:eval_roc_auc after step 760: 0.9693030739134769: 
INFO:root:eval_fbeta after step 760: 0.696286678314209: 
INFO:root:lr after step 760: 2.154802329475711e-05
INFO:root:train_loss after step 760: 0.14713641293346882
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 780: 0.1420874564598004: 
INFO:root:eval_accuracy_multilabel after step 780: 0.5638297872340425: 
INFO:root:eval_accuracy_thresh after step 780: 0.9599164128303528: 
INFO:root:eval_roc_auc after step 780: 0.9724238686204147: 
INFO:root:eval_fbeta after step 780: 0.7099393010139465: 
INFO:root:lr after step 780: 1.753754960994341e-05
INFO:root:train_loss after step 780: 0.13847305588424205
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 800: 0.13680805762608847: 
INFO:root:eval_accuracy_multilabel after step 800: 0.550531914893617: 
INFO:root:eval_accuracy_thresh after step 800: 0.9593465328216553: 
INFO:root:eval_roc_auc after step 800: 0.9734851599880383: 
INFO:root:eval_fbeta after step 800: 0.7130834460258484: 
INFO:root:lr after step 800: 1.3780775476332083e-05
INFO:root:train_loss after step 800: 0.1344921063631773
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 820: 0.1318874123195807: 
INFO:root:eval_accuracy_multilabel after step 820: 0.5452127659574468: 
INFO:root:eval_accuracy_thresh after step 820: 0.9614361524581909: 
INFO:root:eval_roc_auc after step 820: 0.9766785349381977: 
INFO:root:eval_fbeta after step 820: 0.7197984457015991: 
INFO:root:lr after step 820: 1.035417798164145e-05
INFO:root:train_loss after step 820: 0.1235019788146019
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 840: 0.12795358647902808: 
INFO:root:eval_accuracy_multilabel after step 840: 0.5664893617021277: 
INFO:root:eval_accuracy_thresh after step 840: 0.964475691318512: 
INFO:root:eval_roc_auc after step 840: 0.9789824125299044: 
INFO:root:eval_fbeta after step 840: 0.7247323989868164: 
INFO:root:lr after step 840: 7.3275127693722555e-06
INFO:root:train_loss after step 840: 0.13364980295300483
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 9: 0.12688010496397814: 
INFO:root:eval_accuracy_multilabel after epoch 9: 0.5611702127659575: 
INFO:root:eval_accuracy_thresh after epoch 9: 0.9650456309318542: 
INFO:root:eval_roc_auc after epoch 9: 0.9788238574063: 
INFO:root:eval_fbeta after epoch 9: 0.7235589027404785: 
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 860: 0.12626786592106023: 
INFO:root:eval_accuracy_multilabel after step 860: 0.5611702127659575: 
INFO:root:eval_accuracy_thresh after step 860: 0.9652355909347534: 
INFO:root:eval_roc_auc after step 860: 0.9794206975179425: 
INFO:root:eval_fbeta after step 860: 0.7286032438278198: 
INFO:root:lr after step 860: 4.76239401506456e-06
INFO:root:train_loss after step 860: 0.12455603443086147
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 880: 0.12470618449151516: 
INFO:root:eval_accuracy_multilabel after step 880: 0.5558510638297872: 
INFO:root:eval_accuracy_thresh after step 880: 0.9656155109405518: 
INFO:root:eval_roc_auc after step 880: 0.9804020883173844: 
INFO:root:eval_fbeta after step 880: 0.728894054889679: 
INFO:root:lr after step 880: 2.711040139364447e-06
INFO:root:train_loss after step 880: 0.12364947348833084
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 900: 0.12454178122182687: 
INFO:root:eval_accuracy_multilabel after step 900: 0.5452127659574468: 
INFO:root:eval_accuracy_thresh after step 900: 0.9658054709434509: 
INFO:root:eval_roc_auc after step 900: 0.9806051883971292: 
INFO:root:eval_fbeta after step 900: 0.7313688397407532: 
INFO:root:lr after step 900: 1.2152107915650823e-06
INFO:root:train_loss after step 900: 0.11855292432010174
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 920: 0.12379382488628228: 
INFO:root:eval_accuracy_multilabel after step 920: 0.5478723404255319: 
INFO:root:eval_accuracy_thresh after step 920: 0.9663754105567932: 
INFO:root:eval_roc_auc after step 920: 0.9808177893241627: 
INFO:root:eval_fbeta after step 920: 0.7329977750778198: 
INFO:root:lr after step 920: 3.053567435720195e-07
INFO:root:train_loss after step 920: 0.11797427535057067
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 940: 0.12375071210165818: 
INFO:root:eval_accuracy_multilabel after step 940: 0.5478723404255319: 
INFO:root:eval_accuracy_thresh after step 940: 0.9667553305625916: 
INFO:root:eval_roc_auc after step 940: 0.9808486281399522: 
INFO:root:eval_fbeta after step 940: 0.7330515384674072: 
INFO:root:lr after step 940: 0.0
INFO:root:train_loss after step 940: 0.11781796663999558
INFO:root:Running evaluation
INFO:root:  Num examples = 376
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 10: 0.12375071210165818: 
INFO:root:eval_accuracy_multilabel after epoch 10: 0.5478723404255319: 
INFO:root:eval_accuracy_thresh after epoch 10: 0.9667553305625916: 
INFO:root:eval_roc_auc after epoch 10: 0.9808486281399522: 
INFO:root:eval_fbeta after epoch 10: 0.7330515384674072: 
INFO:pytorch_transformers.tokenization_utils:Model name 'biobert' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc). Assuming 'biobert' is a path or url to a directory containing tokenizer files.
INFO:pytorch_transformers.tokenization_utils:Didn't find file biobert/added_tokens.json. We won't load i

data/phenotype_classification/transformer/synthetic/output/biobert/tensorboard
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 20: 0.6970422069231669: 
INFO:root:eval_accuracy_multilabel after step 20: 0.11170212765957446: 
INFO:root:eval_accuracy_thresh after step 20: 0.4407294988632202: 
INFO:root:eval_roc_auc after step 20: 0.6002800733826769: 
INFO:root:eval_fbeta after step 20: 0.34148284792900085: 
INFO:root:lr after step 20: 2.4000000000000003e-06
INFO:root:train_loss after step 20: 0.727754658460617
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 40: 0.5759041905403137: 
INFO:root:eval_accuracy_multilabel after step 40: 0.05319148936170213: 
INFO:root:eval_accuracy_thresh after step 40: 0.8332067131996155: 
INFO:root:eval_roc_auc after step 40: 0.6330039451815301: 
INFO:root:eval_fbeta after step 40: 0.3426564633846283: 
INFO:root:lr after step 40: 4.800000000000001e-06
INFO:root:train_loss after step 40: 0.6513714492321014
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 1: 0.5094102223714193: 
INFO:root:eval_accuracy_multilabel after epoch 1: 0.0425531914893617: 
INFO:root:eval_accuracy_thresh after epoch 1: 0.8856382966041565: 
INFO:root:eval_roc_auc after epoch 1: 0.6091366033482284: 
INFO:root:eval_fbeta after epoch 1: 0.3377731740474701: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 60: 0.4425976574420929: 
INFO:root:eval_accuracy_multilabel after step 60: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 60: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 60: 0.6254687059689561: 
INFO:root:eval_fbeta after step 60: 0.31542277336120605: 
INFO:root:lr after step 60: 7.2e-06
INFO:root:train_loss after step 60: 0.522059878706932
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 80: 0.39222991466522217: 
INFO:root:eval_accuracy_multilabel after step 80: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 80: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 80: 0.6367054283627496: 
INFO:root:eval_fbeta after step 80: 0.17409798502922058: 
INFO:root:lr after step 80: 9.600000000000001e-06
INFO:root:train_loss after step 80: 0.43665044009685516
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 2: 0.37447085479895276: 
INFO:root:eval_accuracy_multilabel after epoch 2: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 2: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 2: 0.6384134734275346: 
INFO:root:eval_fbeta after epoch 2: 0.12116622179746628: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 100: 0.36834605038166046: 
INFO:root:eval_accuracy_multilabel after step 100: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 100: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 100: 0.6324568819263527: 
INFO:root:eval_fbeta after step 100: 0.11696664988994598: 
INFO:root:lr after step 100: 1.2e-05
INFO:root:train_loss after step 100: 0.410517780482769
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 120: 0.35520951449871063: 
INFO:root:eval_accuracy_multilabel after step 120: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 120: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 120: 0.6423665625736846: 
INFO:root:eval_fbeta after step 120: 0.0: 
INFO:root:lr after step 120: 1.44e-05
INFO:root:train_loss after step 120: 0.39286255836486816
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 140: 0.3452451328436534: 
INFO:root:eval_accuracy_multilabel after step 140: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 140: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 140: 0.6490456226313095: 
INFO:root:eval_fbeta after step 140: 0.0: 
INFO:root:lr after step 140: 1.6800000000000002e-05
INFO:root:train_loss after step 140: 0.37900543957948685
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 3: 0.34555651744206745: 
INFO:root:eval_accuracy_multilabel after epoch 3: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 3: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 3: 0.6474986053840801: 
INFO:root:eval_fbeta after epoch 3: 0.0: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 160: 0.34090517461299896: 
INFO:root:eval_accuracy_multilabel after step 160: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 160: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 160: 0.6538901157674989: 
INFO:root:eval_fbeta after step 160: 0.0: 
INFO:root:lr after step 160: 1.9200000000000003e-05
INFO:root:train_loss after step 160: 0.37666564583778384
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 180: 0.3399626662333806: 
INFO:root:eval_accuracy_multilabel after step 180: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 180: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 180: 0.6610328496747813: 
INFO:root:eval_fbeta after step 180: 0.0: 
INFO:root:lr after step 180: 2.16e-05
INFO:root:train_loss after step 180: 0.37570609897375107
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 4: 0.33833127717177075: 
INFO:root:eval_accuracy_multilabel after epoch 4: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 4: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 4: 0.6603463248276139: 
INFO:root:eval_fbeta after epoch 4: 0.0: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 200: 0.34463849663734436: 
INFO:root:eval_accuracy_multilabel after step 200: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 200: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 200: 0.6527981458797008: 
INFO:root:eval_fbeta after step 200: 0.11696664988994598: 
INFO:root:lr after step 200: 2.4e-05
INFO:root:train_loss after step 200: 0.37602600604295733
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 220: 0.3385680715243022: 
INFO:root:eval_accuracy_multilabel after step 220: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 220: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 220: 0.6561955452804473: 
INFO:root:eval_fbeta after step 220: 0.0: 
INFO:root:lr after step 220: 2.64e-05
INFO:root:train_loss after step 220: 0.3774724841117859


KeyboardInterrupt: 