# Train

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import torch
import apex
import os
import logging

from pytorch_transformers import BertTokenizer
from fast_bert.data_cls import BertDataBunch
from fast_bert.learner_cls import BertLearner
from fast_bert.metrics import accuracy_multilabel, accuracy_thresh, fbeta, roc_auc
from fast_bert.prediction import BertClassificationPredictor

Let's first:

1. Assign our paths
2. Check GPU status
3. Import our labels
4. Create a logger object
5. Decide the metrics we want to report

In [2]:
# Root folder of the phenotype-classification data set.
BASE = Path('data/phenotype_classification/')

# Folder holding train.csv / val.csv for the variant being trained.
DATA_PATH = BASE/'transformer/original/' # change this as necessary
# labels.csv lives directly in the data-set root.
LABEL_PATH = BASE
# Model outputs go next to the data they were trained on, so switching
# variants only requires editing DATA_PATH.
OUTPUT_DIR = DATA_PATH/'output'
# parents=True: on a fresh checkout the intermediate folders may not exist yet,
# and a plain mkdir() would raise FileNotFoundError.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [3]:
# Pick the training device and detect whether several GPUs can be used.

has_cuda = torch.cuda.is_available()

# Use CUDA whenever it is available, otherwise fall back to the CPU.
device_cuda = torch.device("cuda" if has_cuda else "cpu")

# Multi-GPU mode only when CUDA is present and reports more than one device.
multi_gpu = has_cuda and torch.cuda.device_count() > 1

print(multi_gpu)

True


In [4]:
# labels.csv has no header row: one label name per line, read into a
# single column called "name".
categories = pd.read_csv(
    LABEL_PATH/'labels.csv',
    sep=',',
    header=None,
    names=["name"],
)

# Plain Python list of label names, in file order (used as label_col below).
labels = list(categories['name'])
labels

['Advanced.Cancer',
 'Advanced.Heart.Disease',
 'Advanced.Lung.Disease',
 'Alcohol.Abuse',
 'Chronic.Neurological.Dystrophies',
 'Chronic.Pain.Fibromyalgia',
 'Dementia',
 'Depression',
 'Developmental.Delay.Retardation',
 'Non.Adherence',
 'Obesity',
 'Other.Substance.Abuse',
 'Schizophrenia.and.other.Psychiatric.Disorders',
 'Unsure']

In [5]:
# Configure the root logger that is handed to fast-bert.
# INFO keeps the training / evaluation progress messages while dropping DEBUG
# chatter from third-party libraries (e.g. urllib3 connection logs).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
# basicConfig() does nothing when the root logger already has handlers, so set
# the level explicitly to make the cell behave the same on every re-run.
logger.setLevel(logging.INFO)

In [6]:
# Metrics reported at every evaluation: each entry pairs the name used in the
# log output with the fast-bert function that computes it.
metrics = [
    {'name': 'accuracy_multilabel', 'function': accuracy_multilabel},
    {'name': 'accuracy_thresh', 'function': accuracy_thresh},
    {'name': 'roc_auc', 'function': roc_auc},
    {'name': 'fbeta', 'function': fbeta},
]

## Pipeline

#### Create a DataBunch object

In [9]:
# Tokenisation / batching settings, named so they are easy to find and tune.
TOKENIZER_NAME = 'bert-base-uncased'
BATCH_SIZE_PER_GPU = 8
MAX_SEQ_LENGTH = 512  # same value as max_position_embeddings in the logged BERT config

# Build the data bunch from train.csv / val.csv under DATA_PATH, with one
# target column per entry of `labels` (multi-label classification).
databunch = BertDataBunch(
    DATA_PATH,
    LABEL_PATH,
    tokenizer=TOKENIZER_NAME,
    train_file='train.csv',
    val_file='val.csv',
    text_col='text',
    label_file='labels.csv',
    label_col=labels,
    batch_size_per_gpu=BATCH_SIZE_PER_GPU,
    max_seq_length=MAX_SEQ_LENGTH,
    multi_gpu=multi_gpu,
    multi_label=True,
    model_type='bert',
    # presumably forces the cached features to be rebuilt on every run;
    # confirm against the fast-bert documentation before relying on it
    clear_cache=True,
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): s3.amazonaws.com:443
DEBUG:urllib3.connectionpool:https://s3.amazonaws.com:443 "HEAD /models.huggingface.co/bert/bert-base-uncased-vocab.txt HTTP/1.1" 200 0
INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/aa5118/.cache/torch/pytorch_transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
INFO:root:Writing example 0 of 1501
INFO:root:Saving features into cached file data/phenotype_classification/transformer/original/cache/cached_train_multi_label_512
INFO:root:Writing example 0 of 188
INFO:root:Saving features into cached file data/phenotype_classification/transformer/original/cache/cached_dev_multi_label_512


#### Create a Learner object

In [9]:
# Mixed-precision training only makes sense on a CUDA device. The notebook
# falls back to torch.device("cpu") when no GPU is found, so tie fp16 to the
# selected device instead of hard-coding True.
use_fp16 = device_cuda.type == "cuda"

# Wrap the pretrained BERT weights, the data bunch and the metrics in a learner.
learner = BertLearner.from_pretrained_model(
    databunch,
    pretrained_path='bert-base-uncased',
    metrics=metrics,
    device=device_cuda,
    logger=logger,
    output_dir=OUTPUT_DIR,
    finetuned_wgts_path=None,   # start from the stock pretrained weights
    warmup_steps=500,           # optimizer steps of LR warm-up before the schedule decays
    multi_gpu=multi_gpu,
    is_fp16=use_fp16,
    multi_label=True,
    logging_steps=20,           # the recorded log shows eval / lr / train_loss every 20 steps
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): s3.amazonaws.com:443
DEBUG:urllib3.connectionpool:https://s3.amazonaws.com:443 "HEAD /models.huggingface.co/bert/bert-base-uncased-config.json HTTP/1.1" 200 0
INFO:pytorch_transformers.modeling_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/aa5118/.cache/torch/pytorch_transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.bf3b9ea126d8c0001ee8a1e8b92229871d06d36d8808208cc2449280da87785c
INFO:pytorch_transformers.modeling_utils:Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 14,
  "output_attentions": false,
  "output_hid

#### Train the model

In [10]:
# Training hyper-parameters, hoisted so they are easy to spot and change.
# NOTE(review): the output recorded below comes from a 20-epoch run
# ("Num Epochs = 20", 940 optimization steps, i.e. 47 steps per epoch).
# At 47 steps per epoch, 10 epochs is about 470 steps, fewer than the
# warmup_steps=500 configured on the learner, so the warm-up would never
# finish. Confirm the intended epoch count / warm-up length and re-run.
EPOCHS = 10
LEARNING_RATE = 6e-5

learner.fit(
    epochs=EPOCHS,
    lr=LEARNING_RATE,
    validate=True,  # evaluate the model after each epoch
    schedule_type="warmup_cosine",
)

INFO:root:***** Running training *****
INFO:root:  Num examples = 1501
INFO:root:  Num Epochs = 20
INFO:root:  Total train batch size (w. parallel, distributed & accumulation) = 32
INFO:root:  Gradient Accumulation steps = 1
INFO:root:  Total optimization steps = 940


data/phenotype_classification/transformer/original/output/tensorboard
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 20: 0.6848235229651133: 
INFO:root:eval_accuracy_multilabel after step 20: 0.026595744680851064: 
INFO:root:eval_accuracy_thresh after step 20: 0.5167173147201538: 
INFO:root:eval_roc_auc after step 20: 0.41002714468924506: 
INFO:root:eval_fbeta after step 20: 0.32542797923088074: 
INFO:root:lr after step 20: 2.4000000000000003e-06
INFO:root:train_loss after step 20: 0.715276351571083
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 40: 0.5737837851047516: 
INFO:root:eval_accuracy_multilabel after step 40: 0.0797872340425532: 
INFO:root:eval_accuracy_thresh after step 40: 0.822568416595459: 
INFO:root:eval_roc_auc after step 40: 0.5991413766728202: 
INFO:root:eval_fbeta after step 40: 0.3410872220993042: 
INFO:root:lr after step 40: 4.800000000000001e-06
INFO:root:train_loss after step 40: 0.6381511867046357
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 1: 0.5580857396125793: 
INFO:root:eval_accuracy_multilabel after epoch 1: 0.0797872340425532: 
INFO:root:eval_accuracy_thresh after epoch 1: 0.8237082362174988: 
INFO:root:eval_roc_auc after epoch 1: 0.5791796782894245: 
INFO:root:eval_fbeta after epoch 1: 0.32421910762786865: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 60: 0.4910062899192174: 
INFO:root:eval_accuracy_multilabel after step 60: 0.0851063829787234: 
INFO:root:eval_accuracy_thresh after step 60: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 60: 0.6083242755185959: 
INFO:root:eval_fbeta after step 60: 0.341076523065567: 
INFO:root:lr after step 60: 7.2e-06
INFO:root:train_loss after step 60: 0.5493393182754517
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 80: 0.42475944260756177: 
INFO:root:eval_accuracy_multilabel after step 80: 0.10638297872340426: 
INFO:root:eval_accuracy_thresh after step 80: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 80: 0.643564925840939: 
INFO:root:eval_fbeta after step 80: 0.2973541021347046: 
INFO:root:lr after step 80: 9.600000000000001e-06
INFO:root:train_loss after step 80: 0.47869707494974134
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 2: 0.39617985983689624: 
INFO:root:eval_accuracy_multilabel after epoch 2: 0.05319148936170213: 
INFO:root:eval_accuracy_thresh after epoch 2: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 2: 0.6615748808106601: 
INFO:root:eval_fbeta after epoch 2: 0.24539975821971893: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 100: 0.3839411735534668: 
INFO:root:eval_accuracy_multilabel after step 100: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after step 100: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 100: 0.6532445667489059: 
INFO:root:eval_fbeta after step 100: 0.19284003973007202: 
INFO:root:lr after step 100: 1.2e-05
INFO:root:train_loss after step 100: 0.43761933147907256
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 120: 0.36205873390038806: 
INFO:root:eval_accuracy_multilabel after step 120: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 120: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 120: 0.6707347756824991: 
INFO:root:eval_fbeta after step 120: 0.12755055725574493: 
INFO:root:lr after step 120: 1.44e-05
INFO:root:train_loss after step 120: 0.40733384191989896
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 140: 0.34749147295951843: 
INFO:root:eval_accuracy_multilabel after step 140: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 140: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 140: 0.6568080260865066: 
INFO:root:eval_fbeta after step 140: 0.002045826520770788: 
INFO:root:lr after step 140: 1.6800000000000002e-05
INFO:root:train_loss after step 140: 0.38489552289247514
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 3: 0.3469454348087311: 
INFO:root:eval_accuracy_multilabel after epoch 3: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 3: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 3: 0.6577936025948483: 
INFO:root:eval_fbeta after epoch 3: 0.010001818649470806: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 160: 0.3416449874639511: 
INFO:root:eval_accuracy_multilabel after step 160: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 160: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 160: 0.6565772674729559: 
INFO:root:eval_fbeta after step 160: 0.0: 
INFO:root:lr after step 160: 1.9200000000000003e-05
INFO:root:train_loss after step 160: 0.3810590222477913
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 180: 0.3432370076576869: 
INFO:root:eval_accuracy_multilabel after step 180: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 180: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 180: 0.6674049504551911: 
INFO:root:eval_fbeta after step 180: 0.12017826735973358: 
INFO:root:lr after step 180: 2.16e-05
INFO:root:train_loss after step 180: 0.37624176442623136
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 4: 0.33995052178700763: 
INFO:root:eval_accuracy_multilabel after epoch 4: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 4: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 4: 0.6546844717424936: 
INFO:root:eval_fbeta after epoch 4: 0.0: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 200: 0.33830442031224567: 
INFO:root:eval_accuracy_multilabel after step 200: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 200: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 200: 0.667814708740935: 
INFO:root:eval_fbeta after step 200: 0.027354096993803978: 
INFO:root:lr after step 200: 2.4e-05
INFO:root:train_loss after step 200: 0.3689279228448868
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 220: 0.33772335946559906: 
INFO:root:eval_accuracy_multilabel after step 220: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 220: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 220: 0.6688678594227153: 
INFO:root:eval_fbeta after step 220: 0.04393598809838295: 
INFO:root:lr after step 220: 2.64e-05
INFO:root:train_loss after step 220: 0.37574382275342944
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 5: 0.3348299413919449: 
INFO:root:eval_accuracy_multilabel after epoch 5: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after epoch 5: 0.8867781162261963: 
INFO:root:eval_roc_auc after epoch 5: 0.6738194648125464: 
INFO:root:eval_fbeta after epoch 5: 0.0667455717921257: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 240: 0.33370732764403027: 
INFO:root:eval_accuracy_multilabel after step 240: 0.0851063829787234: 
INFO:root:eval_accuracy_thresh after step 240: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 240: 0.6775576105772273: 
INFO:root:eval_fbeta after step 240: 0.023428820073604584: 
INFO:root:lr after step 240: 2.88e-05
INFO:root:train_loss after step 240: 0.37144800275564194
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 260: 0.3330939511458079: 
INFO:root:eval_accuracy_multilabel after step 260: 0.047872340425531915: 
INFO:root:eval_accuracy_thresh after step 260: 0.886398196220398: 
INFO:root:eval_roc_auc after step 260: 0.6885585422381717: 
INFO:root:eval_fbeta after step 260: 0.12998396158218384: 
INFO:root:lr after step 260: 3.12e-05
INFO:root:train_loss after step 260: 0.3620541974902153
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 280: 0.33705511192480725: 
INFO:root:eval_accuracy_multilabel after step 280: 0.10638297872340426: 
INFO:root:eval_accuracy_thresh after step 280: 0.886398196220398: 
INFO:root:eval_roc_auc after step 280: 0.7085418068471329: 
INFO:root:eval_fbeta after step 280: 0.1493101567029953: 
INFO:root:lr after step 280: 3.3600000000000004e-05
INFO:root:train_loss after step 280: 0.3609879553318024
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 6: 0.3302903175354004: 
INFO:root:eval_accuracy_multilabel after epoch 6: 0.1276595744680851: 
INFO:root:eval_accuracy_thresh after epoch 6: 0.8879179358482361: 
INFO:root:eval_roc_auc after epoch 6: 0.7130326138840484: 
INFO:root:eval_fbeta after epoch 6: 0.14225047826766968: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 300: 0.3231101334095001: 
INFO:root:eval_accuracy_multilabel after step 300: 0.22340425531914893: 
INFO:root:eval_accuracy_thresh after step 300: 0.8848784565925598: 
INFO:root:eval_roc_auc after step 300: 0.729054881730819: 
INFO:root:eval_fbeta after step 300: 0.1427101194858551: 
INFO:root:lr after step 300: 3.6e-05
INFO:root:train_loss after step 300: 0.35896016359329225
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 320: 0.3176308472951253: 
INFO:root:eval_accuracy_multilabel after step 320: 0.19148936170212766: 
INFO:root:eval_accuracy_thresh after step 320: 0.8860182762145996: 
INFO:root:eval_roc_auc after step 320: 0.7503860354376217: 
INFO:root:eval_fbeta after step 320: 0.15726444125175476: 
INFO:root:lr after step 320: 3.8400000000000005e-05
INFO:root:train_loss after step 320: 0.35287884473800657
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 7: 0.3151118556658427: 
INFO:root:eval_accuracy_multilabel after epoch 7: 0.10106382978723404: 
INFO:root:eval_accuracy_thresh after epoch 7: 0.8871580362319946: 
INFO:root:eval_roc_auc after epoch 7: 0.7428206035092562: 
INFO:root:eval_fbeta after epoch 7: 0.1157122254371643: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 340: 0.31109300752480823: 
INFO:root:eval_accuracy_multilabel after step 340: 0.21808510638297873: 
INFO:root:eval_accuracy_thresh after step 340: 0.8867781162261963: 
INFO:root:eval_roc_auc after step 340: 0.7638577376741832: 
INFO:root:eval_fbeta after step 340: 0.19610191881656647: 
INFO:root:lr after step 340: 4.08e-05
INFO:root:train_loss after step 340: 0.3385552644729614
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 360: 0.3131638914346695: 
INFO:root:eval_accuracy_multilabel after step 360: 0.20212765957446807: 
INFO:root:eval_accuracy_thresh after step 360: 0.8890577554702759: 
INFO:root:eval_roc_auc after step 360: 0.7588385581109136: 
INFO:root:eval_fbeta after step 360: 0.17900320887565613: 
INFO:root:lr after step 360: 4.32e-05
INFO:root:train_loss after step 360: 0.3260939374566078
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 8: 0.304619421561559: 
INFO:root:eval_accuracy_multilabel after epoch 8: 0.26595744680851063: 
INFO:root:eval_accuracy_thresh after epoch 8: 0.8886778354644775: 
INFO:root:eval_roc_auc after epoch 8: 0.7830049516053899: 
INFO:root:eval_fbeta after epoch 8: 0.2869456112384796: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 380: 0.3034232209126155: 
INFO:root:eval_accuracy_multilabel after step 380: 0.2393617021276596: 
INFO:root:eval_accuracy_thresh after step 380: 0.8882978558540344: 
INFO:root:eval_roc_auc after step 380: 0.7820064353617088: 
INFO:root:eval_fbeta after step 380: 0.2739352285861969: 
INFO:root:lr after step 380: 4.5600000000000004e-05
INFO:root:train_loss after step 380: 0.31867584586143494
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 400: 0.303069402774175: 
INFO:root:eval_accuracy_multilabel after step 400: 0.2925531914893617: 
INFO:root:eval_accuracy_thresh after step 400: 0.8909574747085571: 
INFO:root:eval_roc_auc after step 400: 0.7923287497915265: 
INFO:root:eval_fbeta after step 400: 0.33577778935432434: 
INFO:root:lr after step 400: 4.8e-05
INFO:root:train_loss after step 400: 0.30358903780579566
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 420: 0.29074664910634357: 
INFO:root:eval_accuracy_multilabel after step 420: 0.2872340425531915: 
INFO:root:eval_accuracy_thresh after step 420: 0.8955167531967163: 
INFO:root:eval_roc_auc after step 420: 0.797557121742781: 
INFO:root:eval_fbeta after step 420: 0.30010008811950684: 
INFO:root:lr after step 420: 5.04e-05
INFO:root:train_loss after step 420: 0.3109830230474472
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 9: 0.2991564820210139: 
INFO:root:eval_accuracy_multilabel after epoch 9: 0.2765957446808511: 
INFO:root:eval_accuracy_thresh after epoch 9: 0.8920972943305969: 
INFO:root:eval_roc_auc after epoch 9: 0.7909226318846581: 
INFO:root:eval_fbeta after epoch 9: 0.3271926939487457: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 440: 0.2910275459289551: 
INFO:root:eval_accuracy_multilabel after step 440: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 440: 0.8936170339584351: 
INFO:root:eval_roc_auc after step 440: 0.7925070305895343: 
INFO:root:eval_fbeta after step 440: 0.32390812039375305: 
INFO:root:lr after step 440: 5.28e-05
INFO:root:train_loss after step 440: 0.2904456347227097
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 460: 0.27908288439114887: 
INFO:root:eval_accuracy_multilabel after step 460: 0.32978723404255317: 
INFO:root:eval_accuracy_thresh after step 460: 0.9000759720802307: 
INFO:root:eval_roc_auc after step 460: 0.813783549858238: 
INFO:root:eval_fbeta after step 460: 0.3275734484195709: 
INFO:root:lr after step 460: 5.520000000000001e-05
INFO:root:train_loss after step 460: 0.27265576720237733
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 10: 0.28789974252382916: 
INFO:root:eval_accuracy_multilabel after epoch 10: 0.3351063829787234: 
INFO:root:eval_accuracy_thresh after epoch 10: 0.8928571343421936: 
INFO:root:eval_roc_auc after epoch 10: 0.7978676753909237: 
INFO:root:eval_fbeta after epoch 10: 0.3106161653995514: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 480: 0.2851452479759852: 
INFO:root:eval_accuracy_multilabel after step 480: 0.3351063829787234: 
INFO:root:eval_accuracy_thresh after step 480: 0.8943769335746765: 
INFO:root:eval_roc_auc after step 480: 0.8077284725936404: 
INFO:root:eval_fbeta after step 480: 0.3337230682373047: 
INFO:root:lr after step 480: 5.76e-05
INFO:root:train_loss after step 480: 0.26688227504491807
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 500: 0.28906971712907154: 
INFO:root:eval_accuracy_multilabel after step 500: 0.34574468085106386: 
INFO:root:eval_accuracy_thresh after step 500: 0.8974164128303528: 
INFO:root:eval_roc_auc after step 500: 0.7924595848932903: 
INFO:root:eval_fbeta after step 500: 0.34432560205459595: 
INFO:root:lr after step 500: 6e-05
INFO:root:train_loss after step 500: 0.249484633654356
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 11: 0.29163864254951477: 
INFO:root:eval_accuracy_multilabel after epoch 11: 0.3351063829787234: 
INFO:root:eval_accuracy_thresh after epoch 11: 0.8955167531967163: 
INFO:root:eval_roc_auc after epoch 11: 0.7975650293588219: 
INFO:root:eval_fbeta after epoch 11: 0.3508912920951843: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 520: 0.2824757918715477: 
INFO:root:eval_accuracy_multilabel after step 520: 0.34574468085106386: 
INFO:root:eval_accuracy_thresh after step 520: 0.901975691318512: 
INFO:root:eval_roc_auc after step 520: 0.7957549041596936: 
INFO:root:eval_fbeta after step 520: 0.3211449384689331: 
INFO:root:lr after step 520: 5.969464325642798e-05
INFO:root:train_loss after step 520: 0.2494668409228325
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 540: 0.28792832295099896: 
INFO:root:eval_accuracy_multilabel after step 540: 0.35638297872340424: 
INFO:root:eval_accuracy_thresh after step 540: 0.9000759720802307: 
INFO:root:eval_roc_auc after step 540: 0.7924969663509371: 
INFO:root:eval_fbeta after step 540: 0.31474319100379944: 
INFO:root:lr after step 540: 5.878478920843492e-05
INFO:root:train_loss after step 540: 0.2359548933804035
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 560: 0.2909298737843831: 
INFO:root:eval_accuracy_multilabel after step 560: 0.3191489361702128: 
INFO:root:eval_accuracy_thresh after step 560: 0.896276593208313: 
INFO:root:eval_roc_auc after step 560: 0.7894496586785368: 
INFO:root:eval_fbeta after step 560: 0.32101941108703613: 
INFO:root:lr after step 560: 5.728895986063555e-05
INFO:root:train_loss after step 560: 0.2096931032836437
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 12: 0.2915497422218323: 
INFO:root:eval_accuracy_multilabel after epoch 12: 0.30851063829787234: 
INFO:root:eval_accuracy_thresh after epoch 12: 0.890577495098114: 
INFO:root:eval_roc_auc after epoch 12: 0.7978238240656074: 
INFO:root:eval_fbeta after epoch 12: 0.3726266324520111: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 580: 0.29025957981745404: 
INFO:root:eval_accuracy_multilabel after step 580: 0.3351063829787234: 
INFO:root:eval_accuracy_thresh after step 580: 0.8917173147201538: 
INFO:root:eval_roc_auc after step 580: 0.7990516611744679: 
INFO:root:eval_fbeta after step 580: 0.3325578272342682: 
INFO:root:lr after step 580: 5.5237605984935435e-05
INFO:root:train_loss after step 580: 0.19909504130482675
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 600: 0.29506443440914154: 
INFO:root:eval_accuracy_multilabel after step 600: 0.3191489361702128: 
INFO:root:eval_accuracy_thresh after step 600: 0.8955167531967163: 
INFO:root:eval_roc_auc after step 600: 0.7927442590707544: 
INFO:root:eval_fbeta after step 600: 0.31072166562080383: 
INFO:root:lr after step 600: 5.267248723062775e-05
INFO:root:train_loss after step 600: 0.19564335495233537
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 13: 0.2966028501590093: 
INFO:root:eval_accuracy_multilabel after epoch 13: 0.30851063829787234: 
INFO:root:eval_accuracy_thresh after epoch 13: 0.8951367735862732: 
INFO:root:eval_roc_auc after epoch 13: 0.7927715762898041: 
INFO:root:eval_fbeta after epoch 13: 0.34465381503105164: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 620: 0.29643559952576953: 
INFO:root:eval_accuracy_multilabel after step 620: 0.31382978723404253: 
INFO:root:eval_accuracy_thresh after step 620: 0.8939969539642334: 
INFO:root:eval_roc_auc after step 620: 0.792023947136868: 
INFO:root:eval_fbeta after step 620: 0.33540377020835876: 
INFO:root:lr after step 620: 4.964582201835856e-05
INFO:root:train_loss after step 620: 0.17977626100182534
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 640: 0.30015023549397785: 
INFO:root:eval_accuracy_multilabel after step 640: 0.30851063829787234: 
INFO:root:eval_accuracy_thresh after step 640: 0.8951367735862732: 
INFO:root:eval_roc_auc after step 640: 0.7947434481806733: 
INFO:root:eval_fbeta after step 640: 0.35188600420951843: 
INFO:root:lr after step 640: 4.6219224523667933e-05
INFO:root:train_loss after step 640: 0.1594530776143074
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 14: 0.2986389845609665: 
INFO:root:eval_accuracy_multilabel after epoch 14: 0.3404255319148936: 
INFO:root:eval_accuracy_thresh after epoch 14: 0.8932371139526367: 
INFO:root:eval_roc_auc after epoch 14: 0.792654399797565: 
INFO:root:eval_fbeta after epoch 14: 0.34893208742141724: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 660: 0.29680036505063373: 
INFO:root:eval_accuracy_multilabel after step 660: 0.3191489361702128: 
INFO:root:eval_accuracy_thresh after step 660: 0.8958966732025146: 
INFO:root:eval_roc_auc after step 660: 0.7980912452626192: 
INFO:root:eval_fbeta after step 660: 0.33045706152915955: 
INFO:root:lr after step 660: 4.24624503900566e-05
INFO:root:train_loss after step 660: 0.16190194338560104
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 680: 0.2984897444645564: 
INFO:root:eval_accuracy_multilabel after step 680: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 680: 0.8985562324523926: 
INFO:root:eval_roc_auc after step 680: 0.8001278158301847: 
INFO:root:eval_fbeta after step 680: 0.3398211896419525: 
INFO:root:lr after step 680: 3.845197670524289e-05
INFO:root:train_loss after step 680: 0.13759037293493748
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 700: 0.3061739305655162: 
INFO:root:eval_accuracy_multilabel after step 700: 0.30851063829787234: 
INFO:root:eval_accuracy_thresh after step 700: 0.8920972943305969: 
INFO:root:eval_roc_auc after step 700: 0.7960388594629724: 
INFO:root:eval_fbeta after step 700: 0.3867374360561371: 
INFO:root:lr after step 700: 3.426944514819856e-05
INFO:root:train_loss after step 700: 0.138852896168828
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 15: 0.30573462943236035: 
INFO:root:eval_accuracy_multilabel after epoch 15: 0.30319148936170215: 
INFO:root:eval_accuracy_thresh after epoch 15: 0.8939969539642334: 
INFO:root:eval_roc_auc after epoch 15: 0.7946169263240225: 
INFO:root:eval_fbeta after epoch 15: 0.3592858612537384: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 720: 0.31149208545684814: 
INFO:root:eval_accuracy_multilabel after step 720: 0.30319148936170215: 
INFO:root:eval_accuracy_thresh after step 720: 0.8932371139526367: 
INFO:root:eval_roc_auc after step 720: 0.7891189765531996: 
INFO:root:eval_fbeta after step 720: 0.34024834632873535: 
INFO:root:lr after step 720: 3e-05
INFO:root:train_loss after step 720: 0.13071989603340625
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 740: 0.31572124858697254: 
INFO:root:eval_accuracy_multilabel after step 740: 0.31382978723404253: 
INFO:root:eval_accuracy_thresh after step 740: 0.8909574747085571: 
INFO:root:eval_roc_auc after step 740: 0.7871657953911537: 
INFO:root:eval_fbeta after step 740: 0.370752215385437: 
INFO:root:lr after step 740: 2.573055485180145e-05
INFO:root:train_loss after step 740: 0.11908238343894481
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 16: 0.3170862744251887: 
INFO:root:eval_accuracy_multilabel after epoch 16: 0.31382978723404253: 
INFO:root:eval_accuracy_thresh after epoch 16: 0.8924772143363953: 
INFO:root:eval_roc_auc after epoch 16: 0.7850041407153086: 
INFO:root:eval_fbeta after epoch 16: 0.3538195788860321: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 760: 0.3135870099067688: 
INFO:root:eval_accuracy_multilabel after step 760: 0.2925531914893617: 
INFO:root:eval_accuracy_thresh after step 760: 0.8966565728187561: 
INFO:root:eval_roc_auc after step 760: 0.7910124911578474: 
INFO:root:eval_fbeta after step 760: 0.3488994240760803: 
INFO:root:lr after step 760: 2.154802329475711e-05
INFO:root:train_loss after step 760: 0.11136390678584576
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 780: 0.31330008308092755: 
INFO:root:eval_accuracy_multilabel after step 780: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 780: 0.896276593208313: 
INFO:root:eval_roc_auc after step 780: 0.7917141123629108: 
INFO:root:eval_fbeta after step 780: 0.3533134460449219: 
INFO:root:lr after step 780: 1.753754960994341e-05
INFO:root:train_loss after step 780: 0.10177420675754548
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 17: 0.316981961329778: 
INFO:root:eval_accuracy_multilabel after epoch 17: 0.30319148936170215: 
INFO:root:eval_accuracy_thresh after epoch 17: 0.890577495098114: 
INFO:root:eval_roc_auc after epoch 17: 0.7933934024602749: 
INFO:root:eval_fbeta after epoch 17: 0.3549114763736725: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 800: 0.31700797379016876: 
INFO:root:eval_accuracy_multilabel after step 800: 0.30851063829787234: 
INFO:root:eval_accuracy_thresh after step 800: 0.890577495098114: 
INFO:root:eval_roc_auc after step 800: 0.793481105110908: 
INFO:root:eval_fbeta after step 800: 0.34991779923439026: 
INFO:root:lr after step 800: 1.3780775476332083e-05
INFO:root:train_loss after step 800: 0.10468153320252896
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 820: 0.3181575685739517: 
INFO:root:eval_accuracy_multilabel after step 820: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 820: 0.8920972943305969: 
INFO:root:eval_roc_auc after step 820: 0.7904510504189599: 
INFO:root:eval_fbeta after step 820: 0.35852667689323425: 
INFO:root:lr after step 820: 1.035417798164145e-05
INFO:root:train_loss after step 820: 0.09751340597867966
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 840: 0.317247877518336: 
INFO:root:eval_accuracy_multilabel after step 840: 0.3191489361702128: 
INFO:root:eval_accuracy_thresh after step 840: 0.8943769335746765: 
INFO:root:eval_roc_auc after step 840: 0.7905380341954072: 
INFO:root:eval_fbeta after step 840: 0.35032030940055847: 
INFO:root:lr after step 840: 7.3275127693722555e-06
INFO:root:train_loss after step 840: 0.09247569218277932
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 18: 0.3161810388167699: 
INFO:root:eval_accuracy_multilabel after epoch 18: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after epoch 18: 0.8920972943305969: 
INFO:root:eval_roc_auc after epoch 18: 0.7918262567358512: 
INFO:root:eval_fbeta after epoch 18: 0.35417118668556213: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 860: 0.3168712208668391: 
INFO:root:eval_accuracy_multilabel after step 860: 0.31382978723404253: 
INFO:root:eval_accuracy_thresh after step 860: 0.8924772143363953: 
INFO:root:eval_roc_auc after step 860: 0.7934545067660439: 
INFO:root:eval_fbeta after step 860: 0.3499216139316559: 
INFO:root:lr after step 860: 4.76239401506456e-06
INFO:root:train_loss after step 860: 0.09203909933567048
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 880: 0.3165478656689326: 
INFO:root:eval_accuracy_multilabel after step 880: 0.32978723404255317: 
INFO:root:eval_accuracy_thresh after step 880: 0.8936170339584351: 
INFO:root:eval_roc_auc after step 880: 0.790755134199433: 
INFO:root:eval_fbeta after step 880: 0.35140088200569153: 
INFO:root:lr after step 880: 2.711040139364447e-06
INFO:root:train_loss after step 880: 0.09014642089605332
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 19: 0.31725341578324634: 
INFO:root:eval_accuracy_multilabel after epoch 19: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after epoch 19: 0.8936170339584351: 
INFO:root:eval_roc_auc after epoch 19: 0.7911864587107422: 
INFO:root:eval_fbeta after epoch 19: 0.35212621092796326: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 900: 0.3183351506789525: 
INFO:root:eval_accuracy_multilabel after step 900: 0.3191489361702128: 
INFO:root:eval_accuracy_thresh after step 900: 0.8920972943305969: 
INFO:root:eval_roc_auc after step 900: 0.791103788179408: 
INFO:root:eval_fbeta after step 900: 0.35263797640800476: 
INFO:root:lr after step 900: 1.2152107915650823e-06
INFO:root:train_loss after step 900: 0.08820373676717282
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 920: 0.3184267332156499: 
INFO:root:eval_accuracy_multilabel after step 920: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 920: 0.8920972943305969: 
INFO:root:eval_roc_auc after step 920: 0.7912561895067373: 
INFO:root:eval_fbeta after step 920: 0.35287976264953613: 
INFO:root:lr after step 920: 3.053567435720195e-07
INFO:root:train_loss after step 920: 0.08838593736290931
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after step 940: 0.31818415721257526: 
INFO:root:eval_accuracy_multilabel after step 940: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after step 940: 0.8928571343421936: 
INFO:root:eval_roc_auc after step 940: 0.7913374222897006: 
INFO:root:eval_fbeta after step 940: 0.35287976264953613: 
INFO:root:lr after step 940: 0.0
INFO:root:train_loss after step 940: 0.08576637730002404
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 32


INFO:root:eval_loss after epoch 20: 0.31818415721257526: 
INFO:root:eval_accuracy_multilabel after epoch 20: 0.324468085106383: 
INFO:root:eval_accuracy_thresh after epoch 20: 0.8928571343421936: 
INFO:root:eval_roc_auc after epoch 20: 0.7913374222897006: 
INFO:root:eval_fbeta after epoch 20: 0.35287976264953613: 


(940, 0.26950127923900774)

#### Save the model

In [11]:
# Persist the model held by `learner` (created and trained in the cells above).
learner.save_model()

## Wrap into one function

Let's wrap all of the above in a single function so that we can easily call it several times in sequence.

In [7]:
# Root folder of the phenotype-classification data; labels.csv is read from it too.
BASE = Path('data') / 'phenotype_classification'
LABEL_PATH = BASE
# Local directory from which the BioBERT tokenizer and weights are loaded.
BIOBERT_PATH = Path('biobert')

def train(path_to_directory: str, model: str) -> None:
    """Fine-tune a multi-label BERT-style classifier on one dataset and save it.

    The data is read from ``BASE/path_to_directory`` (``train.csv`` and
    ``val.csv``), and ``BASE/path_to_directory/output/<model>`` is created and
    handed to the learner as its output directory.

    This function depends on notebook globals defined in earlier cells:
    ``labels``, ``metrics``, ``multi_gpu``, ``device_cuda`` and ``logger``,
    plus ``BASE``, ``LABEL_PATH`` and ``BIOBERT_PATH``.

    Args:
        path_to_directory: Dataset directory relative to ``BASE``, e.g.
            ``'transformer/original'``. The part after the first ``/`` selects
            the number of epochs: 20 for ``'original'`` and ``'synthetic'``,
            10 for anything else.
        model: ``'biobert'`` (tokenizer and weights loaded from
            ``BIOBERT_PATH``) or ``'bert'`` (``'bert-base-uncased'``).

    Returns:
        None. For an unsupported ``model`` a message is printed and the
        function returns without creating any directory or loading any data.
    """
    # Validate first so that an unsupported model name has no side effects
    # (previously the output directory was created before this check).
    if model not in ("biobert", "bert"):
        print("Model parameter must be either 'bert' or 'biobert'")
        return

    DATA_PATH = BASE/path_to_directory
    OUTPUT_DIR = DATA_PATH/'output'/model
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Everything after the first '/' names the dataset variant. Indexing with
    # [-1] instead of [1] keeps a slash-free path from raising IndexError, and
    # computing this up front avoids failing only after the expensive setup.
    dataset_name = path_to_directory.split('/', 1)[-1]
    epochs = 20 if dataset_name in ('original', 'synthetic') else 10

    if model == "biobert":
        tokenizer = BertTokenizer.from_pretrained(BIOBERT_PATH,
                                                  do_lower_case=True)
        pretrained_path = BIOBERT_PATH
    else:
        # For stock BERT the shortcut name is passed for both tokenizer and weights.
        tokenizer = "bert-base-uncased"
        pretrained_path = "bert-base-uncased"

    databunch = BertDataBunch(DATA_PATH,
                              LABEL_PATH,
                              tokenizer=tokenizer,
                              train_file='train.csv',
                              val_file='val.csv',
                              text_col='text',
                              label_file='labels.csv',
                              label_col=labels,
                              batch_size_per_gpu=10,
                              max_seq_length=512,
                              multi_gpu=multi_gpu,
                              multi_label=True,
                              model_type='bert',
                              clear_cache=True)

    # NOTE(review): the training log shown below in this notebook reaches only
    # step 380 after 10 epochs with the learning rate still rising (4.56e-05),
    # i.e. fewer total steps than warmup_steps=500 -- confirm this is intended.
    learner = BertLearner.from_pretrained_model(databunch,
                                                pretrained_path=pretrained_path,
                                                metrics=metrics,
                                                device=device_cuda,
                                                logger=logger,
                                                output_dir=OUTPUT_DIR,
                                                finetuned_wgts_path=None,
                                                warmup_steps=500,
                                                multi_gpu=multi_gpu,
                                                is_fp16=True,
                                                multi_label=True,
                                                logging_steps=20)

    learner.fit(epochs=epochs,
                lr=6e-5,
                validate=True, # Evaluate the model after each epoch
                schedule_type="warmup_cosine")

    learner.save_model()

Let's get training!

In [8]:
# Fine-tune both checkpoints on every dataset variant, one run after another
# (biobert first, then bert, for each directory).
dataset_variants = ('original', 'original_2x', 'synthetic', 'combined', 'original_eda')
model_names = ('biobert', 'bert')

for directory in dataset_variants:
    data_dir = 'transformer/' + directory
    for model in model_names:
        train(data_dir, model)

INFO:pytorch_transformers.tokenization_utils:Model name 'biobert' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc). Assuming 'biobert' is a path or url to a directory containing tokenizer files.
INFO:pytorch_transformers.tokenization_utils:Didn't find file biobert/added_tokens.json. We won't load it.
INFO:pytorch_transformers.tokenization_utils:Didn't find file biobert/special_tokens_map.json. We won't load it.
INFO:pytorch_transformers.tokenization_utils:loading file None
INFO:pytorch_transformers.tokenization_utils:loading file None
INFO:pytorch_transformers.tokenization_utils:loading file biobert/vocab.tx

data/phenotype_classification/transformer/original/output/biobert/tensorboard
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 20: 0.681080985069275: 
INFO:root:eval_accuracy_multilabel after step 20: 0.0: 
INFO:root:eval_accuracy_thresh after step 20: 0.5015197396278381: 
INFO:root:eval_roc_auc after step 20: 0.47685546296479137: 
INFO:root:eval_fbeta after step 20: 0.385684072971344: 
INFO:root:lr after step 20: 2.4000000000000003e-06
INFO:root:train_loss after step 20: 0.704734867811203
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 1: 0.591264259815216: 
INFO:root:eval_accuracy_multilabel after epoch 1: 0.015957446808510637: 
INFO:root:eval_accuracy_thresh after epoch 1: 0.7142857313156128: 
INFO:root:eval_roc_auc after epoch 1: 0.5180183793763946: 
INFO:root:eval_fbeta after epoch 1: 0.3928638994693756: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 40: 0.5783370971679688: 
INFO:root:eval_accuracy_multilabel after step 40: 0.015957446808510637: 
INFO:root:eval_accuracy_thresh after step 40: 0.7644376754760742: 
INFO:root:eval_roc_auc after step 40: 0.5195623358543473: 
INFO:root:eval_fbeta after step 40: 0.3928638994693756: 
INFO:root:lr after step 40: 4.800000000000001e-06
INFO:root:train_loss after step 40: 0.6380944550037384
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 60: 0.46692118644714353: 
INFO:root:eval_accuracy_multilabel after step 60: 0.16489361702127658: 
INFO:root:eval_accuracy_thresh after step 60: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 60: 0.6024756373294367: 
INFO:root:eval_fbeta after step 60: 0.3582831621170044: 
INFO:root:lr after step 60: 7.2e-06
INFO:root:train_loss after step 60: 0.5310846537351608
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 2: 0.4214345157146454: 
INFO:root:eval_accuracy_multilabel after epoch 2: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after epoch 2: 0.8647416234016418: 
INFO:root:eval_roc_auc after epoch 2: 0.6260576904089572: 
INFO:root:eval_fbeta after epoch 2: 0.11931895464658737: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 80: 0.4159953474998474: 
INFO:root:eval_accuracy_multilabel after step 80: 0.06382978723404255: 
INFO:root:eval_accuracy_thresh after step 80: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 80: 0.628742027211153: 
INFO:root:eval_fbeta after step 80: 0.20526163280010223: 
INFO:root:lr after step 80: 9.600000000000001e-06
INFO:root:train_loss after step 80: 0.43965174108743665
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 100: 0.3934819757938385: 
INFO:root:eval_accuracy_multilabel after step 100: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after step 100: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 100: 0.6434713226437075: 
INFO:root:eval_fbeta after step 100: 0.11784141510725021: 
INFO:root:lr after step 100: 1.2e-05
INFO:root:train_loss after step 100: 0.39844305366277694
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 3: 0.38626232743263245: 
INFO:root:eval_accuracy_multilabel after epoch 3: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after epoch 3: 0.8647416234016418: 
INFO:root:eval_roc_auc after epoch 3: 0.6472293941667817: 
INFO:root:eval_fbeta after epoch 3: 0.06592822074890137: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 120: 0.3842368423938751: 
INFO:root:eval_accuracy_multilabel after step 120: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after step 120: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 120: 0.655741518729883: 
INFO:root:eval_fbeta after step 120: 0.11252226680517197: 
INFO:root:lr after step 120: 1.44e-05
INFO:root:train_loss after step 120: 0.3932197719812393
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 140: 0.37992182970046995: 
INFO:root:eval_accuracy_multilabel after step 140: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after step 140: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 140: 0.655031249382911: 
INFO:root:eval_fbeta after step 140: 0.0: 
INFO:root:lr after step 140: 1.6800000000000002e-05
INFO:root:train_loss after step 140: 0.36812710762023926
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 4: 0.3792991995811462: 
INFO:root:eval_accuracy_multilabel after epoch 4: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after epoch 4: 0.8647416234016418: 
INFO:root:eval_roc_auc after epoch 4: 0.6603635147410203: 
INFO:root:eval_fbeta after epoch 4: 0.1148863285779953: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 160: 0.3781736731529236: 
INFO:root:eval_accuracy_multilabel after step 160: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after step 160: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 160: 0.6583592099287139: 
INFO:root:eval_fbeta after step 160: 0.06265490502119064: 
INFO:root:lr after step 160: 1.9200000000000003e-05
INFO:root:train_loss after step 160: 0.377841554582119
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 180: 0.37872740626335144: 
INFO:root:eval_accuracy_multilabel after step 180: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after step 180: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 180: 0.6604178185659839: 
INFO:root:eval_fbeta after step 180: 0.09622365236282349: 
INFO:root:lr after step 180: 2.16e-05
INFO:root:train_loss after step 180: 0.3699799567461014
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 5: 0.37413774728775023: 
INFO:root:eval_accuracy_multilabel after epoch 5: 0.0797872340425532: 
INFO:root:eval_accuracy_thresh after epoch 5: 0.8647416234016418: 
INFO:root:eval_roc_auc after epoch 5: 0.6754377628798799: 
INFO:root:eval_fbeta after epoch 5: 0.002045826520770788: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 200: 0.37563308477401736: 
INFO:root:eval_accuracy_multilabel after step 200: 0.06914893617021277: 
INFO:root:eval_accuracy_thresh after step 200: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 200: 0.6718771598112202: 
INFO:root:eval_fbeta after step 200: 0.002045826520770788: 
INFO:root:lr after step 200: 2.4e-05
INFO:root:train_loss after step 200: 0.36433395445346833
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 220: 0.36893428564071656: 
INFO:root:eval_accuracy_multilabel after step 220: 0.11170212765957446: 
INFO:root:eval_accuracy_thresh after step 220: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 220: 0.6934561422562746: 
INFO:root:eval_fbeta after step 220: 0.04983540251851082: 
INFO:root:lr after step 220: 2.64e-05
INFO:root:train_loss after step 220: 0.3662692531943321
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 6: 0.376698762178421: 
INFO:root:eval_accuracy_multilabel after epoch 6: 0.06382978723404255: 
INFO:root:eval_accuracy_thresh after epoch 6: 0.8647416234016418: 
INFO:root:eval_roc_auc after epoch 6: 0.6756969402262988: 
INFO:root:eval_fbeta after epoch 6: 0.0765368789434433: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 240: 0.36316806077957153: 
INFO:root:eval_accuracy_multilabel after step 240: 0.14361702127659576: 
INFO:root:eval_accuracy_thresh after step 240: 0.8628419637680054: 
INFO:root:eval_roc_auc after step 240: 0.709621403605774: 
INFO:root:eval_fbeta after step 240: 0.15398387610912323: 
INFO:root:lr after step 240: 2.88e-05
INFO:root:train_loss after step 240: 0.37272508889436723
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 260: 0.3545926570892334: 
INFO:root:eval_accuracy_multilabel after step 260: 0.2978723404255319: 
INFO:root:eval_accuracy_thresh after step 260: 0.8670212626457214: 
INFO:root:eval_roc_auc after step 260: 0.7453971337453842: 
INFO:root:eval_fbeta after step 260: 0.24575252830982208: 
INFO:root:lr after step 260: 3.12e-05
INFO:root:train_loss after step 260: 0.35224203020334244
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 7: 0.3534060835838318: 
INFO:root:eval_accuracy_multilabel after epoch 7: 0.22340425531914893: 
INFO:root:eval_accuracy_thresh after epoch 7: 0.8666413426399231: 
INFO:root:eval_roc_auc after epoch 7: 0.7406319731047966: 
INFO:root:eval_fbeta after epoch 7: 0.1811540424823761: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 280: 0.3466308653354645: 
INFO:root:eval_accuracy_multilabel after step 280: 0.2925531914893617: 
INFO:root:eval_accuracy_thresh after step 280: 0.865121603012085: 
INFO:root:eval_roc_auc after step 280: 0.7585337226753026: 
INFO:root:eval_fbeta after step 280: 0.14588923752307892: 
INFO:root:lr after step 280: 3.3600000000000004e-05
INFO:root:train_loss after step 280: 0.34442498087882994
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 300: 0.33219206929206846: 
INFO:root:eval_accuracy_multilabel after step 300: 0.40425531914893614: 
INFO:root:eval_accuracy_thresh after step 300: 0.8784194588661194: 
INFO:root:eval_roc_auc after step 300: 0.7721689194526175: 
INFO:root:eval_fbeta after step 300: 0.2633465528488159: 
INFO:root:lr after step 300: 3.6e-05
INFO:root:train_loss after step 300: 0.3330348029732704
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 8: 0.3306792140007019: 
INFO:root:eval_accuracy_multilabel after epoch 8: 0.42021276595744683: 
INFO:root:eval_accuracy_thresh after epoch 8: 0.8795592784881592: 
INFO:root:eval_roc_auc after epoch 8: 0.7776091753717342: 
INFO:root:eval_fbeta after epoch 8: 0.29607677459716797: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 320: 0.32664244771003725: 
INFO:root:eval_accuracy_multilabel after step 320: 0.39893617021276595: 
INFO:root:eval_accuracy_thresh after step 320: 0.8768997192382812: 
INFO:root:eval_roc_auc after step 320: 0.7831382920953378: 
INFO:root:eval_fbeta after step 320: 0.24172502756118774: 
INFO:root:lr after step 320: 3.8400000000000005e-05
INFO:root:train_loss after step 320: 0.3205516755580902
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 340: 0.3169743001461029: 
INFO:root:eval_accuracy_multilabel after step 340: 0.43617021276595747: 
INFO:root:eval_accuracy_thresh after step 340: 0.8818389177322388: 
INFO:root:eval_roc_auc after step 340: 0.7970875871329555: 
INFO:root:eval_fbeta after step 340: 0.2952215075492859: 
INFO:root:lr after step 340: 4.08e-05
INFO:root:train_loss after step 340: 0.3089729070663452
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 9: 0.3160004436969757: 
INFO:root:eval_accuracy_multilabel after epoch 9: 0.43617021276595747: 
INFO:root:eval_accuracy_thresh after epoch 9: 0.8829787373542786: 
INFO:root:eval_roc_auc after epoch 9: 0.8004075255227977: 
INFO:root:eval_fbeta after epoch 9: 0.3349592983722687: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 360: 0.3127008080482483: 
INFO:root:eval_accuracy_multilabel after step 360: 0.42021276595744683: 
INFO:root:eval_accuracy_thresh after step 360: 0.8810790181159973: 
INFO:root:eval_roc_auc after step 360: 0.8000891076400546: 
INFO:root:eval_fbeta after step 360: 0.31304484605789185: 
INFO:root:lr after step 360: 4.32e-05
INFO:root:train_loss after step 360: 0.2945245660841465
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 380: 0.30668567419052123: 
INFO:root:eval_accuracy_multilabel after step 380: 0.425531914893617: 
INFO:root:eval_accuracy_thresh after step 380: 0.8833586573600769: 
INFO:root:eval_roc_auc after step 380: 0.810909144764124: 
INFO:root:eval_fbeta after step 380: 0.3807492256164551: 
INFO:root:lr after step 380: 4.5600000000000004e-05
INFO:root:train_loss after step 380: 0.2834231719374657
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 10: 0.30668567419052123: 
INFO:root:eval_accuracy_multilabel after epoch 10: 0.425531914893617: 
INFO:root:eval_accuracy_thresh after epoch 10: 0.8833586573600769: 
INFO:root:eval_roc_auc after epoch 10: 0.810909144764124: 
INFO:root:eval_fbeta after epoch 10: 0.3807492256164551: 
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): s3.amazonaws.com:443
DEBUG:urllib3.connectionpool:https://s3.amazonaws.com:443 "HEAD /models.huggingface.co/bert/bert-base-uncased-vocab.txt HTTP/1.1" 200 0
INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/aa5118/.cache/torch/pytorch_transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
INFO:root:Writing example 0 of 1500
INFO:root:Saving features into cached file data/phenotype_classification/transformer/original/cache/

data/phenotype_classification/transformer/original/output/bert/tensorboard
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 20: 0.6170857906341553: 
INFO:root:eval_accuracy_multilabel after step 20: 0.11170212765957446: 
INFO:root:eval_accuracy_thresh after step 20: 0.6177811622619629: 
INFO:root:eval_roc_auc after step 20: 0.5081776623684366: 
INFO:root:eval_fbeta after step 20: 0.3829066753387451: 
INFO:root:lr after step 20: 2.4000000000000003e-06
INFO:root:train_loss after step 20: 0.6760517209768295
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 1: 0.568950068950653: 
INFO:root:eval_accuracy_multilabel after epoch 1: 0.0797872340425532: 
INFO:root:eval_accuracy_thresh after epoch 1: 0.7363222241401672: 
INFO:root:eval_roc_auc after epoch 1: 0.5249674177050216: 
INFO:root:eval_fbeta after epoch 1: 0.37463003396987915: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 40: 0.5631999373435974: 
INFO:root:eval_accuracy_multilabel after step 40: 0.14361702127659576: 
INFO:root:eval_accuracy_thresh after step 40: 0.7632978558540344: 
INFO:root:eval_roc_auc after step 40: 0.5301947779467231: 
INFO:root:eval_fbeta after step 40: 0.37601062655448914: 
INFO:root:lr after step 40: 4.800000000000001e-06
INFO:root:train_loss after step 40: 0.5949405461549759
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 60: 0.5021929204463959: 
INFO:root:eval_accuracy_multilabel after step 60: 0.15425531914893617: 
INFO:root:eval_accuracy_thresh after step 60: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 60: 0.530729176951482: 
INFO:root:eval_fbeta after step 60: 0.35250866413116455: 
INFO:root:lr after step 60: 7.2e-06
INFO:root:train_loss after step 60: 0.5362770780920982
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 2: 0.461826765537262: 
INFO:root:eval_accuracy_multilabel after epoch 2: 0.15425531914893617: 
INFO:root:eval_accuracy_thresh after epoch 2: 0.8647416234016418: 
INFO:root:eval_roc_auc after epoch 2: 0.5684184998321518: 
INFO:root:eval_fbeta after epoch 2: 0.3344569504261017: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 80: 0.4528141260147095: 
INFO:root:eval_accuracy_multilabel after step 80: 0.15425531914893617: 
INFO:root:eval_accuracy_thresh after step 80: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 80: 0.5805264015323552: 
INFO:root:eval_fbeta after step 80: 0.31288444995880127: 
INFO:root:lr after step 80: 9.600000000000001e-06
INFO:root:train_loss after step 80: 0.4824242264032364
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 100: 0.4176962375640869: 
INFO:root:eval_accuracy_multilabel after step 100: 0.15425531914893617: 
INFO:root:eval_accuracy_thresh after step 100: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 100: 0.6372066359274107: 
INFO:root:eval_fbeta after step 100: 0.2044326663017273: 
INFO:root:lr after step 100: 1.2e-05
INFO:root:train_loss after step 100: 0.43381373435258863
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 3: 0.40165682435035704: 
INFO:root:eval_accuracy_multilabel after epoch 3: 0.15425531914893617: 
INFO:root:eval_accuracy_thresh after epoch 3: 0.8647416234016418: 
INFO:root:eval_roc_auc after epoch 3: 0.6522592859540689: 
INFO:root:eval_fbeta after epoch 3: 0.09700661152601242: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 120: 0.3958388686180115: 
INFO:root:eval_accuracy_multilabel after step 120: 0.06382978723404255: 
INFO:root:eval_accuracy_thresh after step 120: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 120: 0.6507147123871961: 
INFO:root:eval_fbeta after step 120: 0.07747478783130646: 
INFO:root:lr after step 120: 1.44e-05
INFO:root:train_loss after step 120: 0.4057565569877625
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 140: 0.3849126577377319: 
INFO:root:eval_accuracy_multilabel after step 140: 0.1595744680851064: 
INFO:root:eval_accuracy_thresh after step 140: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 140: 0.6549923727809482: 
INFO:root:eval_fbeta after step 140: 0.0: 
INFO:root:lr after step 140: 1.6800000000000002e-05
INFO:root:train_loss after step 140: 0.3851140007376671
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 4: 0.3802406549453735: 
INFO:root:eval_accuracy_multilabel after epoch 4: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after epoch 4: 0.8647416234016418: 
INFO:root:eval_roc_auc after epoch 4: 0.6614952558203827: 
INFO:root:eval_fbeta after epoch 4: 0.023280978202819824: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 160: 0.3806151390075684: 
INFO:root:eval_accuracy_multilabel after step 160: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after step 160: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 160: 0.6562567879781205: 
INFO:root:eval_fbeta after step 160: 0.11784141510725021: 
INFO:root:lr after step 160: 1.9200000000000003e-05
INFO:root:train_loss after step 160: 0.37891134023666384
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 180: 0.3810895800590515: 
INFO:root:eval_accuracy_multilabel after step 180: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after step 180: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 180: 0.6483697744910251: 
INFO:root:eval_fbeta after step 180: 0.09395159035921097: 
INFO:root:lr after step 180: 2.16e-05
INFO:root:train_loss after step 180: 0.3708185777068138
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 5: 0.37868776321411135: 
INFO:root:eval_accuracy_multilabel after epoch 5: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after epoch 5: 0.8647416234016418: 
INFO:root:eval_roc_auc after epoch 5: 0.6600105398787544: 
INFO:root:eval_fbeta after epoch 5: 0.0: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 200: 0.3773936152458191: 
INFO:root:eval_accuracy_multilabel after step 200: 0.12234042553191489: 
INFO:root:eval_accuracy_thresh after step 200: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 200: 0.6687898639442349: 
INFO:root:eval_fbeta after step 200: 0.0410366915166378: 
INFO:root:lr after step 200: 2.4e-05
INFO:root:train_loss after step 200: 0.36690195351839067
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 220: 0.3775757193565369: 
INFO:root:eval_accuracy_multilabel after step 220: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after step 220: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 220: 0.6661703214786437: 
INFO:root:eval_fbeta after step 220: 0.005610461346805096: 
INFO:root:lr after step 220: 2.64e-05
INFO:root:train_loss after step 220: 0.37381257563829423
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 6: 0.3765136420726776: 
INFO:root:eval_accuracy_multilabel after epoch 6: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after epoch 6: 0.8647416234016418: 
INFO:root:eval_roc_auc after epoch 6: 0.6704239154045142: 
INFO:root:eval_fbeta after epoch 6: 0.005610461346805096: 
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 240: 0.3722682595252991: 
INFO:root:eval_accuracy_multilabel after step 240: 0.05851063829787234: 
INFO:root:eval_accuracy_thresh after step 240: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 240: 0.6845694447187063: 
INFO:root:eval_fbeta after step 240: 0.06126878038048744: 
INFO:root:lr after step 240: 2.88e-05
INFO:root:train_loss after step 240: 0.3672689452767372
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after step 260: 0.3624307632446289: 
INFO:root:eval_accuracy_multilabel after step 260: 0.21808510638297873: 
INFO:root:eval_accuracy_thresh after step 260: 0.8647416234016418: 
INFO:root:eval_roc_auc after step 260: 0.7229283090776248: 
INFO:root:eval_fbeta after step 260: 0.08554257452487946: 
INFO:root:lr after step 260: 3.12e-05
INFO:root:train_loss after step 260: 0.3653022199869156
INFO:root:Running evaluation
INFO:root:  Num examples = 188
INFO:root:  Batch size = 40


INFO:root:eval_loss after epoch 7: 0.3585233151912689: 
INFO:root:eval_accuracy_multilabel after epoch 7: 0.14893617021276595: 
INFO:root:eval_accuracy_thresh after epoch 7: 0.8647416234016418: 
INFO:root:eval_roc_auc after epoch 7: 0.7265302571039276: 
INFO:root:eval_fbeta after epoch 7: 0.07309160381555557: 


KeyboardInterrupt: 