In [1]:
import torch
import pandas as pd
from torch.optim.adam import Adam
from flair.data import Corpus
from flair.datasets import CSVClassificationCorpus
from flair.embeddings import WordEmbeddings, FlairEmbeddings, DocumentRNNEmbeddings,  TransformerWordEmbeddings, TransformerDocumentEmbeddings
from flair.models import TextClassifier
from flair.trainers import ModelTrainer

# load data

In [2]:
# this is the folder in which train, test and dev files reside
data_folder = './dataset/Spooky Author Identification/split/'

# column format indicating which columns hold the text and label(s)
column_name_map = {2: "text", 3: "label"}

# load corpus containing training, test and dev data and if CSV has a header, you can skip it
corpus: Corpus = CSVClassificationCorpus(data_folder,
                                         column_name_map,
                                         skip_header=True,
                                         delimiter=',',    # tab-separated files
) 
label_dict = corpus.make_label_dictionary()

2020-10-22 01:57:39,711 Reading data from dataset/Spooky Author Identification/split
2020-10-22 01:57:39,712 Train: dataset/Spooky Author Identification/split/train.csv
2020-10-22 01:57:39,712 Dev: dataset/Spooky Author Identification/split/dev.csv
2020-10-22 01:57:39,713 Test: dataset/Spooky Author Identification/split/test.csv
2020-10-22 01:57:39,767 Computing label dictionary. Progress:


100%|██████████| 15668/15668 [00:02<00:00, 5498.08it/s]

2020-10-22 01:57:42,736 [b'EAP', b'MWS', b'HPL']





In [3]:
print(corpus.obtain_statistics())

{
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 11762,
        "number_of_documents_per_class": {
            "EAP": 4780,
            "MWS": 3642,
            "HPL": 3340
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 354144,
            "min": 4,
            "max": 446,
            "avg": 30.109165107974835
        }
    },
    "TEST": {
        "dataset": "TEST",
        "total_number_of_documents": 3906,
        "number_of_documents_per_class": {
            "EAP": 1554,
            "HPL": 1142,
            "MWS": 1210
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 120056,
            "min": 4,
            "max": 875,
            "avg": 30.736303123399896
        }
    },
    "DEV": {
        "dataset": "DEV",
        "total_number_of_documents": 3911,
        "number_of_documents_per_class": {
            "HPL": 1153,
            "EAP": 15

# glove embeddings

In [36]:
word_embeddings = [WordEmbeddings('glove')]

document_embeddings = DocumentRNNEmbeddings(word_embeddings, hidden_size=256)

classifier = TextClassifier(document_embeddings, label_dictionary=label_dict)

trainer = ModelTrainer(classifier, corpus)

trainer.train('classifiers/spooky_authorship_classifier_glove',
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=5,
              max_epochs=150)

2020-10-21 23:29:36,541 ----------------------------------------------------------------------------------------------------
2020-10-21 23:29:36,541 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings('glove')
    )
    (word_reprojection_map): Linear(in_features=100, out_features=100, bias=True)
    (rnn): GRU(100, 256, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=256, out_features=3, bias=True)
  (loss_function): CrossEntropyLoss()
  (beta): 1.0
  (weights): None
  (weight_tensor) None
)"
2020-10-21 23:29:36,542 ----------------------------------------------------------------------------------------------------
2020-10-21 23:29:36,542 Corpus: "Corpus: 11762 train + 3911 dev + 3906 test sentences"
2020-10-21 23:29:36,543 ----------------------------------------------------------------------------------------------------
2020-10-21 23:2

2020-10-21 23:31:47,286 epoch 5 - iter 144/368 - loss 0.93159901 - samples/sec: 738.12 - lr: 0.100000
2020-10-21 23:31:48,814 epoch 5 - iter 180/368 - loss 0.93545088 - samples/sec: 787.14 - lr: 0.100000
2020-10-21 23:31:52,467 epoch 5 - iter 216/368 - loss 0.93576069 - samples/sec: 786.86 - lr: 0.100000
2020-10-21 23:31:53,979 epoch 5 - iter 252/368 - loss 0.93436251 - samples/sec: 794.39 - lr: 0.100000
2020-10-21 23:31:55,521 epoch 5 - iter 288/368 - loss 0.93376560 - samples/sec: 777.09 - lr: 0.100000
2020-10-21 23:31:57,153 epoch 5 - iter 324/368 - loss 0.93307721 - samples/sec: 734.47 - lr: 0.100000
2020-10-21 23:31:58,769 epoch 5 - iter 360/368 - loss 0.92926995 - samples/sec: 742.29 - lr: 0.100000
2020-10-21 23:31:59,223 ----------------------------------------------------------------------------------------------------
2020-10-21 23:31:59,224 EPOCH 5 done: loss 0.9314 - lr 0.1000000
2020-10-21 23:32:07,426 DEV : loss 0.8543479442596436 - score 0.6339
2020-10-21 23:32:09,326 BAD

2020-10-21 23:34:36,552 DEV : loss 0.8141556978225708 - score 0.6446
2020-10-21 23:34:38,455 BAD EPOCHS (no improvement): 1
2020-10-21 23:34:38,456 ----------------------------------------------------------------------------------------------------
2020-10-21 23:34:40,530 epoch 11 - iter 36/368 - loss 0.85544606 - samples/sec: 638.11 - lr: 0.100000
2020-10-21 23:34:44,429 epoch 11 - iter 72/368 - loss 0.84553569 - samples/sec: 705.24 - lr: 0.100000
2020-10-21 23:34:46,108 epoch 11 - iter 108/368 - loss 0.83701550 - samples/sec: 714.98 - lr: 0.100000
2020-10-21 23:34:47,778 epoch 11 - iter 144/368 - loss 0.82869205 - samples/sec: 718.05 - lr: 0.100000
2020-10-21 23:34:49,350 epoch 11 - iter 180/368 - loss 0.82934599 - samples/sec: 762.89 - lr: 0.100000
2020-10-21 23:34:50,991 epoch 11 - iter 216/368 - loss 0.82982853 - samples/sec: 734.55 - lr: 0.100000
2020-10-21 23:34:52,578 epoch 11 - iter 252/368 - loss 0.82439812 - samples/sec: 757.42 - lr: 0.100000
2020-10-21 23:34:54,232 epoch 11

2020-10-21 23:37:26,390 epoch 16 - iter 252/368 - loss 0.78221223 - samples/sec: 792.90 - lr: 0.100000
2020-10-21 23:37:27,960 epoch 16 - iter 288/368 - loss 0.78330726 - samples/sec: 765.72 - lr: 0.100000
2020-10-21 23:37:29,714 epoch 16 - iter 324/368 - loss 0.78588932 - samples/sec: 684.91 - lr: 0.100000
2020-10-21 23:37:31,391 epoch 16 - iter 360/368 - loss 0.78471214 - samples/sec: 713.99 - lr: 0.100000
2020-10-21 23:37:31,778 ----------------------------------------------------------------------------------------------------
2020-10-21 23:37:31,778 EPOCH 16 done: loss 0.7848 - lr 0.1000000
2020-10-21 23:37:40,544 DEV : loss 0.6886066794395447 - score 0.7111
2020-10-21 23:37:42,504 BAD EPOCHS (no improvement): 0
saving best model
2020-10-21 23:37:44,207 ----------------------------------------------------------------------------------------------------
2020-10-21 23:37:46,457 epoch 17 - iter 36/368 - loss 0.77470153 - samples/sec: 597.24 - lr: 0.100000
2020-10-21 23:37:48,294 epoc

2020-10-21 23:40:20,836 epoch 22 - iter 36/368 - loss 0.69647878 - samples/sec: 645.69 - lr: 0.100000
2020-10-21 23:40:22,510 epoch 22 - iter 72/368 - loss 0.72251955 - samples/sec: 721.13 - lr: 0.100000
2020-10-21 23:40:24,171 epoch 22 - iter 108/368 - loss 0.73687818 - samples/sec: 727.06 - lr: 0.100000
2020-10-21 23:40:25,857 epoch 22 - iter 144/368 - loss 0.73680936 - samples/sec: 717.18 - lr: 0.100000
2020-10-21 23:40:27,567 epoch 22 - iter 180/368 - loss 0.73450513 - samples/sec: 702.94 - lr: 0.100000
2020-10-21 23:40:29,268 epoch 22 - iter 216/368 - loss 0.73652112 - samples/sec: 708.23 - lr: 0.100000
2020-10-21 23:40:33,613 epoch 22 - iter 252/368 - loss 0.73656856 - samples/sec: 635.70 - lr: 0.100000
2020-10-21 23:40:35,341 epoch 22 - iter 288/368 - loss 0.73935149 - samples/sec: 694.91 - lr: 0.100000
2020-10-21 23:40:37,051 epoch 22 - iter 324/368 - loss 0.73551780 - samples/sec: 701.49 - lr: 0.100000
2020-10-21 23:40:38,779 epoch 22 - iter 360/368 - loss 0.73459725 - samples

2020-10-21 23:43:13,025 epoch 27 - iter 360/368 - loss 0.69734888 - samples/sec: 679.13 - lr: 0.100000
2020-10-21 23:43:13,506 ----------------------------------------------------------------------------------------------------
2020-10-21 23:43:13,506 EPOCH 27 done: loss 0.6982 - lr 0.1000000
2020-10-21 23:43:19,963 DEV : loss 0.7510592937469482 - score 0.6883
2020-10-21 23:43:21,909 BAD EPOCHS (no improvement): 3
2020-10-21 23:43:21,910 ----------------------------------------------------------------------------------------------------
2020-10-21 23:43:24,122 epoch 28 - iter 36/368 - loss 0.70842419 - samples/sec: 596.64 - lr: 0.100000
2020-10-21 23:43:26,043 epoch 28 - iter 72/368 - loss 0.68488073 - samples/sec: 628.91 - lr: 0.100000
2020-10-21 23:43:30,136 epoch 28 - iter 108/368 - loss 0.69510447 - samples/sec: 671.31 - lr: 0.100000
2020-10-21 23:43:31,722 epoch 28 - iter 144/368 - loss 0.69131341 - samples/sec: 759.46 - lr: 0.100000
2020-10-21 23:43:33,369 epoch 28 - iter 180/368

2020-10-21 23:46:04,589 epoch 33 - iter 144/368 - loss 0.67661164 - samples/sec: 712.76 - lr: 0.100000
2020-10-21 23:46:08,459 epoch 33 - iter 180/368 - loss 0.67139573 - samples/sec: 719.95 - lr: 0.100000
2020-10-21 23:46:10,121 epoch 33 - iter 216/368 - loss 0.66805120 - samples/sec: 720.73 - lr: 0.100000
2020-10-21 23:46:11,827 epoch 33 - iter 252/368 - loss 0.66920253 - samples/sec: 704.58 - lr: 0.100000
2020-10-21 23:46:13,490 epoch 33 - iter 288/368 - loss 0.66717444 - samples/sec: 723.08 - lr: 0.100000
2020-10-21 23:46:15,164 epoch 33 - iter 324/368 - loss 0.67058101 - samples/sec: 718.22 - lr: 0.100000
2020-10-21 23:46:16,841 epoch 33 - iter 360/368 - loss 0.67135442 - samples/sec: 714.58 - lr: 0.100000
2020-10-21 23:46:17,291 ----------------------------------------------------------------------------------------------------
2020-10-21 23:46:17,291 EPOCH 33 done: loss 0.6714 - lr 0.1000000
2020-10-21 23:46:25,687 DEV : loss 0.592469334602356 - score 0.754
2020-10-21 23:46:27,6

2020-10-21 23:49:01,867 DEV : loss 0.6200039982795715 - score 0.7481
Epoch    38: reducing learning rate of group 0 to 5.0000e-02.
2020-10-21 23:49:03,771 BAD EPOCHS (no improvement): 6
2020-10-21 23:49:03,772 ----------------------------------------------------------------------------------------------------
2020-10-21 23:49:08,130 epoch 39 - iter 36/368 - loss 0.60912606 - samples/sec: 701.93 - lr: 0.050000
2020-10-21 23:49:09,686 epoch 39 - iter 72/368 - loss 0.62134207 - samples/sec: 768.91 - lr: 0.050000
2020-10-21 23:49:11,229 epoch 39 - iter 108/368 - loss 0.61379554 - samples/sec: 776.37 - lr: 0.050000
2020-10-21 23:49:12,898 epoch 39 - iter 144/368 - loss 0.61025615 - samples/sec: 718.06 - lr: 0.050000
2020-10-21 23:49:14,460 epoch 39 - iter 180/368 - loss 0.60833614 - samples/sec: 768.07 - lr: 0.050000
2020-10-21 23:49:18,045 epoch 39 - iter 216/368 - loss 0.60684191 - samples/sec: 761.02 - lr: 0.050000
2020-10-21 23:49:21,582 epoch 39 - iter 252/368 - loss 0.59948962 - sampl

2020-10-21 23:51:56,106 epoch 44 - iter 252/368 - loss 0.57350114 - samples/sec: 772.12 - lr: 0.050000
2020-10-21 23:51:57,656 epoch 44 - iter 288/368 - loss 0.57407963 - samples/sec: 775.60 - lr: 0.050000
2020-10-21 23:51:59,246 epoch 44 - iter 324/368 - loss 0.57679555 - samples/sec: 752.15 - lr: 0.050000
2020-10-21 23:52:00,920 epoch 44 - iter 360/368 - loss 0.57841598 - samples/sec: 715.00 - lr: 0.050000
2020-10-21 23:52:03,411 ----------------------------------------------------------------------------------------------------
2020-10-21 23:52:03,412 EPOCH 44 done: loss 0.5792 - lr 0.0500000
2020-10-21 23:52:09,721 DEV : loss 0.6020042300224304 - score 0.7553
2020-10-21 23:52:11,619 BAD EPOCHS (no improvement): 5
2020-10-21 23:52:11,619 ----------------------------------------------------------------------------------------------------
2020-10-21 23:52:13,792 epoch 45 - iter 36/368 - loss 0.56390259 - samples/sec: 623.59 - lr: 0.050000
2020-10-21 23:52:15,505 epoch 45 - iter 72/368

2020-10-21 23:55:04,027 epoch 50 - iter 36/368 - loss 0.52880050 - samples/sec: 639.72 - lr: 0.025000
2020-10-21 23:55:05,769 epoch 50 - iter 72/368 - loss 0.55378627 - samples/sec: 692.33 - lr: 0.025000
2020-10-21 23:55:07,487 epoch 50 - iter 108/368 - loss 0.54427359 - samples/sec: 699.36 - lr: 0.025000
2020-10-21 23:55:09,134 epoch 50 - iter 144/368 - loss 0.55007847 - samples/sec: 728.54 - lr: 0.025000
2020-10-21 23:55:10,780 epoch 50 - iter 180/368 - loss 0.55188596 - samples/sec: 728.21 - lr: 0.025000
2020-10-21 23:55:12,428 epoch 50 - iter 216/368 - loss 0.55163858 - samples/sec: 725.77 - lr: 0.025000
2020-10-21 23:55:16,235 epoch 50 - iter 252/368 - loss 0.55229783 - samples/sec: 736.45 - lr: 0.025000
2020-10-21 23:55:17,828 epoch 50 - iter 288/368 - loss 0.54870663 - samples/sec: 753.64 - lr: 0.025000
2020-10-21 23:55:19,465 epoch 50 - iter 324/368 - loss 0.54721876 - samples/sec: 732.81 - lr: 0.025000
2020-10-21 23:55:21,118 epoch 50 - iter 360/368 - loss 0.54590359 - samples

2020-10-21 23:57:51,618 epoch 55 - iter 360/368 - loss 0.53133018 - samples/sec: 729.24 - lr: 0.025000
2020-10-21 23:57:52,085 ----------------------------------------------------------------------------------------------------
2020-10-21 23:57:52,086 EPOCH 55 done: loss 0.5303 - lr 0.0250000
2020-10-21 23:57:58,293 DEV : loss 0.568246603012085 - score 0.775
2020-10-21 23:58:00,195 BAD EPOCHS (no improvement): 4
2020-10-21 23:58:00,196 ----------------------------------------------------------------------------------------------------
2020-10-21 23:58:02,397 epoch 56 - iter 36/368 - loss 0.52235101 - samples/sec: 615.88 - lr: 0.025000
2020-10-21 23:58:04,176 epoch 56 - iter 72/368 - loss 0.52217986 - samples/sec: 676.49 - lr: 0.025000
2020-10-21 23:58:08,029 epoch 56 - iter 108/368 - loss 0.51239761 - samples/sec: 720.15 - lr: 0.025000
2020-10-21 23:58:09,675 epoch 56 - iter 144/368 - loss 0.50639201 - samples/sec: 729.34 - lr: 0.025000
2020-10-21 23:58:11,359 epoch 56 - iter 180/368 -

2020-10-22 00:00:35,744 epoch 61 - iter 144/368 - loss 0.51698937 - samples/sec: 738.75 - lr: 0.012500
2020-10-22 00:00:39,497 epoch 61 - iter 180/368 - loss 0.51286459 - samples/sec: 744.92 - lr: 0.012500
2020-10-22 00:00:41,146 epoch 61 - iter 216/368 - loss 0.51260120 - samples/sec: 727.97 - lr: 0.012500
2020-10-22 00:00:42,729 epoch 61 - iter 252/368 - loss 0.51040675 - samples/sec: 761.67 - lr: 0.012500
2020-10-22 00:00:44,377 epoch 61 - iter 288/368 - loss 0.51086716 - samples/sec: 727.55 - lr: 0.012500
2020-10-22 00:00:45,977 epoch 61 - iter 324/368 - loss 0.51175125 - samples/sec: 750.18 - lr: 0.012500
2020-10-22 00:00:47,593 epoch 61 - iter 360/368 - loss 0.51282159 - samples/sec: 743.31 - lr: 0.012500
2020-10-22 00:00:48,063 ----------------------------------------------------------------------------------------------------
2020-10-22 00:00:48,064 EPOCH 61 done: loss 0.5130 - lr 0.0125000
2020-10-22 00:00:56,431 DEV : loss 0.6094518899917603 - score 0.7686
2020-10-22 00:00:58

2020-10-22 00:03:16,271 EPOCH 66 done: loss 0.4987 - lr 0.0062500
2020-10-22 00:03:22,533 DEV : loss 0.6037938594818115 - score 0.7719
2020-10-22 00:03:24,443 BAD EPOCHS (no improvement): 3
2020-10-22 00:03:24,444 ----------------------------------------------------------------------------------------------------
2020-10-22 00:03:28,913 epoch 67 - iter 36/368 - loss 0.46868625 - samples/sec: 596.37 - lr: 0.006250
2020-10-22 00:03:30,511 epoch 67 - iter 72/368 - loss 0.45895431 - samples/sec: 754.45 - lr: 0.006250
2020-10-22 00:03:32,129 epoch 67 - iter 108/368 - loss 0.48265560 - samples/sec: 741.31 - lr: 0.006250
2020-10-22 00:03:33,682 epoch 67 - iter 144/368 - loss 0.48881726 - samples/sec: 774.77 - lr: 0.006250
2020-10-22 00:03:35,205 epoch 67 - iter 180/368 - loss 0.48896166 - samples/sec: 789.34 - lr: 0.006250
2020-10-22 00:03:36,900 epoch 67 - iter 216/368 - loss 0.49793383 - samples/sec: 710.56 - lr: 0.006250
2020-10-22 00:03:38,565 epoch 67 - iter 252/368 - loss 0.49760378 - s

2020-10-22 00:06:07,465 epoch 72 - iter 216/368 - loss 0.49933101 - samples/sec: 712.07 - lr: 0.003125
2020-10-22 00:06:09,118 epoch 72 - iter 252/368 - loss 0.49351871 - samples/sec: 726.24 - lr: 0.003125
2020-10-22 00:06:10,757 epoch 72 - iter 288/368 - loss 0.49458674 - samples/sec: 733.55 - lr: 0.003125
2020-10-22 00:06:12,415 epoch 72 - iter 324/368 - loss 0.49406976 - samples/sec: 726.11 - lr: 0.003125
2020-10-22 00:06:15,977 epoch 72 - iter 360/368 - loss 0.49640846 - samples/sec: 798.03 - lr: 0.003125
2020-10-22 00:06:16,409 ----------------------------------------------------------------------------------------------------
2020-10-22 00:06:16,409 EPOCH 72 done: loss 0.4953 - lr 0.0031250
2020-10-22 00:06:22,708 DEV : loss 0.5951624512672424 - score 0.7742
2020-10-22 00:06:24,616 BAD EPOCHS (no improvement): 2
2020-10-22 00:06:24,617 ----------------------------------------------------------------------------------------------------
2020-10-22 00:06:26,798 epoch 73 - iter 36/36

2020-10-22 00:08:56,278 epoch 78 - iter 36/368 - loss 0.46860944 - samples/sec: 664.11 - lr: 0.003125
2020-10-22 00:08:57,989 epoch 78 - iter 72/368 - loss 0.47665452 - samples/sec: 702.42 - lr: 0.003125
2020-10-22 00:08:59,644 epoch 78 - iter 108/368 - loss 0.48204628 - samples/sec: 728.79 - lr: 0.003125
2020-10-22 00:09:01,332 epoch 78 - iter 144/368 - loss 0.48620361 - samples/sec: 713.12 - lr: 0.003125
2020-10-22 00:09:02,965 epoch 78 - iter 180/368 - loss 0.48318881 - samples/sec: 739.83 - lr: 0.003125
2020-10-22 00:09:06,825 epoch 78 - iter 216/368 - loss 0.48863978 - samples/sec: 701.68 - lr: 0.003125
2020-10-22 00:09:08,497 epoch 78 - iter 252/368 - loss 0.48834822 - samples/sec: 717.45 - lr: 0.003125
2020-10-22 00:09:10,173 epoch 78 - iter 288/368 - loss 0.48799227 - samples/sec: 715.47 - lr: 0.003125
2020-10-22 00:09:11,819 epoch 78 - iter 324/368 - loss 0.48809788 - samples/sec: 730.08 - lr: 0.003125
2020-10-22 00:09:13,478 epoch 78 - iter 360/368 - loss 0.48895554 - samples

2020-10-22 00:11:43,799 epoch 83 - iter 360/368 - loss 0.48651683 - samples/sec: 729.71 - lr: 0.003125
2020-10-22 00:11:44,224 ----------------------------------------------------------------------------------------------------
2020-10-22 00:11:44,224 EPOCH 83 done: loss 0.4870 - lr 0.0031250
2020-10-22 00:11:50,394 DEV : loss 0.579639196395874 - score 0.7827
2020-10-22 00:11:52,315 BAD EPOCHS (no improvement): 0
saving best model
2020-10-22 00:11:53,949 ----------------------------------------------------------------------------------------------------
2020-10-22 00:11:56,166 epoch 84 - iter 36/368 - loss 0.47806207 - samples/sec: 604.10 - lr: 0.003125
2020-10-22 00:12:00,093 epoch 84 - iter 72/368 - loss 0.48498031 - samples/sec: 709.42 - lr: 0.003125
2020-10-22 00:12:01,721 epoch 84 - iter 108/368 - loss 0.48017066 - samples/sec: 734.48 - lr: 0.003125
2020-10-22 00:12:03,466 epoch 84 - iter 144/368 - loss 0.48057049 - samples/sec: 687.00 - lr: 0.003125
2020-10-22 00:12:05,154 epoch 

2020-10-22 00:14:29,954 epoch 89 - iter 144/368 - loss 0.47267108 - samples/sec: 706.38 - lr: 0.003125
2020-10-22 00:14:33,788 epoch 89 - iter 180/368 - loss 0.47718580 - samples/sec: 741.19 - lr: 0.003125
2020-10-22 00:14:35,441 epoch 89 - iter 216/368 - loss 0.47646018 - samples/sec: 724.03 - lr: 0.003125
2020-10-22 00:14:37,100 epoch 89 - iter 252/368 - loss 0.47370626 - samples/sec: 722.31 - lr: 0.003125
2020-10-22 00:14:38,744 epoch 89 - iter 288/368 - loss 0.47468728 - samples/sec: 729.24 - lr: 0.003125
2020-10-22 00:14:40,375 epoch 89 - iter 324/368 - loss 0.47492149 - samples/sec: 733.72 - lr: 0.003125
2020-10-22 00:14:41,996 epoch 89 - iter 360/368 - loss 0.47372390 - samples/sec: 741.56 - lr: 0.003125
2020-10-22 00:14:42,477 ----------------------------------------------------------------------------------------------------
2020-10-22 00:14:42,478 EPOCH 89 done: loss 0.4742 - lr 0.0031250
2020-10-22 00:14:50,910 DEV : loss 0.593375027179718 - score 0.7768
Epoch    89: reducin

2020-10-22 00:17:15,123 EPOCH 94 done: loss 0.4866 - lr 0.0015625
2020-10-22 00:17:23,485 DEV : loss 0.585536777973175 - score 0.7814
2020-10-22 00:17:25,411 BAD EPOCHS (no improvement): 1
2020-10-22 00:17:25,412 ----------------------------------------------------------------------------------------------------
2020-10-22 00:17:27,606 epoch 95 - iter 36/368 - loss 0.45429086 - samples/sec: 615.24 - lr: 0.001563
2020-10-22 00:17:29,277 epoch 95 - iter 72/368 - loss 0.47406397 - samples/sec: 720.00 - lr: 0.001563
2020-10-22 00:17:30,979 epoch 95 - iter 108/368 - loss 0.47783762 - samples/sec: 705.16 - lr: 0.001563
2020-10-22 00:17:32,636 epoch 95 - iter 144/368 - loss 0.48098466 - samples/sec: 725.06 - lr: 0.001563
2020-10-22 00:17:34,309 epoch 95 - iter 180/368 - loss 0.47794133 - samples/sec: 721.84 - lr: 0.001563
2020-10-22 00:17:36,050 epoch 95 - iter 216/368 - loss 0.47851074 - samples/sec: 689.74 - lr: 0.001563
2020-10-22 00:17:37,752 epoch 95 - iter 252/368 - loss 0.48517195 - sa

2020-10-22 00:20:04,646 epoch 100 - iter 216/368 - loss 0.48473669 - samples/sec: 751.73 - lr: 0.000781
2020-10-22 00:20:06,271 epoch 100 - iter 252/368 - loss 0.47904748 - samples/sec: 739.84 - lr: 0.000781
2020-10-22 00:20:07,892 epoch 100 - iter 288/368 - loss 0.47648268 - samples/sec: 740.50 - lr: 0.000781
2020-10-22 00:20:09,504 epoch 100 - iter 324/368 - loss 0.47251173 - samples/sec: 748.58 - lr: 0.000781
2020-10-22 00:20:11,072 epoch 100 - iter 360/368 - loss 0.47580548 - samples/sec: 768.06 - lr: 0.000781
2020-10-22 00:20:13,558 ----------------------------------------------------------------------------------------------------
2020-10-22 00:20:13,559 EPOCH 100 done: loss 0.4759 - lr 0.0007813
2020-10-22 00:20:19,744 DEV : loss 0.5817830562591553 - score 0.7827
2020-10-22 00:20:21,669 BAD EPOCHS (no improvement): 1
2020-10-22 00:20:21,670 ----------------------------------------------------------------------------------------------------
2020-10-22 00:20:23,853 epoch 101 - ite

2020-10-22 00:22:52,164 ----------------------------------------------------------------------------------------------------
2020-10-22 00:22:54,361 epoch 106 - iter 36/368 - loss 0.44022814 - samples/sec: 613.12 - lr: 0.000391
2020-10-22 00:22:56,168 epoch 106 - iter 72/368 - loss 0.45736753 - samples/sec: 665.78 - lr: 0.000391
2020-10-22 00:22:57,821 epoch 106 - iter 108/368 - loss 0.47360443 - samples/sec: 726.85 - lr: 0.000391
2020-10-22 00:22:59,539 epoch 106 - iter 144/368 - loss 0.47645261 - samples/sec: 698.98 - lr: 0.000391
2020-10-22 00:23:03,458 epoch 106 - iter 180/368 - loss 0.47635900 - samples/sec: 299.58 - lr: 0.000391
2020-10-22 00:23:05,154 epoch 106 - iter 216/368 - loss 0.47604843 - samples/sec: 708.66 - lr: 0.000391
2020-10-22 00:23:06,812 epoch 106 - iter 252/368 - loss 0.47168294 - samples/sec: 724.02 - lr: 0.000391
2020-10-22 00:23:08,457 epoch 106 - iter 288/368 - loss 0.47200430 - samples/sec: 728.51 - lr: 0.000391
2020-10-22 00:23:10,032 epoch 106 - iter 324/

2020-10-22 00:25:46,458 epoch 111 - iter 288/368 - loss 0.48230134 - samples/sec: 807.66 - lr: 0.000391
2020-10-22 00:25:50,079 epoch 111 - iter 324/368 - loss 0.48483508 - samples/sec: 771.85 - lr: 0.000391
2020-10-22 00:25:51,716 epoch 111 - iter 360/368 - loss 0.48844734 - samples/sec: 732.82 - lr: 0.000391
2020-10-22 00:25:52,182 ----------------------------------------------------------------------------------------------------
2020-10-22 00:25:52,183 EPOCH 111 done: loss 0.4878 - lr 0.0003906
2020-10-22 00:25:58,499 DEV : loss 0.5836184620857239 - score 0.7811
Epoch   111: reducing learning rate of group 0 to 1.9531e-04.
2020-10-22 00:26:00,418 BAD EPOCHS (no improvement): 6
2020-10-22 00:26:00,419 ----------------------------------------------------------------------------------------------------
2020-10-22 00:26:02,597 epoch 112 - iter 36/368 - loss 0.45687327 - samples/sec: 613.19 - lr: 0.000195
2020-10-22 00:26:04,337 epoch 112 - iter 72/368 - loss 0.46682370 - samples/sec: 6

2020-10-22 00:28:32,591 epoch 117 - iter 36/368 - loss 0.44484593 - samples/sec: 594.28 - lr: 0.000195
2020-10-22 00:28:34,322 epoch 117 - iter 72/368 - loss 0.45819084 - samples/sec: 697.76 - lr: 0.000195
2020-10-22 00:28:35,900 epoch 117 - iter 108/368 - loss 0.46395852 - samples/sec: 765.60 - lr: 0.000195
2020-10-22 00:28:37,548 epoch 117 - iter 144/368 - loss 0.46154677 - samples/sec: 737.57 - lr: 0.000195
2020-10-22 00:28:39,216 epoch 117 - iter 180/368 - loss 0.46453326 - samples/sec: 723.39 - lr: 0.000195
2020-10-22 00:28:40,892 epoch 117 - iter 216/368 - loss 0.46760760 - samples/sec: 719.47 - lr: 0.000195
2020-10-22 00:28:44,668 epoch 117 - iter 252/368 - loss 0.47318098 - samples/sec: 752.29 - lr: 0.000195
2020-10-22 00:28:46,412 epoch 117 - iter 288/368 - loss 0.47503049 - samples/sec: 688.98 - lr: 0.000195
2020-10-22 00:28:48,096 epoch 117 - iter 324/368 - loss 0.47390440 - samples/sec: 715.52 - lr: 0.000195
2020-10-22 00:28:49,677 epoch 117 - iter 360/368 - loss 0.47584448

{'test_score': 0.7824,
 'dev_score_history': [0.5098,
  0.4845,
  0.5687,
  0.3728,
  0.6339,
  0.6249,
  0.6349,
  0.6305,
  0.6707,
  0.6446,
  0.686,
  0.6911,
  0.6137,
  0.5566,
  0.6916,
  0.7111,
  0.6796,
  0.7095,
  0.7236,
  0.6898,
  0.674,
  0.7185,
  0.7095,
  0.732,
  0.7037,
  0.6988,
  0.6883,
  0.6975,
  0.7504,
  0.7443,
  0.7274,
  0.7558,
  0.754,
  0.6824,
  0.7072,
  0.7369,
  0.6939,
  0.7481,
  0.763,
  0.7466,
  0.7515,
  0.7579,
  0.7589,
  0.7553,
  0.7512,
  0.7645,
  0.7694,
  0.7699,
  0.753,
  0.7655,
  0.7788,
  0.7627,
  0.7706,
  0.7612,
  0.775,
  0.775,
  0.7694,
  0.763,
  0.7755,
  0.7717,
  0.7686,
  0.7747,
  0.7691,
  0.7722,
  0.7691,
  0.7719,
  0.7758,
  0.7776,
  0.777,
  0.7814,
  0.7752,
  0.7742,
  0.7776,
  0.7737,
  0.7816,
  0.7783,
  0.7755,
  0.7816,
  0.7801,
  0.7806,
  0.7722,
  0.7786,
  0.7827,
  0.7801,
  0.7819,
  0.7819,
  0.7755,
  0.7801,
  0.7768,
  0.7837,
  0.7806,
  0.7811,
  0.7842,
  0.7814,
  0.7832,
  0.7829,
  0.78

# glove + for/backward flair embeddings

In [4]:
torch.cuda.empty_cache()
word_embeddings = [WordEmbeddings('glove'), FlairEmbeddings('news-forward'), FlairEmbeddings('news-backward')]

document_embeddings = DocumentRNNEmbeddings(word_embeddings, hidden_size=256)

classifier = TextClassifier(document_embeddings, label_dictionary=label_dict)

trainer = ModelTrainer(classifier, corpus)

trainer.train('classifiers/spooky_authorship_classifier_glove&news-fbflair',
              learning_rate=0.1,
              mini_batch_size=16,
              anneal_factor=0.5,
              patience=5,
              max_epochs=150)

2020-10-22 05:39:29,219 ----------------------------------------------------------------------------------------------------
2020-10-22 05:39:29,220 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings('glove')
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
          (decoder): Linear(in_features=2048, out_features=300, bias=True)
        )
      )
      (list_embedding_2): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
          (decoder): Linear(in_features=2048, out_features=300, bias=True)
        )
      )
    )
    (word_reprojection_map): Linear(in_features=4196, out_features=4196, bias=True)
    (rnn): GRU(4196, 256, 

2020-10-22 05:55:14,081 EPOCH 4 done: loss 0.7073 - lr 0.1000000
2020-10-22 05:56:14,354 DEV : loss 1.021492600440979 - score 0.5883
2020-10-22 05:56:16,260 BAD EPOCHS (no improvement): 1
2020-10-22 05:56:16,261 ----------------------------------------------------------------------------------------------------
2020-10-22 05:56:36,800 epoch 5 - iter 73/736 - loss 0.67823635 - samples/sec: 58.21 - lr: 0.100000
2020-10-22 05:56:55,847 epoch 5 - iter 146/736 - loss 0.70018424 - samples/sec: 62.51 - lr: 0.100000
2020-10-22 05:57:14,514 epoch 5 - iter 219/736 - loss 0.68725666 - samples/sec: 63.77 - lr: 0.100000
2020-10-22 05:57:33,527 epoch 5 - iter 292/736 - loss 0.66753892 - samples/sec: 61.70 - lr: 0.100000
2020-10-22 05:57:51,318 epoch 5 - iter 365/736 - loss 0.66906751 - samples/sec: 67.03 - lr: 0.100000
2020-10-22 05:58:10,088 epoch 5 - iter 438/736 - loss 0.67636088 - samples/sec: 63.49 - lr: 0.100000
2020-10-22 05:58:28,429 epoch 5 - iter 511/736 - loss 0.68132020 - samples/sec: 64

2020-10-22 06:19:22,747 epoch 10 - iter 511/736 - loss 0.56972459 - samples/sec: 64.91 - lr: 0.100000
2020-10-22 06:19:41,728 epoch 10 - iter 584/736 - loss 0.56812148 - samples/sec: 61.84 - lr: 0.100000
2020-10-22 06:20:00,440 epoch 10 - iter 657/736 - loss 0.57327429 - samples/sec: 63.72 - lr: 0.100000
2020-10-22 06:20:18,870 epoch 10 - iter 730/736 - loss 0.57654182 - samples/sec: 64.66 - lr: 0.100000
2020-10-22 06:20:20,272 ----------------------------------------------------------------------------------------------------
2020-10-22 06:20:20,273 EPOCH 10 done: loss 0.5753 - lr 0.1000000
2020-10-22 06:21:20,555 DEV : loss 0.5725451707839966 - score 0.7683
2020-10-22 06:21:22,450 BAD EPOCHS (no improvement): 1
2020-10-22 06:21:22,451 ----------------------------------------------------------------------------------------------------
2020-10-22 06:21:41,893 epoch 11 - iter 73/736 - loss 0.48916072 - samples/sec: 61.57 - lr: 0.100000
2020-10-22 06:22:00,466 epoch 11 - iter 146/736 - l

2020-10-22 06:42:54,838 epoch 16 - iter 146/736 - loss 0.48044030 - samples/sec: 62.06 - lr: 0.100000
2020-10-22 06:43:13,309 epoch 16 - iter 219/736 - loss 0.48963711 - samples/sec: 65.41 - lr: 0.100000
2020-10-22 06:43:32,176 epoch 16 - iter 292/736 - loss 0.49337707 - samples/sec: 63.12 - lr: 0.100000
2020-10-22 06:43:49,431 epoch 16 - iter 365/736 - loss 0.48550541 - samples/sec: 69.08 - lr: 0.100000
2020-10-22 06:44:08,423 epoch 16 - iter 438/736 - loss 0.48504848 - samples/sec: 62.65 - lr: 0.100000
2020-10-22 06:44:26,719 epoch 16 - iter 511/736 - loss 0.48881208 - samples/sec: 64.10 - lr: 0.100000
2020-10-22 06:44:45,827 epoch 16 - iter 584/736 - loss 0.48812099 - samples/sec: 62.30 - lr: 0.100000
2020-10-22 06:45:04,432 epoch 16 - iter 657/736 - loss 0.49027505 - samples/sec: 64.02 - lr: 0.100000
2020-10-22 06:45:24,350 epoch 16 - iter 730/736 - loss 0.48663807 - samples/sec: 60.54 - lr: 0.100000
2020-10-22 06:45:25,830 ----------------------------------------------------------

2020-10-22 07:06:18,648 ----------------------------------------------------------------------------------------------------
2020-10-22 07:06:18,648 EPOCH 21 done: loss 0.4434 - lr 0.1000000
2020-10-22 07:07:18,931 DEV : loss 0.5002753734588623 - score 0.8057
2020-10-22 07:07:20,837 BAD EPOCHS (no improvement): 0
saving best model
2020-10-22 07:07:22,824 ----------------------------------------------------------------------------------------------------
2020-10-22 07:07:41,767 epoch 22 - iter 73/736 - loss 0.38669641 - samples/sec: 64.27 - lr: 0.100000
2020-10-22 07:07:59,639 epoch 22 - iter 146/736 - loss 0.40790390 - samples/sec: 66.63 - lr: 0.100000
2020-10-22 07:08:19,275 epoch 22 - iter 219/736 - loss 0.42489670 - samples/sec: 59.77 - lr: 0.100000
2020-10-22 07:08:38,260 epoch 22 - iter 292/736 - loss 0.41745021 - samples/sec: 62.65 - lr: 0.100000
2020-10-22 07:08:56,715 epoch 22 - iter 365/736 - loss 0.42424755 - samples/sec: 64.51 - lr: 0.100000
2020-10-22 07:09:15,882 epoch 22 

2020-10-22 07:30:09,852 epoch 27 - iter 438/736 - loss 0.38299003 - samples/sec: 62.17 - lr: 0.100000
2020-10-22 07:30:28,189 epoch 27 - iter 511/736 - loss 0.38922438 - samples/sec: 63.97 - lr: 0.100000
2020-10-22 07:30:46,530 epoch 27 - iter 584/736 - loss 0.39161635 - samples/sec: 63.96 - lr: 0.100000
2020-10-22 07:31:04,957 epoch 27 - iter 657/736 - loss 0.39466972 - samples/sec: 64.67 - lr: 0.100000
2020-10-22 07:31:22,770 epoch 27 - iter 730/736 - loss 0.39801308 - samples/sec: 67.94 - lr: 0.100000
2020-10-22 07:31:24,040 ----------------------------------------------------------------------------------------------------
2020-10-22 07:31:24,040 EPOCH 27 done: loss 0.3973 - lr 0.1000000
2020-10-22 07:32:24,425 DEV : loss 0.5126873850822449 - score 0.8072
2020-10-22 07:32:26,329 BAD EPOCHS (no improvement): 4
2020-10-22 07:32:26,330 ----------------------------------------------------------------------------------------------------
2020-10-22 07:32:44,142 epoch 28 - iter 73/736 - l

saving best model
2020-10-22 07:53:24,767 ----------------------------------------------------------------------------------------------------
2020-10-22 07:53:42,727 epoch 33 - iter 73/736 - loss 0.23453749 - samples/sec: 66.84 - lr: 0.050000
2020-10-22 07:54:00,536 epoch 33 - iter 146/736 - loss 0.25535620 - samples/sec: 65.87 - lr: 0.050000
2020-10-22 07:54:19,779 epoch 33 - iter 219/736 - loss 0.25479829 - samples/sec: 61.00 - lr: 0.050000
2020-10-22 07:54:39,943 epoch 33 - iter 292/736 - loss 0.26273323 - samples/sec: 58.96 - lr: 0.050000
2020-10-22 07:54:58,559 epoch 33 - iter 365/736 - loss 0.26508840 - samples/sec: 64.00 - lr: 0.050000
2020-10-22 07:55:17,063 epoch 33 - iter 438/736 - loss 0.26743068 - samples/sec: 64.40 - lr: 0.050000
2020-10-22 07:55:36,426 epoch 33 - iter 511/736 - loss 0.26624148 - samples/sec: 62.36 - lr: 0.050000
2020-10-22 07:55:55,827 epoch 33 - iter 584/736 - loss 0.26869872 - samples/sec: 61.35 - lr: 0.050000
2020-10-22 07:56:13,845 epoch 33 - iter 65

2020-10-22 08:17:05,312 epoch 38 - iter 657/736 - loss 0.24077881 - samples/sec: 68.55 - lr: 0.050000
2020-10-22 08:17:23,558 epoch 38 - iter 730/736 - loss 0.24036327 - samples/sec: 64.26 - lr: 0.050000
2020-10-22 08:17:24,922 ----------------------------------------------------------------------------------------------------
2020-10-22 08:17:24,923 EPOCH 38 done: loss 0.2415 - lr 0.0500000
2020-10-22 08:18:25,209 DEV : loss 0.5293952822685242 - score 0.8249
2020-10-22 08:18:27,115 BAD EPOCHS (no improvement): 1
2020-10-22 08:18:27,116 ----------------------------------------------------------------------------------------------------
2020-10-22 08:18:45,612 epoch 39 - iter 73/736 - loss 0.23415036 - samples/sec: 64.81 - lr: 0.050000
2020-10-22 08:19:03,479 epoch 39 - iter 146/736 - loss 0.22992241 - samples/sec: 66.68 - lr: 0.050000
2020-10-22 08:19:21,679 epoch 39 - iter 219/736 - loss 0.22542867 - samples/sec: 65.44 - lr: 0.050000
2020-10-22 08:19:40,190 epoch 39 - iter 292/736 - l

2020-10-22 08:40:34,225 epoch 44 - iter 292/736 - loss 0.21702861 - samples/sec: 65.73 - lr: 0.050000
2020-10-22 08:40:52,513 epoch 44 - iter 365/736 - loss 0.21923958 - samples/sec: 64.20 - lr: 0.050000
2020-10-22 08:41:10,934 epoch 44 - iter 438/736 - loss 0.21762593 - samples/sec: 65.72 - lr: 0.050000
2020-10-22 08:41:29,587 epoch 44 - iter 511/736 - loss 0.21971656 - samples/sec: 62.98 - lr: 0.050000
2020-10-22 08:41:48,904 epoch 44 - iter 584/736 - loss 0.21976363 - samples/sec: 60.81 - lr: 0.050000
2020-10-22 08:42:07,012 epoch 44 - iter 657/736 - loss 0.21767871 - samples/sec: 65.88 - lr: 0.050000
2020-10-22 08:42:26,709 epoch 44 - iter 730/736 - loss 0.21686411 - samples/sec: 60.43 - lr: 0.050000
2020-10-22 08:42:28,180 ----------------------------------------------------------------------------------------------------
2020-10-22 08:42:28,181 EPOCH 44 done: loss 0.2161 - lr 0.0500000
2020-10-22 08:43:28,618 DEV : loss 0.5766856074333191 - score 0.8295
2020-10-22 08:43:30,531 BA

2020-10-22 09:03:17,522 EPOCH 49 done: loss 0.1627 - lr 0.0250000
2020-10-22 09:04:18,008 DEV : loss 0.579181432723999 - score 0.8346
2020-10-22 09:04:19,898 BAD EPOCHS (no improvement): 3
2020-10-22 09:04:19,899 ----------------------------------------------------------------------------------------------------
2020-10-22 09:04:39,092 epoch 50 - iter 73/736 - loss 0.13416733 - samples/sec: 62.50 - lr: 0.025000
2020-10-22 09:04:58,199 epoch 50 - iter 146/736 - loss 0.14528287 - samples/sec: 62.23 - lr: 0.025000
2020-10-22 09:05:17,877 epoch 50 - iter 219/736 - loss 0.14773228 - samples/sec: 61.35 - lr: 0.025000
2020-10-22 09:05:35,464 epoch 50 - iter 292/736 - loss 0.15245274 - samples/sec: 66.67 - lr: 0.025000
2020-10-22 09:05:54,075 epoch 50 - iter 365/736 - loss 0.14650395 - samples/sec: 63.03 - lr: 0.025000
2020-10-22 09:06:13,033 epoch 50 - iter 438/736 - loss 0.14865621 - samples/sec: 62.75 - lr: 0.025000
2020-10-22 09:06:31,650 epoch 50 - iter 511/736 - loss 0.14715849 - samples

2020-10-22 09:27:28,578 epoch 55 - iter 511/736 - loss 0.13697929 - samples/sec: 62.25 - lr: 0.025000
2020-10-22 09:27:47,605 epoch 55 - iter 584/736 - loss 0.13978442 - samples/sec: 62.58 - lr: 0.025000
2020-10-22 09:28:04,620 epoch 55 - iter 657/736 - loss 0.13966030 - samples/sec: 70.13 - lr: 0.025000
2020-10-22 09:28:22,806 epoch 55 - iter 730/736 - loss 0.13790800 - samples/sec: 65.56 - lr: 0.025000
2020-10-22 09:28:24,364 ----------------------------------------------------------------------------------------------------
2020-10-22 09:28:24,365 EPOCH 55 done: loss 0.1377 - lr 0.0250000
2020-10-22 09:29:24,806 DEV : loss 0.6079531908035278 - score 0.8338
2020-10-22 09:29:26,695 BAD EPOCHS (no improvement): 3
2020-10-22 09:29:26,696 ----------------------------------------------------------------------------------------------------
2020-10-22 09:29:44,872 epoch 56 - iter 73/736 - loss 0.12719105 - samples/sec: 65.00 - lr: 0.025000
2020-10-22 09:30:02,223 epoch 56 - iter 146/736 - l

2020-10-22 09:50:42,944 epoch 61 - iter 73/736 - loss 0.11022888 - samples/sec: 59.16 - lr: 0.012500
2020-10-22 09:51:01,108 epoch 61 - iter 146/736 - loss 0.10588165 - samples/sec: 65.60 - lr: 0.012500
2020-10-22 09:51:18,552 epoch 61 - iter 219/736 - loss 0.10752810 - samples/sec: 67.29 - lr: 0.012500
2020-10-22 09:51:37,543 epoch 61 - iter 292/736 - loss 0.11056165 - samples/sec: 62.65 - lr: 0.012500
2020-10-22 09:51:55,059 epoch 61 - iter 365/736 - loss 0.10871672 - samples/sec: 68.06 - lr: 0.012500
2020-10-22 09:52:12,391 epoch 61 - iter 438/736 - loss 0.10873290 - samples/sec: 67.69 - lr: 0.012500
2020-10-22 09:52:31,414 epoch 61 - iter 511/736 - loss 0.11124307 - samples/sec: 63.48 - lr: 0.012500
2020-10-22 09:52:49,675 epoch 61 - iter 584/736 - loss 0.11331592 - samples/sec: 65.29 - lr: 0.012500
2020-10-22 09:53:08,936 epoch 61 - iter 657/736 - loss 0.11296281 - samples/sec: 61.79 - lr: 0.012500
2020-10-22 09:53:28,553 epoch 61 - iter 730/736 - loss 0.11261002 - samples/sec: 60

2020-10-22 10:14:23,112 epoch 66 - iter 730/736 - loss 0.10744322 - samples/sec: 61.62 - lr: 0.012500
2020-10-22 10:14:24,379 ----------------------------------------------------------------------------------------------------
2020-10-22 10:14:24,379 EPOCH 66 done: loss 0.1072 - lr 0.0125000
2020-10-22 10:15:25,448 DEV : loss 0.6417397260665894 - score 0.8399
2020-10-22 10:15:27,338 BAD EPOCHS (no improvement): 2
2020-10-22 10:15:27,338 ----------------------------------------------------------------------------------------------------
2020-10-22 10:16:06,362 epoch 67 - iter 73/736 - loss 0.09482648 - samples/sec: 30.08 - lr: 0.012500
2020-10-22 10:16:49,911 epoch 67 - iter 146/736 - loss 0.09827320 - samples/sec: 27.22 - lr: 0.012500
2020-10-22 10:17:30,983 epoch 67 - iter 219/736 - loss 0.10084235 - samples/sec: 28.49 - lr: 0.012500
2020-10-22 10:18:10,353 epoch 67 - iter 292/736 - loss 0.10215528 - samples/sec: 29.95 - lr: 0.012500
2020-10-22 10:18:51,695 epoch 67 - iter 365/736 - l

2020-10-22 10:48:59,261 epoch 72 - iter 365/736 - loss 0.08501685 - samples/sec: 60.70 - lr: 0.012500
2020-10-22 10:49:17,211 epoch 72 - iter 438/736 - loss 0.09218320 - samples/sec: 66.40 - lr: 0.012500
2020-10-22 10:49:36,267 epoch 72 - iter 511/736 - loss 0.09539300 - samples/sec: 61.60 - lr: 0.012500
2020-10-22 10:49:55,087 epoch 72 - iter 584/736 - loss 0.09630505 - samples/sec: 63.29 - lr: 0.012500
2020-10-22 10:50:14,005 epoch 72 - iter 657/736 - loss 0.09732311 - samples/sec: 62.94 - lr: 0.012500
2020-10-22 10:50:32,165 epoch 72 - iter 730/736 - loss 0.09625988 - samples/sec: 66.61 - lr: 0.012500
2020-10-22 10:50:33,639 ----------------------------------------------------------------------------------------------------
2020-10-22 10:50:33,640 EPOCH 72 done: loss 0.0965 - lr 0.0125000
2020-10-22 10:51:35,938 DEV : loss 0.6853747963905334 - score 0.8394
2020-10-22 10:51:37,882 BAD EPOCHS (no improvement): 4
2020-10-22 10:51:37,883 -------------------------------------------------

2020-10-22 11:12:50,670 BAD EPOCHS (no improvement): 3
2020-10-22 11:12:50,671 ----------------------------------------------------------------------------------------------------
2020-10-22 11:13:10,431 epoch 78 - iter 73/736 - loss 0.06465170 - samples/sec: 59.77 - lr: 0.006250
2020-10-22 11:13:29,470 epoch 78 - iter 146/736 - loss 0.08229151 - samples/sec: 61.62 - lr: 0.006250
2020-10-22 11:13:49,259 epoch 78 - iter 219/736 - loss 0.08804072 - samples/sec: 60.15 - lr: 0.006250
2020-10-22 11:14:07,487 epoch 78 - iter 292/736 - loss 0.09882081 - samples/sec: 65.43 - lr: 0.006250
2020-10-22 11:14:26,610 epoch 78 - iter 365/736 - loss 0.09641091 - samples/sec: 62.28 - lr: 0.006250
2020-10-22 11:14:45,055 epoch 78 - iter 438/736 - loss 0.09621811 - samples/sec: 63.57 - lr: 0.006250
2020-10-22 11:15:04,770 epoch 78 - iter 511/736 - loss 0.09571910 - samples/sec: 59.48 - lr: 0.006250
2020-10-22 11:15:23,205 epoch 78 - iter 584/736 - loss 0.09269978 - samples/sec: 64.62 - lr: 0.006250
2020-

2020-10-22 11:36:33,666 epoch 83 - iter 584/736 - loss 0.07599921 - samples/sec: 66.01 - lr: 0.003125
2020-10-22 11:36:52,749 epoch 83 - iter 657/736 - loss 0.07653315 - samples/sec: 62.38 - lr: 0.003125
2020-10-22 11:37:10,163 epoch 83 - iter 730/736 - loss 0.07587821 - samples/sec: 68.45 - lr: 0.003125
2020-10-22 11:37:11,355 ----------------------------------------------------------------------------------------------------
2020-10-22 11:37:11,356 EPOCH 83 done: loss 0.0756 - lr 0.0031250
2020-10-22 11:38:11,947 DEV : loss 0.7091188430786133 - score 0.8407
2020-10-22 11:38:13,889 BAD EPOCHS (no improvement): 3
2020-10-22 11:38:13,890 ----------------------------------------------------------------------------------------------------
2020-10-22 11:38:33,996 epoch 84 - iter 73/736 - loss 0.06955129 - samples/sec: 58.73 - lr: 0.003125
2020-10-22 11:38:55,620 epoch 84 - iter 146/736 - loss 0.07719317 - samples/sec: 54.96 - lr: 0.003125
2020-10-22 11:39:14,237 epoch 84 - iter 219/736 - l

2020-10-22 12:00:13,608 epoch 89 - iter 146/736 - loss 0.06770018 - samples/sec: 61.04 - lr: 0.001563
2020-10-22 12:00:32,132 epoch 89 - iter 219/736 - loss 0.07738310 - samples/sec: 65.33 - lr: 0.001563
2020-10-22 12:00:50,562 epoch 89 - iter 292/736 - loss 0.07864328 - samples/sec: 63.62 - lr: 0.001563
2020-10-22 12:01:10,046 epoch 89 - iter 365/736 - loss 0.08015385 - samples/sec: 61.08 - lr: 0.001563
2020-10-22 12:01:27,362 epoch 89 - iter 438/736 - loss 0.07869211 - samples/sec: 68.88 - lr: 0.001563
2020-10-22 12:01:46,727 epoch 89 - iter 511/736 - loss 0.07833439 - samples/sec: 61.47 - lr: 0.001563
2020-10-22 12:02:05,154 epoch 89 - iter 584/736 - loss 0.07848135 - samples/sec: 63.66 - lr: 0.001563
2020-10-22 12:02:23,873 epoch 89 - iter 657/736 - loss 0.07773047 - samples/sec: 62.70 - lr: 0.001563
2020-10-22 12:02:41,515 epoch 89 - iter 730/736 - loss 0.07688140 - samples/sec: 67.54 - lr: 0.001563
2020-10-22 12:02:43,580 ----------------------------------------------------------

2020-10-22 12:23:42,991 ----------------------------------------------------------------------------------------------------
2020-10-22 12:23:42,992 EPOCH 94 done: loss 0.0692 - lr 0.0015625
2020-10-22 12:24:43,680 DEV : loss 0.7108713388442993 - score 0.8458
2020-10-22 12:24:45,599 BAD EPOCHS (no improvement): 1
2020-10-22 12:24:45,600 ----------------------------------------------------------------------------------------------------
2020-10-22 12:25:05,031 epoch 95 - iter 73/736 - loss 0.09707964 - samples/sec: 62.64 - lr: 0.001563
2020-10-22 12:25:23,738 epoch 95 - iter 146/736 - loss 0.09389795 - samples/sec: 62.71 - lr: 0.001563
2020-10-22 12:25:43,845 epoch 95 - iter 219/736 - loss 0.08636202 - samples/sec: 59.13 - lr: 0.001563
2020-10-22 12:26:01,966 epoch 95 - iter 292/736 - loss 0.07765328 - samples/sec: 65.76 - lr: 0.001563
2020-10-22 12:26:21,637 epoch 95 - iter 365/736 - loss 0.07632302 - samples/sec: 59.61 - lr: 0.001563
2020-10-22 12:26:40,497 epoch 95 - iter 438/736 - l

2020-10-22 12:47:14,543 epoch 100 - iter 365/736 - loss 0.07548293 - samples/sec: 64.33 - lr: 0.000781
2020-10-22 12:47:33,489 epoch 100 - iter 438/736 - loss 0.07493269 - samples/sec: 61.91 - lr: 0.000781
2020-10-22 12:47:53,510 epoch 100 - iter 511/736 - loss 0.07719064 - samples/sec: 60.26 - lr: 0.000781
2020-10-22 12:48:12,984 epoch 100 - iter 584/736 - loss 0.07749062 - samples/sec: 61.04 - lr: 0.000781
2020-10-22 12:48:30,609 epoch 100 - iter 657/736 - loss 0.07714540 - samples/sec: 66.54 - lr: 0.000781
2020-10-22 12:48:48,755 epoch 100 - iter 730/736 - loss 0.07807375 - samples/sec: 65.66 - lr: 0.000781
2020-10-22 12:48:50,392 ----------------------------------------------------------------------------------------------------
2020-10-22 12:48:50,393 EPOCH 100 done: loss 0.0782 - lr 0.0007813
2020-10-22 12:49:50,780 DEV : loss 0.7138060927391052 - score 0.8448
2020-10-22 12:49:52,667 BAD EPOCHS (no improvement): 1
2020-10-22 12:49:52,667 ------------------------------------------

Epoch   105: reducing learning rate of group 0 to 3.9063e-04.
2020-10-22 13:10:47,190 BAD EPOCHS (no improvement): 6
2020-10-22 13:10:47,191 ----------------------------------------------------------------------------------------------------
2020-10-22 13:11:05,877 epoch 106 - iter 73/736 - loss 0.06386210 - samples/sec: 64.16 - lr: 0.000391
2020-10-22 13:11:24,167 epoch 106 - iter 146/736 - loss 0.06407240 - samples/sec: 65.09 - lr: 0.000391
2020-10-22 13:11:41,890 epoch 106 - iter 219/736 - loss 0.06618554 - samples/sec: 66.18 - lr: 0.000391
2020-10-22 13:11:59,351 epoch 106 - iter 292/736 - loss 0.07083982 - samples/sec: 68.22 - lr: 0.000391
2020-10-22 13:12:18,912 epoch 106 - iter 365/736 - loss 0.07453130 - samples/sec: 60.78 - lr: 0.000391
2020-10-22 13:12:36,366 epoch 106 - iter 438/736 - loss 0.07466258 - samples/sec: 67.22 - lr: 0.000391
2020-10-22 13:12:56,661 epoch 106 - iter 511/736 - loss 0.07369853 - samples/sec: 58.56 - lr: 0.000391
2020-10-22 13:13:15,057 epoch 106 - it

2020-10-22 13:33:58,783 epoch 111 - iter 511/736 - loss 0.07121711 - samples/sec: 62.55 - lr: 0.000391
2020-10-22 13:34:18,387 epoch 111 - iter 584/736 - loss 0.07290009 - samples/sec: 60.75 - lr: 0.000391
2020-10-22 13:34:38,230 epoch 111 - iter 657/736 - loss 0.07342869 - samples/sec: 60.82 - lr: 0.000391
2020-10-22 13:34:55,461 epoch 111 - iter 730/736 - loss 0.07343918 - samples/sec: 68.20 - lr: 0.000391
2020-10-22 13:34:56,786 ----------------------------------------------------------------------------------------------------
2020-10-22 13:34:56,787 EPOCH 111 done: loss 0.0734 - lr 0.0003906
2020-10-22 13:35:57,624 DEV : loss 0.7158766388893127 - score 0.8453
Epoch   111: reducing learning rate of group 0 to 1.9531e-04.
2020-10-22 13:35:59,518 BAD EPOCHS (no improvement): 6
2020-10-22 13:35:59,519 ----------------------------------------------------------------------------------------------------
2020-10-22 13:36:18,497 epoch 112 - iter 73/736 - loss 0.06487043 - samples/sec: 62.2

2020-10-22 13:57:15,598 epoch 117 - iter 73/736 - loss 0.07890967 - samples/sec: 61.11 - lr: 0.000195
2020-10-22 13:57:33,078 epoch 117 - iter 146/736 - loss 0.09450423 - samples/sec: 67.14 - lr: 0.000195
2020-10-22 13:57:52,854 epoch 117 - iter 219/736 - loss 0.08524608 - samples/sec: 59.35 - lr: 0.000195
2020-10-22 13:58:11,174 epoch 117 - iter 292/736 - loss 0.08235634 - samples/sec: 65.02 - lr: 0.000195
2020-10-22 13:58:29,317 epoch 117 - iter 365/736 - loss 0.07857722 - samples/sec: 65.77 - lr: 0.000195
2020-10-22 13:58:48,658 epoch 117 - iter 438/736 - loss 0.07764626 - samples/sec: 61.59 - lr: 0.000195
2020-10-22 13:59:07,196 epoch 117 - iter 511/736 - loss 0.07607066 - samples/sec: 64.25 - lr: 0.000195
2020-10-22 13:59:26,489 epoch 117 - iter 584/736 - loss 0.07461531 - samples/sec: 60.79 - lr: 0.000195
2020-10-22 13:59:45,802 epoch 117 - iter 657/736 - loss 0.07294729 - samples/sec: 61.57 - lr: 0.000195
2020-10-22 14:00:04,218 epoch 117 - iter 730/736 - loss 0.07321712 - sampl

{'test_score': 0.8479,
 'dev_score_history': [0.5313,
  0.5973,
  0.6129,
  0.5883,
  0.7433,
  0.7686,
  0.7246,
  0.7428,
  0.776,
  0.7683,
  0.764,
  0.7891,
  0.7415,
  0.7954,
  0.7724,
  0.7919,
  0.7384,
  0.7908,
  0.7768,
  0.7977,
  0.8057,
  0.7763,
  0.8113,
  0.798,
  0.7949,
  0.787,
  0.8072,
  0.8021,
  0.785,
  0.8179,
  0.8192,
  0.8259,
  0.8307,
  0.8213,
  0.8151,
  0.8266,
  0.832,
  0.8249,
  0.8297,
  0.8346,
  0.8266,
  0.8289,
  0.8233,
  0.8295,
  0.8249,
  0.8246,
  0.8292,
  0.8333,
  0.8346,
  0.8366,
  0.8333,
  0.8394,
  0.8338,
  0.8346,
  0.8338,
  0.8284,
  0.8394,
  0.8335,
  0.8415,
  0.8394,
  0.8402,
  0.8394,
  0.8399,
  0.843,
  0.8425,
  0.8399,
  0.8374,
  0.844,
  0.8405,
  0.842,
  0.8387,
  0.8394,
  0.842,
  0.8381,
  0.8422,
  0.842,
  0.8407,
  0.8417,
  0.8402,
  0.8415,
  0.841,
  0.8435,
  0.8407,
  0.8438,
  0.8435,
  0.8402,
  0.8453,
  0.8428,
  0.8458,
  0.844,
  0.8445,
  0.8438,
  0.8458,
  0.8458,
  0.8448,
  0.8456,
  0.8435,

# bert embeddings

In [5]:
torch.cuda.empty_cache()

word_embeddings = [TransformerWordEmbeddings('bert-base-uncased')]

# 4. initialize document embedding by passing list of word embeddings
# Can choose between many RNN types (GRU by default, to change use rnn_type parameter)
document_embeddings = DocumentRNNEmbeddings(word_embeddings, hidden_size=256)

# 5. create the text classifier
classifier = TextClassifier(document_embeddings, label_dictionary=label_dict)

# 6. initialize the text classifier trainer
trainer = ModelTrainer(classifier, corpus)

# 7. start the training
trainer.train('classifiers/spooky_authorship_classifier_bert',
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=5,
              max_epochs=150)

2020-10-22 14:02:09,538 ----------------------------------------------------------------------------------------------------
2020-10-22 14:02:09,540 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): TransformerWordEmbeddings(
        (model): BertModel(
          (embeddings): BertEmbeddings(
            (word_embeddings): Embedding(30522, 768, padding_idx=0)
            (position_embeddings): Embedding(512, 768)
            (token_type_embeddings): Embedding(2, 768)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (encoder): BertEncoder(
            (layer): ModuleList(
              (0): BertLayer(
                (attention): BertAttention(
                  (self): BertSelfAttention(
                    (query): Linear(in_features=768, out_features=768, bias=True)
                    (key): Line

2020-10-22 14:02:09,541 ----------------------------------------------------------------------------------------------------
2020-10-22 14:02:09,541 Corpus: "Corpus: 11762 train + 3911 dev + 3906 test sentences"
2020-10-22 14:02:09,542 ----------------------------------------------------------------------------------------------------
2020-10-22 14:02:09,542 Parameters:
2020-10-22 14:02:09,542  - learning_rate: "0.1"
2020-10-22 14:02:09,543  - mini_batch_size: "32"
2020-10-22 14:02:09,543  - patience: "5"
2020-10-22 14:02:09,543  - anneal_factor: "0.5"
2020-10-22 14:02:09,544  - max_epochs: "150"
2020-10-22 14:02:09,544  - shuffle: "True"
2020-10-22 14:02:09,544  - train_with_dev: "False"
2020-10-22 14:02:09,544  - batch_growth_annealing: "False"
2020-10-22 14:02:09,545 ----------------------------------------------------------------------------------------------------
2020-10-22 14:02:09,545 Model training base path: "classifiers/spooky_authorship_classifier_bert"
2020-10-22 14:02:09,

2020-10-22 14:19:36,282 ----------------------------------------------------------------------------------------------------
2020-10-22 14:19:36,282 EPOCH 5 done: loss 0.5798 - lr 0.1000000
2020-10-22 14:20:23,867 DEV : loss 0.7465242147445679 - score 0.6891
2020-10-22 14:20:25,766 BAD EPOCHS (no improvement): 2
2020-10-22 14:20:25,767 ----------------------------------------------------------------------------------------------------
2020-10-22 14:20:42,654 epoch 6 - iter 36/368 - loss 0.51457353 - samples/sec: 70.37 - lr: 0.100000
2020-10-22 14:20:59,459 epoch 6 - iter 72/368 - loss 0.51202875 - samples/sec: 69.80 - lr: 0.100000
2020-10-22 14:21:16,020 epoch 6 - iter 108/368 - loss 0.53477021 - samples/sec: 70.87 - lr: 0.100000
2020-10-22 14:21:32,424 epoch 6 - iter 144/368 - loss 0.54743409 - samples/sec: 71.59 - lr: 0.100000
2020-10-22 14:21:48,974 epoch 6 - iter 180/368 - loss 0.54946897 - samples/sec: 70.98 - lr: 0.100000
2020-10-22 14:22:05,394 epoch 6 - iter 216/368 - loss 0.55

2020-10-22 14:40:22,904 epoch 11 - iter 216/368 - loss 0.48324032 - samples/sec: 71.90 - lr: 0.100000
2020-10-22 14:40:38,993 epoch 11 - iter 252/368 - loss 0.48514123 - samples/sec: 71.89 - lr: 0.100000
2020-10-22 14:40:55,322 epoch 11 - iter 288/368 - loss 0.49061823 - samples/sec: 71.89 - lr: 0.100000
2020-10-22 14:41:11,747 epoch 11 - iter 324/368 - loss 0.49279711 - samples/sec: 71.46 - lr: 0.100000
2020-10-22 14:41:28,560 epoch 11 - iter 360/368 - loss 0.49154352 - samples/sec: 69.85 - lr: 0.100000
2020-10-22 14:41:32,344 ----------------------------------------------------------------------------------------------------
2020-10-22 14:41:32,345 EPOCH 11 done: loss 0.4918 - lr 0.1000000
2020-10-22 14:42:19,870 DEV : loss 0.5237590074539185 - score 0.7786
2020-10-22 14:42:21,743 BAD EPOCHS (no improvement): 1
2020-10-22 14:42:21,744 ----------------------------------------------------------------------------------------------------
2020-10-22 14:42:38,845 epoch 12 - iter 36/368 - l

2020-10-22 15:00:54,335 epoch 17 - iter 36/368 - loss 0.46092704 - samples/sec: 68.99 - lr: 0.100000
2020-10-22 15:01:10,281 epoch 17 - iter 72/368 - loss 0.47088402 - samples/sec: 72.52 - lr: 0.100000
2020-10-22 15:01:27,102 epoch 17 - iter 108/368 - loss 0.46656210 - samples/sec: 69.79 - lr: 0.100000
2020-10-22 15:01:43,738 epoch 17 - iter 144/368 - loss 0.46607319 - samples/sec: 70.57 - lr: 0.100000
2020-10-22 15:02:00,426 epoch 17 - iter 180/368 - loss 0.46985867 - samples/sec: 70.38 - lr: 0.100000
2020-10-22 15:02:17,203 epoch 17 - iter 216/368 - loss 0.46655500 - samples/sec: 69.98 - lr: 0.100000
2020-10-22 15:02:33,734 epoch 17 - iter 252/368 - loss 0.46984156 - samples/sec: 71.03 - lr: 0.100000
2020-10-22 15:02:50,007 epoch 17 - iter 288/368 - loss 0.46923452 - samples/sec: 71.10 - lr: 0.100000
2020-10-22 15:03:06,502 epoch 17 - iter 324/368 - loss 0.46761320 - samples/sec: 71.21 - lr: 0.100000
2020-10-22 15:03:23,098 epoch 17 - iter 360/368 - loss 0.46933555 - samples/sec: 70.

2020-10-22 15:21:33,280 epoch 22 - iter 360/368 - loss 0.45552193 - samples/sec: 71.18 - lr: 0.100000
2020-10-22 15:21:36,612 ----------------------------------------------------------------------------------------------------
2020-10-22 15:21:36,613 EPOCH 22 done: loss 0.4553 - lr 0.1000000
2020-10-22 15:22:23,942 DEV : loss 0.47379186749458313 - score 0.8062
2020-10-22 15:22:25,849 BAD EPOCHS (no improvement): 1
2020-10-22 15:22:25,850 ----------------------------------------------------------------------------------------------------
2020-10-22 15:22:43,171 epoch 23 - iter 36/368 - loss 0.44576302 - samples/sec: 68.52 - lr: 0.100000
2020-10-22 15:22:59,609 epoch 23 - iter 72/368 - loss 0.45860343 - samples/sec: 70.37 - lr: 0.100000
2020-10-22 15:23:16,013 epoch 23 - iter 108/368 - loss 0.45266330 - samples/sec: 71.56 - lr: 0.100000
2020-10-22 15:23:32,584 epoch 23 - iter 144/368 - loss 0.44390097 - samples/sec: 70.88 - lr: 0.100000
2020-10-22 15:23:49,304 epoch 23 - iter 180/368 - l

2020-10-22 15:41:55,560 epoch 28 - iter 180/368 - loss 0.41090741 - samples/sec: 71.43 - lr: 0.100000
2020-10-22 15:42:11,967 epoch 28 - iter 216/368 - loss 0.40672961 - samples/sec: 71.55 - lr: 0.100000
2020-10-22 15:42:28,310 epoch 28 - iter 252/368 - loss 0.41314283 - samples/sec: 71.87 - lr: 0.100000
2020-10-22 15:42:44,670 epoch 28 - iter 288/368 - loss 0.41700177 - samples/sec: 71.77 - lr: 0.100000
2020-10-22 15:43:01,148 epoch 28 - iter 324/368 - loss 0.41971990 - samples/sec: 71.25 - lr: 0.100000
2020-10-22 15:43:17,377 epoch 28 - iter 360/368 - loss 0.42008202 - samples/sec: 71.28 - lr: 0.100000
2020-10-22 15:43:21,187 ----------------------------------------------------------------------------------------------------
2020-10-22 15:43:21,188 EPOCH 28 done: loss 0.4210 - lr 0.1000000
2020-10-22 15:44:08,112 DEV : loss 0.5684040784835815 - score 0.7783
2020-10-22 15:44:09,978 BAD EPOCHS (no improvement): 4
2020-10-22 15:44:09,979 -------------------------------------------------

2020-10-22 16:02:17,971 BAD EPOCHS (no improvement): 2
2020-10-22 16:02:17,972 ----------------------------------------------------------------------------------------------------
2020-10-22 16:02:34,973 epoch 34 - iter 36/368 - loss 0.39096973 - samples/sec: 69.92 - lr: 0.050000
2020-10-22 16:02:51,733 epoch 34 - iter 72/368 - loss 0.36852365 - samples/sec: 70.07 - lr: 0.050000
2020-10-22 16:03:07,814 epoch 34 - iter 108/368 - loss 0.35721548 - samples/sec: 71.95 - lr: 0.050000
2020-10-22 16:03:24,098 epoch 34 - iter 144/368 - loss 0.35346020 - samples/sec: 71.03 - lr: 0.050000
2020-10-22 16:03:40,503 epoch 34 - iter 180/368 - loss 0.35324210 - samples/sec: 71.54 - lr: 0.050000
2020-10-22 16:03:56,979 epoch 34 - iter 216/368 - loss 0.34984069 - samples/sec: 71.28 - lr: 0.050000
2020-10-22 16:04:13,262 epoch 34 - iter 252/368 - loss 0.34956528 - samples/sec: 72.05 - lr: 0.050000
2020-10-22 16:04:29,989 epoch 34 - iter 288/368 - loss 0.34862567 - samples/sec: 70.21 - lr: 0.050000
2020-1

2020-10-22 16:22:43,349 epoch 39 - iter 288/368 - loss 0.31412578 - samples/sec: 71.11 - lr: 0.050000
2020-10-22 16:22:59,541 epoch 39 - iter 324/368 - loss 0.31595787 - samples/sec: 71.45 - lr: 0.050000
2020-10-22 16:23:16,067 epoch 39 - iter 360/368 - loss 0.31840097 - samples/sec: 71.05 - lr: 0.050000
2020-10-22 16:23:19,486 ----------------------------------------------------------------------------------------------------
2020-10-22 16:23:19,487 EPOCH 39 done: loss 0.3169 - lr 0.0500000
2020-10-22 16:24:07,634 DEV : loss 0.4162510633468628 - score 0.8374
2020-10-22 16:24:09,562 BAD EPOCHS (no improvement): 3
2020-10-22 16:24:09,562 ----------------------------------------------------------------------------------------------------
2020-10-22 16:24:26,174 epoch 40 - iter 36/368 - loss 0.34334941 - samples/sec: 71.58 - lr: 0.050000
2020-10-22 16:24:42,397 epoch 40 - iter 72/368 - loss 0.32011413 - samples/sec: 71.32 - lr: 0.050000
2020-10-22 16:24:58,834 epoch 40 - iter 108/368 - lo

2020-10-22 16:42:53,412 epoch 45 - iter 72/368 - loss 0.26322208 - samples/sec: 71.18 - lr: 0.025000
2020-10-22 16:43:09,747 epoch 45 - iter 108/368 - loss 0.27214624 - samples/sec: 71.96 - lr: 0.025000
2020-10-22 16:43:26,260 epoch 45 - iter 144/368 - loss 0.27711702 - samples/sec: 71.12 - lr: 0.025000
2020-10-22 16:43:42,549 epoch 45 - iter 180/368 - loss 0.27812835 - samples/sec: 71.02 - lr: 0.025000
2020-10-22 16:43:59,051 epoch 45 - iter 216/368 - loss 0.27919007 - samples/sec: 71.15 - lr: 0.025000
2020-10-22 16:44:15,497 epoch 45 - iter 252/368 - loss 0.27770449 - samples/sec: 71.38 - lr: 0.025000
2020-10-22 16:44:32,098 epoch 45 - iter 288/368 - loss 0.27876284 - samples/sec: 70.76 - lr: 0.025000
2020-10-22 16:44:48,714 epoch 45 - iter 324/368 - loss 0.27566485 - samples/sec: 70.73 - lr: 0.025000
2020-10-22 16:45:05,252 epoch 45 - iter 360/368 - loss 0.27688193 - samples/sec: 71.05 - lr: 0.025000
2020-10-22 16:45:08,852 -----------------------------------------------------------

2020-10-22 17:03:21,480 ----------------------------------------------------------------------------------------------------
2020-10-22 17:03:21,481 EPOCH 50 done: loss 0.2633 - lr 0.0250000
2020-10-22 17:04:08,941 DEV : loss 0.4068930447101593 - score 0.8433
2020-10-22 17:04:10,841 BAD EPOCHS (no improvement): 0
saving best model
2020-10-22 17:04:11,543 ----------------------------------------------------------------------------------------------------
2020-10-22 17:04:28,437 epoch 51 - iter 36/368 - loss 0.25183717 - samples/sec: 69.10 - lr: 0.025000
2020-10-22 17:04:45,103 epoch 51 - iter 72/368 - loss 0.23683839 - samples/sec: 70.43 - lr: 0.025000
2020-10-22 17:05:01,650 epoch 51 - iter 108/368 - loss 0.24111759 - samples/sec: 70.89 - lr: 0.025000
2020-10-22 17:05:18,145 epoch 51 - iter 144/368 - loss 0.24490976 - samples/sec: 71.18 - lr: 0.025000
2020-10-22 17:05:34,792 epoch 51 - iter 180/368 - loss 0.24088971 - samples/sec: 70.57 - lr: 0.025000
2020-10-22 17:05:51,464 epoch 51 -

2020-10-22 17:24:10,026 epoch 56 - iter 216/368 - loss 0.23087817 - samples/sec: 71.36 - lr: 0.025000
2020-10-22 17:24:26,460 epoch 56 - iter 252/368 - loss 0.23433180 - samples/sec: 70.39 - lr: 0.025000
2020-10-22 17:24:43,274 epoch 56 - iter 288/368 - loss 0.23352501 - samples/sec: 69.86 - lr: 0.025000
2020-10-22 17:25:00,140 epoch 56 - iter 324/368 - loss 0.23737051 - samples/sec: 69.57 - lr: 0.025000
2020-10-22 17:25:17,433 epoch 56 - iter 360/368 - loss 0.23993103 - samples/sec: 67.96 - lr: 0.025000
2020-10-22 17:25:21,005 ----------------------------------------------------------------------------------------------------
2020-10-22 17:25:21,006 EPOCH 56 done: loss 0.2392 - lr 0.0250000
2020-10-22 17:26:09,582 DEV : loss 0.42097073793411255 - score 0.8422
2020-10-22 17:26:11,488 BAD EPOCHS (no improvement): 3
2020-10-22 17:26:11,488 ----------------------------------------------------------------------------------------------------
2020-10-22 17:26:28,859 epoch 57 - iter 36/368 - 

2020-10-22 17:45:51,628 epoch 62 - iter 36/368 - loss 0.20883210 - samples/sec: 67.07 - lr: 0.025000
2020-10-22 17:46:08,270 epoch 62 - iter 72/368 - loss 0.21965398 - samples/sec: 69.55 - lr: 0.025000
2020-10-22 17:46:24,980 epoch 62 - iter 108/368 - loss 0.22604893 - samples/sec: 70.26 - lr: 0.025000
2020-10-22 17:46:41,456 epoch 62 - iter 144/368 - loss 0.22163407 - samples/sec: 70.21 - lr: 0.025000
2020-10-22 17:46:58,521 epoch 62 - iter 180/368 - loss 0.21434570 - samples/sec: 67.79 - lr: 0.025000
2020-10-22 17:47:15,293 epoch 62 - iter 216/368 - loss 0.21645800 - samples/sec: 69.94 - lr: 0.025000
2020-10-22 17:47:32,254 epoch 62 - iter 252/368 - loss 0.21566939 - samples/sec: 68.20 - lr: 0.025000
2020-10-22 17:47:49,079 epoch 62 - iter 288/368 - loss 0.21943863 - samples/sec: 69.79 - lr: 0.025000
2020-10-22 17:48:06,130 epoch 62 - iter 324/368 - loss 0.21985597 - samples/sec: 68.82 - lr: 0.025000
2020-10-22 17:48:22,861 epoch 62 - iter 360/368 - loss 0.22152737 - samples/sec: 69.

2020-10-22 18:07:08,211 epoch 67 - iter 360/368 - loss 0.21173068 - samples/sec: 71.10 - lr: 0.025000
2020-10-22 18:07:11,571 ----------------------------------------------------------------------------------------------------
2020-10-22 18:07:11,572 EPOCH 67 done: loss 0.2113 - lr 0.0250000
2020-10-22 18:08:00,530 DEV : loss 0.41175931692123413 - score 0.843
2020-10-22 18:08:02,451 BAD EPOCHS (no improvement): 3
2020-10-22 18:08:02,452 ----------------------------------------------------------------------------------------------------
2020-10-22 18:08:19,683 epoch 68 - iter 36/368 - loss 0.20770690 - samples/sec: 68.96 - lr: 0.025000
2020-10-22 18:08:36,691 epoch 68 - iter 72/368 - loss 0.21077614 - samples/sec: 69.02 - lr: 0.025000
2020-10-22 18:08:53,723 epoch 68 - iter 108/368 - loss 0.20838435 - samples/sec: 68.90 - lr: 0.025000
2020-10-22 18:09:11,123 epoch 68 - iter 144/368 - loss 0.20627320 - samples/sec: 67.52 - lr: 0.025000
2020-10-22 18:09:28,541 epoch 68 - iter 180/368 - lo

2020-10-22 18:28:12,164 epoch 73 - iter 144/368 - loss 0.19067199 - samples/sec: 54.38 - lr: 0.012500
2020-10-22 18:28:29,820 epoch 73 - iter 180/368 - loss 0.19144834 - samples/sec: 66.50 - lr: 0.012500
2020-10-22 18:28:47,289 epoch 73 - iter 216/368 - loss 0.18932441 - samples/sec: 67.29 - lr: 0.012500
2020-10-22 18:29:04,697 epoch 73 - iter 252/368 - loss 0.19458806 - samples/sec: 67.38 - lr: 0.012500
2020-10-22 18:29:22,140 epoch 73 - iter 288/368 - loss 0.19646669 - samples/sec: 66.30 - lr: 0.012500
2020-10-22 18:29:38,837 epoch 73 - iter 324/368 - loss 0.19240940 - samples/sec: 70.31 - lr: 0.012500
2020-10-22 18:29:55,242 epoch 73 - iter 360/368 - loss 0.19217938 - samples/sec: 70.49 - lr: 0.012500
2020-10-22 18:29:58,955 ----------------------------------------------------------------------------------------------------
2020-10-22 18:29:58,955 EPOCH 73 done: loss 0.1918 - lr 0.0125000
2020-10-22 18:30:46,731 DEV : loss 0.40996217727661133 - score 0.8517
2020-10-22 18:30:48,606 B

2020-10-22 18:49:00,437 DEV : loss 0.4110707640647888 - score 0.8481
2020-10-22 18:49:02,303 BAD EPOCHS (no improvement): 5
2020-10-22 18:49:02,304 ----------------------------------------------------------------------------------------------------
2020-10-22 18:49:19,196 epoch 79 - iter 36/368 - loss 0.19623629 - samples/sec: 70.38 - lr: 0.012500
2020-10-22 18:49:35,545 epoch 79 - iter 72/368 - loss 0.19466855 - samples/sec: 71.83 - lr: 0.012500
2020-10-22 18:49:52,132 epoch 79 - iter 108/368 - loss 0.18450918 - samples/sec: 70.78 - lr: 0.012500
2020-10-22 18:50:08,512 epoch 79 - iter 144/368 - loss 0.18651052 - samples/sec: 71.69 - lr: 0.012500
2020-10-22 18:50:25,021 epoch 79 - iter 180/368 - loss 0.18074880 - samples/sec: 71.17 - lr: 0.012500
2020-10-22 18:50:41,207 epoch 79 - iter 216/368 - loss 0.18359526 - samples/sec: 72.53 - lr: 0.012500
2020-10-22 18:50:57,477 epoch 79 - iter 252/368 - loss 0.18273030 - samples/sec: 71.14 - lr: 0.012500
2020-10-22 18:51:13,709 epoch 79 - iter

2020-10-22 19:09:20,209 epoch 84 - iter 252/368 - loss 0.16530717 - samples/sec: 70.60 - lr: 0.006250
2020-10-22 19:09:36,658 epoch 84 - iter 288/368 - loss 0.16759835 - samples/sec: 70.31 - lr: 0.006250
2020-10-22 19:09:53,462 epoch 84 - iter 324/368 - loss 0.16789974 - samples/sec: 69.86 - lr: 0.006250
2020-10-22 19:10:10,106 epoch 84 - iter 360/368 - loss 0.16967078 - samples/sec: 70.57 - lr: 0.006250
2020-10-22 19:10:13,586 ----------------------------------------------------------------------------------------------------
2020-10-22 19:10:13,587 EPOCH 84 done: loss 0.1691 - lr 0.0062500
2020-10-22 19:11:01,952 DEV : loss 0.4179345369338989 - score 0.8509
2020-10-22 19:11:03,839 BAD EPOCHS (no improvement): 1
2020-10-22 19:11:03,840 ----------------------------------------------------------------------------------------------------
2020-10-22 19:11:21,967 epoch 85 - iter 36/368 - loss 0.16621961 - samples/sec: 65.43 - lr: 0.006250
2020-10-22 19:11:39,272 epoch 85 - iter 72/368 - lo

2020-10-22 19:30:21,446 epoch 90 - iter 72/368 - loss 0.16681880 - samples/sec: 69.64 - lr: 0.006250
2020-10-22 19:30:38,370 epoch 90 - iter 108/368 - loss 0.16281888 - samples/sec: 69.38 - lr: 0.006250
2020-10-22 19:30:55,595 epoch 90 - iter 144/368 - loss 0.16076260 - samples/sec: 68.28 - lr: 0.006250
2020-10-22 19:31:12,321 epoch 90 - iter 180/368 - loss 0.16241432 - samples/sec: 70.20 - lr: 0.006250
2020-10-22 19:31:29,399 epoch 90 - iter 216/368 - loss 0.16308607 - samples/sec: 68.74 - lr: 0.006250
2020-10-22 19:31:46,346 epoch 90 - iter 252/368 - loss 0.16367439 - samples/sec: 68.27 - lr: 0.006250
2020-10-22 19:32:03,206 epoch 90 - iter 288/368 - loss 0.16312379 - samples/sec: 69.63 - lr: 0.006250
2020-10-22 19:32:20,130 epoch 90 - iter 324/368 - loss 0.16469129 - samples/sec: 69.39 - lr: 0.006250
2020-10-22 19:32:36,852 epoch 90 - iter 360/368 - loss 0.16539362 - samples/sec: 69.18 - lr: 0.006250
2020-10-22 19:32:40,642 -----------------------------------------------------------

2020-10-22 19:51:20,972 ----------------------------------------------------------------------------------------------------
2020-10-22 19:51:20,973 EPOCH 95 done: loss 0.1708 - lr 0.0062500
2020-10-22 19:52:10,549 DEV : loss 0.4162958562374115 - score 0.8512
2020-10-22 19:52:12,479 BAD EPOCHS (no improvement): 5
2020-10-22 19:52:12,479 ----------------------------------------------------------------------------------------------------
2020-10-22 19:52:29,724 epoch 96 - iter 36/368 - loss 0.15119970 - samples/sec: 68.84 - lr: 0.006250
2020-10-22 19:52:47,091 epoch 96 - iter 72/368 - loss 0.14604399 - samples/sec: 66.61 - lr: 0.006250
2020-10-22 19:53:03,906 epoch 96 - iter 108/368 - loss 0.15411143 - samples/sec: 68.80 - lr: 0.006250
2020-10-22 19:53:20,752 epoch 96 - iter 144/368 - loss 0.15791249 - samples/sec: 69.70 - lr: 0.006250
2020-10-22 19:53:37,776 epoch 96 - iter 180/368 - loss 0.16081407 - samples/sec: 68.94 - lr: 0.006250
2020-10-22 19:53:54,296 epoch 96 - iter 216/368 - lo

2020-10-22 20:12:03,219 epoch 101 - iter 180/368 - loss 0.16398447 - samples/sec: 70.18 - lr: 0.003125
2020-10-22 20:12:19,919 epoch 101 - iter 216/368 - loss 0.16046408 - samples/sec: 70.27 - lr: 0.003125
2020-10-22 20:12:36,773 epoch 101 - iter 252/368 - loss 0.15932449 - samples/sec: 69.66 - lr: 0.003125
2020-10-22 20:12:53,658 epoch 101 - iter 288/368 - loss 0.15720434 - samples/sec: 69.54 - lr: 0.003125
2020-10-22 20:13:10,379 epoch 101 - iter 324/368 - loss 0.15816732 - samples/sec: 70.21 - lr: 0.003125
2020-10-22 20:13:27,016 epoch 101 - iter 360/368 - loss 0.15858108 - samples/sec: 69.58 - lr: 0.003125
2020-10-22 20:13:30,955 ----------------------------------------------------------------------------------------------------
2020-10-22 20:13:30,955 EPOCH 101 done: loss 0.1578 - lr 0.0031250
2020-10-22 20:14:19,590 DEV : loss 0.41956210136413574 - score 0.8527
2020-10-22 20:14:21,493 BAD EPOCHS (no improvement): 5
2020-10-22 20:14:21,493 -----------------------------------------

2020-10-22 20:32:44,971 DEV : loss 0.4204995036125183 - score 0.8537
2020-10-22 20:32:46,860 BAD EPOCHS (no improvement): 4
2020-10-22 20:32:46,860 ----------------------------------------------------------------------------------------------------
2020-10-22 20:33:03,822 epoch 107 - iter 36/368 - loss 0.15842249 - samples/sec: 70.19 - lr: 0.001563
2020-10-22 20:33:20,275 epoch 107 - iter 72/368 - loss 0.15417178 - samples/sec: 71.40 - lr: 0.001563
2020-10-22 20:33:36,650 epoch 107 - iter 108/368 - loss 0.15052047 - samples/sec: 71.69 - lr: 0.001563
2020-10-22 20:33:53,246 epoch 107 - iter 144/368 - loss 0.14782753 - samples/sec: 70.73 - lr: 0.001563
2020-10-22 20:34:09,723 epoch 107 - iter 180/368 - loss 0.14994712 - samples/sec: 70.21 - lr: 0.001563
2020-10-22 20:34:26,792 epoch 107 - iter 216/368 - loss 0.15163033 - samples/sec: 68.79 - lr: 0.001563
2020-10-22 20:34:43,803 epoch 107 - iter 252/368 - loss 0.15213532 - samples/sec: 69.02 - lr: 0.001563
2020-10-22 20:35:02,295 epoch 10

2020-10-22 20:53:28,006 epoch 112 - iter 252/368 - loss 0.14145082 - samples/sec: 68.73 - lr: 0.000781
2020-10-22 20:53:44,692 epoch 112 - iter 288/368 - loss 0.14796462 - samples/sec: 70.40 - lr: 0.000781
2020-10-22 20:54:01,488 epoch 112 - iter 324/368 - loss 0.15068915 - samples/sec: 69.86 - lr: 0.000781
2020-10-22 20:54:18,526 epoch 112 - iter 360/368 - loss 0.15049402 - samples/sec: 67.88 - lr: 0.000781
2020-10-22 20:54:22,067 ----------------------------------------------------------------------------------------------------
2020-10-22 20:54:22,068 EPOCH 112 done: loss 0.1499 - lr 0.0007813
2020-10-22 20:55:11,263 DEV : loss 0.42142152786254883 - score 0.8527
2020-10-22 20:55:13,181 BAD EPOCHS (no improvement): 4
2020-10-22 20:55:13,182 ----------------------------------------------------------------------------------------------------
2020-10-22 20:55:30,759 epoch 113 - iter 36/368 - loss 0.19185520 - samples/sec: 67.43 - lr: 0.000781
2020-10-22 20:55:47,602 epoch 113 - iter 72/

2020-10-22 21:14:15,675 epoch 118 - iter 36/368 - loss 0.15156224 - samples/sec: 70.50 - lr: 0.000391
2020-10-22 21:14:32,902 epoch 118 - iter 72/368 - loss 0.14646500 - samples/sec: 67.12 - lr: 0.000391
2020-10-22 21:14:50,330 epoch 118 - iter 108/368 - loss 0.14566728 - samples/sec: 67.39 - lr: 0.000391
2020-10-22 21:15:07,331 epoch 118 - iter 144/368 - loss 0.14783675 - samples/sec: 68.00 - lr: 0.000391
2020-10-22 21:15:24,697 epoch 118 - iter 180/368 - loss 0.15149259 - samples/sec: 67.55 - lr: 0.000391
2020-10-22 21:15:41,564 epoch 118 - iter 216/368 - loss 0.14889265 - samples/sec: 68.59 - lr: 0.000391
2020-10-22 21:15:58,650 epoch 118 - iter 252/368 - loss 0.14959316 - samples/sec: 68.65 - lr: 0.000391
2020-10-22 21:16:15,602 epoch 118 - iter 288/368 - loss 0.14834335 - samples/sec: 68.23 - lr: 0.000391
2020-10-22 21:16:32,556 epoch 118 - iter 324/368 - loss 0.14866673 - samples/sec: 69.23 - lr: 0.000391
2020-10-22 21:16:48,999 epoch 118 - iter 360/368 - loss 0.15060692 - sample

2020-10-22 21:35:04,860 epoch 123 - iter 324/368 - loss 0.14747788 - samples/sec: 69.92 - lr: 0.000195
2020-10-22 21:35:21,868 epoch 123 - iter 360/368 - loss 0.14739360 - samples/sec: 69.00 - lr: 0.000195
2020-10-22 21:35:25,607 ----------------------------------------------------------------------------------------------------
2020-10-22 21:35:25,607 EPOCH 123 done: loss 0.1474 - lr 0.0001953
2020-10-22 21:36:15,522 DEV : loss 0.4210428297519684 - score 0.8522
2020-10-22 21:36:17,402 BAD EPOCHS (no improvement): 3
2020-10-22 21:36:17,403 ----------------------------------------------------------------------------------------------------
2020-10-22 21:36:34,464 epoch 124 - iter 36/368 - loss 0.15370604 - samples/sec: 68.48 - lr: 0.000195
2020-10-22 21:36:51,411 epoch 124 - iter 72/368 - loss 0.14630944 - samples/sec: 69.23 - lr: 0.000195
2020-10-22 21:37:07,873 epoch 124 - iter 108/368 - loss 0.14446120 - samples/sec: 70.28 - lr: 0.000195
2020-10-22 21:37:24,555 epoch 124 - iter 144/3

{'test_score': 0.8495,
 'dev_score_history': [0.7223,
  0.7195,
  0.7901,
  0.7735,
  0.6891,
  0.7216,
  0.7972,
  0.7919,
  0.7921,
  0.8052,
  0.7786,
  0.8052,
  0.7763,
  0.8141,
  0.8151,
  0.8082,
  0.81,
  0.7435,
  0.8141,
  0.8133,
  0.8167,
  0.8062,
  0.7837,
  0.8251,
  0.8192,
  0.8172,
  0.8159,
  0.7783,
  0.8149,
  0.8149,
  0.831,
  0.8274,
  0.8287,
  0.8272,
  0.8348,
  0.8379,
  0.8343,
  0.8289,
  0.8374,
  0.8282,
  0.8256,
  0.8264,
  0.8415,
  0.8369,
  0.8376,
  0.8356,
  0.8417,
  0.8392,
  0.841,
  0.8433,
  0.8381,
  0.8384,
  0.8458,
  0.8438,
  0.8433,
  0.8422,
  0.8456,
  0.8481,
  0.8463,
  0.8479,
  0.8453,
  0.8458,
  0.8502,
  0.8514,
  0.8486,
  0.8443,
  0.843,
  0.8445,
  0.8497,
  0.8512,
  0.8502,
  0.8491,
  0.8517,
  0.8517,
  0.8497,
  0.8502,
  0.8499,
  0.8481,
  0.8504,
  0.8494,
  0.8509,
  0.8486,
  0.8525,
  0.8509,
  0.8553,
  0.8512,
  0.852,
  0.8525,
  0.8545,
  0.8566,
  0.8543,
  0.8507,
  0.8532,
  0.8525,
  0.8512,
  0.852,
  0

# transformer

In [3]:
torch.cuda.empty_cache()

document_embeddings = TransformerDocumentEmbeddings('bert-base-uncased', fine_tune=True)

classifier = TextClassifier(document_embeddings, label_dictionary=label_dict)

trainer = ModelTrainer(classifier, corpus, optimizer=Adam)

trainer.train('classifiers/spooky_authorship_classifier_transformer',
              learning_rate=3e-5, # use very small learning rate
              mini_batch_size=1,
              mini_batch_chunk_size=4, # optionally set this if transformer is too much for your machine
              max_epochs=50, # terminate after 5 epochs
              )

2020-10-22 01:59:06,967 ----------------------------------------------------------------------------------------------------
2020-10-22 01:59:06,969 Model: "TextClassifier(
  (document_embeddings): TransformerDocumentEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
               

2020-10-22 01:59:06,969 ----------------------------------------------------------------------------------------------------
2020-10-22 01:59:06,969 Corpus: "Corpus: 11762 train + 3911 dev + 3906 test sentences"
2020-10-22 01:59:06,970 ----------------------------------------------------------------------------------------------------
2020-10-22 01:59:06,970 Parameters:
2020-10-22 01:59:06,970  - learning_rate: "3e-05"
2020-10-22 01:59:06,971  - mini_batch_size: "1"
2020-10-22 01:59:06,971  - patience: "3"
2020-10-22 01:59:06,971  - anneal_factor: "0.5"
2020-10-22 01:59:06,972  - max_epochs: "50"
2020-10-22 01:59:06,972  - shuffle: "True"
2020-10-22 01:59:06,973  - train_with_dev: "False"
2020-10-22 01:59:06,973  - batch_growth_annealing: "False"
2020-10-22 01:59:06,973 ----------------------------------------------------------------------------------------------------
2020-10-22 01:59:06,974 Model training base path: "classifiers/spooky_authorship_classifier_transformer"
2020-10-22 01

2020-10-22 03:08:01,902 epoch 5 - iter 10584/11762 - loss 0.16065545 - samples/sec: 19.66 - lr: 0.000030
2020-10-22 03:09:03,078 epoch 5 - iter 11760/11762 - loss 0.16426696 - samples/sec: 19.28 - lr: 0.000030
2020-10-22 03:09:03,214 ----------------------------------------------------------------------------------------------------
2020-10-22 03:09:03,215 EPOCH 5 done: loss 0.1643 - lr 0.0000300
2020-10-22 03:09:33,793 DEV : loss 1.2200660705566406 - score 0.8312
2020-10-22 03:09:35,671 BAD EPOCHS (no improvement): 2
2020-10-22 03:09:35,672 ----------------------------------------------------------------------------------------------------
2020-10-22 03:10:37,064 epoch 6 - iter 1176/11762 - loss 0.13672677 - samples/sec: 19.23 - lr: 0.000030
2020-10-22 03:11:38,217 epoch 6 - iter 2352/11762 - loss 0.13037400 - samples/sec: 19.29 - lr: 0.000030
2020-10-22 03:12:38,929 epoch 6 - iter 3528/11762 - loss 0.12928274 - samples/sec: 19.43 - lr: 0.000030
2020-10-22 03:13:39,718 epoch 6 - iter 

2020-10-22 04:05:06,359 epoch 11 - iter 2352/11762 - loss 0.48739608 - samples/sec: 19.49 - lr: 0.000015
2020-10-22 04:06:07,073 epoch 11 - iter 3528/11762 - loss 0.45137204 - samples/sec: 19.42 - lr: 0.000015
2020-10-22 04:07:07,693 epoch 11 - iter 4704/11762 - loss 0.43779539 - samples/sec: 19.46 - lr: 0.000015
2020-10-22 04:08:08,821 epoch 11 - iter 5880/11762 - loss 0.48126263 - samples/sec: 19.29 - lr: 0.000015
2020-10-22 04:09:09,987 epoch 11 - iter 7056/11762 - loss 0.49364693 - samples/sec: 19.28 - lr: 0.000015
2020-10-22 04:10:11,475 epoch 11 - iter 8232/11762 - loss 0.52547924 - samples/sec: 19.18 - lr: 0.000015
2020-10-22 04:11:12,445 epoch 11 - iter 9408/11762 - loss 0.57372096 - samples/sec: 19.35 - lr: 0.000015
2020-10-22 04:12:16,188 epoch 11 - iter 10584/11762 - loss 0.61729554 - samples/sec: 18.50 - lr: 0.000015
2020-10-22 04:13:18,869 epoch 11 - iter 11760/11762 - loss 0.65836284 - samples/sec: 18.82 - lr: 0.000015
2020-10-22 04:13:19,013 -----------------------------

2020-10-22 05:04:38,705 epoch 16 - iter 9408/11762 - loss 0.24334196 - samples/sec: 19.45 - lr: 0.000004
2020-10-22 05:05:37,681 epoch 16 - iter 10584/11762 - loss 0.24516871 - samples/sec: 20.00 - lr: 0.000004
2020-10-22 05:06:38,146 epoch 16 - iter 11760/11762 - loss 0.24826148 - samples/sec: 19.51 - lr: 0.000004
2020-10-22 05:06:38,285 ----------------------------------------------------------------------------------------------------
2020-10-22 05:06:38,286 EPOCH 16 done: loss 0.2482 - lr 0.0000038
2020-10-22 05:07:09,102 DEV : loss 1.1977378129959106 - score 0.7934
2020-10-22 05:07:11,084 BAD EPOCHS (no improvement): 1
2020-10-22 05:07:11,085 ----------------------------------------------------------------------------------------------------
2020-10-22 05:08:11,359 epoch 17 - iter 1176/11762 - loss 0.13938726 - samples/sec: 19.58 - lr: 0.000004
2020-10-22 05:09:12,185 epoch 17 - iter 2352/11762 - loss 0.19535208 - samples/sec: 19.39 - lr: 0.000004
2020-10-22 05:10:13,258 epoch 17 

{'test_score': 0.8469,
 'dev_score_history': [0.8251,
  0.8417,
  0.8468,
  0.8397,
  0.8312,
  0.8146,
  0.7366,
  0.7977,
  0.7635,
  0.5285,
  0.4017,
  0.7891,
  0.7233,
  0.8006,
  0.8105,
  0.7934,
  0.8131,
  0.8018,
  0.822],
 'train_loss_history': [0.8474903985428051,
  0.4328962669024286,
  0.2870155546113206,
  0.20389938118258502,
  0.1642565741552335,
  0.17634345009699567,
  0.31582380232078633,
  0.6961090544328526,
  0.43322091353308045,
  0.5079981821086628,
  0.658336912656301,
  0.7135963585032439,
  0.39383146148178166,
  0.34309273786842465,
  0.30168497799569527,
  0.24822078653878338,
  0.20477457962662532,
  0.1566602155592032,
  0.12913906098645614],
 'dev_loss_history': [0.6951817274093628,
  0.9179255962371826,
  0.6853742003440857,
  0.9877879619598389,
  1.2200660705566406,
  1.2726454734802246,
  1.0422911643981934,
  1.0451858043670654,
  1.0180667638778687,
  2.696859836578369,
  1.2079285383224487,
  1.0325223207473755,
  0.9829927086830139,
  1.1076633