In [18]:
# Train number of different models from Flair framework.
# With different sized trainin data
# save predictions of each model to file

# Notice - 1st run may take long as model weights are downloaded

### Version for dataset business news

In [19]:
#dataset = 'hatespeech'
dataset = 'businessnews'

In [20]:
# Dataset
# https://github.com/t-davidson/hate-speech-and-offensive-language

# Paper
# https://aaai.org/ocs/index.php/ICWSM/ICWSM17/paper/view/15665

# Their code
# https://github.com/t-davidson/hate-speech-and-offensive-language/blob/master/src/Automated%20Hate%20Speech%20Detection%20and%20the%20Problem%20of%20Offensive%20Language%20Python%203.6.ipynb

In [21]:
# Code based on https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md

In [22]:
# lm-multi-forward-v0.1.pt-tmp-cache.sqllite   up to 40,8 GB!!!

In [23]:
import pandas as pd
import random
import os
import numpy as np
import torch

import nltk
import string
import re
from nltk.stem.porter import *
import time

from collections import deque
from collections import defaultdict


# Display whole text of dataframe field and don't cut it
# pd.set_option('display.max_colwidth', -1)

pd.set_option('display.max_colwidth', 100)

In [24]:
print(f'torch version: {torch.__version__}')

torch version: 1.3.1


In [25]:
import flair
flair.__version__

'0.4.4'

In [26]:
#dataset = 'hatespeech'

current = os.getcwd()
basefolder = current + '/dataset_'+ dataset+'/'
datafolder = basefolder + 'data/'  # for example /dataset_businessnews/data/
print(basefolder)

infolder =  basefolder + 'input/'
outfolder = basefolder + 'output/'

/home/max/git/newcombined/dataset_businessnews/


In [27]:
from flair.data import Sentence
from flair.data_fetcher import NLPTaskDataFetcher

from flair.embeddings import WordEmbeddings, StackedEmbeddings, DocumentRNNEmbeddings
from flair.embeddings import DocumentPoolEmbeddings
from flair.embeddings import FlairEmbeddings, BertEmbeddings, ELMoEmbeddings
from flair.embeddings import BytePairEmbeddings

from flair.embeddings import OpenAIGPTEmbeddings
from flair.embeddings import OpenAIGPT2Embeddings

# New DocumentRNNEmbeddings, deprecates DocumentLSTMembeddings
# from flair.embeddings import #DocumentLSTMEmbeddings

from flair.models import TextClassifier
from flair.trainers import ModelTrainer
from pathlib import Path

# new ones: GPT-1 and GPT-2
# https://github.com/flairNLP/flair/tree/master/resources/docs/embeddings
# https://github.com/flairNLP/flair/blob/master/resources/docs/embeddings/TRANSFORMER_EMBEDDINGS.md

In [28]:
SEED = 1
# REPEATABILITY
def seed_everything(seed=SEED):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
seed_everything(seed=1) # also called here

# TEXT PREPROCESS
puncts = [',', '.', '"', ':', ')', '(', '-', '!', '?', '|', ';', "'", '$', '&', '/', '[', ']', '>', '%', '=', '#', '*', '+', '\\', '•',  '~', '@', '£', 
 '·', '_', '{', '}', '©', '^', '®', '`',  '<', '→', '°', '€', '™', '›',  '♥', '←', '×', '§', '″', '′', 'Â', '█', '½', 'à', '…', 
 '“', '★', '”', '–', '●', 'â', '►', '−', '¢', '²', '¬', '░', '¶', '↑', '±', '¿', '▾', '═', '¦', '║', '―', '¥', '▓', '—', '‹', '─', 
 '▒', '：', '¼', '⊕', '▼', '▪', '†', '■', '’', '▀', '¨', '▄', '♫', '☆', 'é', '¯', '♦', '¤', '▲', 'è', '¸', '¾', 'Ã', '⋅', '‘', '∞', 
 '∙', '）', '↓', '、', '│', '（', '»', '，', '♪', '╩', '╚', '³', '・', '╦', '╣', '╔', '╗', '▬', '❤', 'ï', 'Ø', '¹', '≤', '‡', '√', ]

def clean_text(x):
    x = str(x)
    for punct in puncts:
        if punct in x: # comparison makes faster
            x = x.replace(punct, f' {punct} ')
    return x

quotes = ['″', '′', '"'] # apostrophe "'"
def mark_quotes(x):
    x = str(x)
    for quote in quotes:
        if quote in x: # comparison makes faster
            x = x.replace(quote, f'quote')
    return x

def preprocess(text_string):
    """
    Accepts a text string and replaces:
    1) urls with URLHERE
    2) lots of whitespace with one instance
    3) mentions with MENTIONHERE

    This allows us to get standardized counts of urls and mentions
    Without caring about specific people mentioned
    """
    space_pattern = '\s+'
    giant_url_regex = ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
        '[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    
    mention_regex = '@[\w\-]+'
    
    #add #, mention, e.g. &#8120     
    mention_regex2  =   '&#[0-9]*' 
    
    
    parsed_text = re.sub(space_pattern, ' ', text_string)
    parsed_text = re.sub(giant_url_regex, ' URL ', parsed_text)
    parsed_text = re.sub(mention_regex, ' MENTION', parsed_text)
    parsed_text = re.sub(mention_regex2, ' MENTION', parsed_text)    
    return parsed_text

def tokenize(tweet):
    """Removes punctuation & excess whitespace, sets to lowercase,
    and stems tweets. Returns a list of stemmed tokens."""
    tweet = " ".join(re.split("[^a-zA-Z]*", tweet.lower())).strip()
    tokens = [stemmer.stem(t) for t in tweet.split()]
    return tokens

def basic_tokenize(tweet):
    """Same as tokenize but without the stemming"""
    # *needed to be removed or outputs a list of letters
    #tweet = " ".join(re.split("[^a-zA-Z.,!?]*", tweet.lower())).strip()
    tweet = " ".join(re.split("[^a-zA-Z.,!?]", tweet.lower())).strip()  
    #tweet = " ".join(re.split(r'\s+', tweet.lower())).strip()
    return tweet.split()

In [29]:
def loadData():
    train = pd.read_csv(basefolder+'input/train.csv',sep='\t', header = None)
    dev = pd.read_csv(basefolder+'input/dev.csv'  ,sep='\t', header = None)
    test = pd.read_csv(basefolder+'input/test.csv'  ,sep='\t', header = None)    
    train.columns = ['id','label','text']
    dev.columns  = ['id','label', 'text']
    test.columns  = ['id','label', 'text']   
    return train, dev, test

In [30]:
def preprosess_hatespeech(df):
    df.text = df.text.apply(lambda x: preprocess(x)) #URL, @mention etc
    df["text"] = df["text"].apply(lambda x: clean_text(x))
    df["text"] = df["text"].apply(lambda x: mark_quotes(x))
    #df.text = df.text.apply(lambda x: basic_tokenize(x))
    return df

In [31]:
def preprosess(df):
    df.text = df.text.apply(lambda x: preprocess(x)) #URL, @mention etc
    df["text"] = df["text"].apply(lambda x: clean_text(x))
    df["text"] = df["text"].apply(lambda x: mark_quotes(x))
    #df.text = df.text.apply(lambda x: basic_tokenize(x))
    return df

In [32]:
# Turn label from digit into Fasttext format __label__.  "1" into "__label__1"
def toFasttext(df):
    df['label'] = '__label__' + df['label'].astype(str)
    return df

In [33]:
#data_folder = infolder

In [34]:
# Max len of training data is 13718
13718

13718

In [37]:
# Load data
train, dev, test = loadData()

# train = train.iloc[np.random.permutation(len(train))]


#Test smaller
trainsize = len(train)

'''SET TRAINSIZE HERE'''
# 100, 200, 500, 1k, 3k, 7k, 18k

# 100,100   200,200  500,200    1k,200     3k,500    7k, 1k

# 1k,200 (gpt-1 failed, used copy of glove) glove was 0.55 but 2nd of it got 0.45
# 3k,500 gpt-1 failed again, copy of glove
# 7k,1k gpt-1 copy of glove

# 13k

trainsize = 200
devsize = 500


# pick trainsize from train, keeping class ratios
from sklearn.model_selection import train_test_split

seed_everything(seed=1)

#train = train[0:trainsize]
# slice only part of train to use, discard rest
if trainsize < len(train):
    train, _ = train_test_split(train, stratify=train['label'], train_size=trainsize)
if devsize < len(dev):
    dev, _ = train_test_split(dev, stratify=dev['label'], train_size=devsize)


# testsize = 100  needs to match 3k and same ordering as in final test set
testsize=500 #1000
seed_everything(seed=1)
test, _ = train_test_split(test, stratify=test['label'], train_size=testsize)

print(len(train))
print(len(dev))
print(len(test))

200
500
500


In [38]:
test.head()

Unnamed: 0,id,label,text
1061,13356,4,Hybrids: ENTITY Toyota Motor reaches 10 million cars sold\ntoday in 07:41\nENTITY Toyota Motor h...
1397,15397,1,BMW reportedly ends its self-driving car partnership with Chinese tech giant ENTITY Baidu\nAfter...
159,4294,0,The Head of Samsung Is Again Being Questioned Behind Closed Doors\nA South Korean judge question...
2065,18544,4,"China launches $11 billion fund for Central, Eastern Europe - Reuters\nBEIJING China has set up ..."
1300,7648,2,Alaska Air-Virgin America merger gets US nod\nThe US Department of Justice (DOJ) has approved Al...


### Preprocess

In [39]:
train = preprosess_hatespeech(train)
dev = preprosess_hatespeech(dev)
test = preprosess_hatespeech(test)

In [40]:
train = toFasttext(train)
dev = toFasttext(dev)
test = toFasttext(test)

In [41]:
train['text'].iloc[0]

"Coke ' s profit hit by weak developing markets ENTITY Coca - Cola ' s profit fell by more than half in the final quarter of the year ,  hurt by weakness in developing markets ,  but the company pointed to signs in North America and elsewhere that its shift to lower - calorie beverages and smaller ,  higher - priced packages is working .  Coke has been divesting from its bottling operations to focus on its more profitable concentrate business ,  a process it expects to accelerate in 2017 to meet a self - imposed December deadline .  That contributed to the 56 %  decline in fourth - quarter net income ,  to  $ 550 million ,  compared with  $ 1 . 24 billion a year earlier .  The company expects adjusted earnings per share to decline 1 %  to 4 %  in 2017 .  In the U . S .  ,  Coke is shifting from a volume - based model to one in which the company shares profit with its bottlers ,  selling smaller cans and bottles at higher prices .  Smaller packages in the U . S .  grew almost 10 %  in t

In [42]:
len(train['text'].iloc[0])

3117

In [43]:
train.head()

Unnamed: 0,id,label,text
11637,6406,__label__1,Coke ' s profit hit by weak developing markets ENTITY Coca - Cola ' s profit fell by more than h...
11694,4446,__label__0,Patent - holding company ’ s $ 533M verdict against Apple is dust on appeal Enlarge / A repre...
1964,14702,__label__0,"Greylock just hired Josh McFarland , who sold his Greylock - backed company to ENTITY Twitter J..."
9935,4947,__label__1,"Carrier gets state incentives , Trump pledge for keeping US jobs WASHINGTON ENTITY United Techn..."
10386,11601,__label__4,The Private Capital Management Inc . Increases Stake in ENTITY Nikeke Private Capital Managemen...


### Possible truncate of data fields

In [25]:
train['text'].map(len).hist()

<matplotlib.axes._subplots.AxesSubplot at 0x7f2ea76a6588>

In [26]:
# longest text in data set by characters
longest = train['text'].map(len).max()
longest

5690

In [44]:
# Truncate to 3K for faster processing
maxlen = 1500
#maxlen = 1000
#maxlen = 500

# at 7k, down from 500 to 400
#maxlen = 400

# 11k again 500 - elmo hanged
#maxlen = 300

train['text'] = train.apply(lambda row: row['text'][0:maxlen], axis=1 )
dev['text']   =   dev.apply(lambda row: row['text'][0:maxlen], axis=1 )
test['text']  =  test.apply(lambda row: row['text'][0:maxlen], axis=1 )


In [45]:
# Id in start of line is not Fasttext format: remove id
train.drop(['id'], axis=1, inplace=True)
dev.drop(['id'], axis=1, inplace=True)
test.drop(['id'], axis=1, inplace=True)

# Write to Flairs input csv files
train.to_csv(basefolder+'input/flair_train.csv',sep='\t', index = False, header = False)
dev.to_csv(basefolder+'input/flair_dev.csv'  ,sep='\t', index = False, header = False)
test.to_csv(basefolder+'input/flair_test.csv',sep='\t', index = False, header = False)

### Flair

### Stacked embeddings


In [46]:
# Embeddings
# https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md

# 'multi-forward', multi-lang English, German, French, Italian, Dutch, Polish, 
#        Mix of corpora (Web, Wikipedia, Subtitles, News)

# 'mix-forward'English,   Forward LM embeddings over mixed corpus (Web, Wikipedia, Subtitles)

`StackedEmbeddings` are currently a `WordEmbeddings` class, so they cannot directly be used to classify 
documents. They can only be used for sequence labeling.

However, you can put a stack of word embeddings into one of the `DocumentEmbeddings` classes such as `DocumentPoolEmbeddings` or `DocumentLSTMEmbeddings`. This way, you are specifying how to aggregate word embeddings for text classification

So `DocumentPoolEmbeddings` will simply average them, while `DocumentLSTMEmbeddings` will train an LSTM over them.

 https://github.com/zalandoresearch/flair/issues/414
 
 *update depracated: DocumentLSTMEmbeddings. (The functionality of this class is moved to 'DocumentRNNEmbeddings')

In [47]:
# OR average 
# document_embeddings = DocumentPoolEmbeddings(word_embeddings)

#### Model

In [48]:
# https://towardsdatascience.com/text-classification-with-state-of-the-art-nlp-library-flair-b541d7add21f

# https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_7_TRAINING_A_MODEL.md

In [49]:
infolder

'/home/max/git/newcombined/dataset_businessnews/input/'

In [50]:
data_folder = infolder
data_folder

'/home/max/git/newcombined/dataset_businessnews/input/'

In [51]:
corpus = NLPTaskDataFetcher.load_classification_corpus(data_folder=infolder, test_file='flair_test.csv', dev_file='flair_dev.csv', train_file='flair_train.csv')

2020-05-18 16:00:33,922 Reading data from /home/max/git/newcombined/dataset_businessnews/input
2020-05-18 16:00:33,923 Train: /home/max/git/newcombined/dataset_businessnews/input/flair_train.csv
2020-05-18 16:00:33,924 Dev: /home/max/git/newcombined/dataset_businessnews/input/flair_dev.csv
2020-05-18 16:00:33,925 Test: /home/max/git/newcombined/dataset_businessnews/input/flair_test.csv


  """Entry point for launching an IPython kernel.
  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


In [52]:
print(corpus)
#print(len(corpus.train))

# 2. create the label dictionary
label_dict = corpus.make_label_dictionary()
#print(label_dict)

Corpus: 200 train + 500 dev + 500 test sentences
2020-05-18 16:00:37,934 Computing label dictionary. Progress:


100%|██████████| 200/200 [00:00<00:00, 99039.06it/s]

2020-05-18 16:00:37,953 [b'1', b'0', b'4', b'3', b'2']





In [53]:
### Individual model only, skip when in batch

In [54]:
# classifier = TextClassifier(stacked_embeddings, label_dictionary=corpus.make_label_dictionary(),
#                            multi_label=False)

In [55]:
# Glove alone 0.902

In [56]:
# Flair multi takes a lot of disk space
# Flair-multi can take 20-40 GB for each direction!
# 

#NEW BEST adding preprocess puncts+quotes, glove, en-twitter, en-crawl:   test_score': 0.9242,

# Winners: Fasttext: en-crawl, 0.9129 solo

# word embeddings pooled:glove+fasttext 'test_score': 0.8892,

#                   WordEmbeddings('en'),       # FastText embeddings over news and wikipedia data                
#                   WordEmbeddings('en-crawl'), # FastText embeddings over Web crawls
# Good ones
# glove, en-twitter, en-crawl  0.9242
# Fasttext: en-crawl, 0.9129 solo



### Predict

In [57]:
def saveResults(savelist, name='all_default'):
    import shelve
    # file to be used
    filename= name+'.shlf'
    shelf = shelve.open(outfolder+filename)
    #shelf = shelve.open("all_flair.shlf")
    # serializing
    #shelf["all_flair"] = all_flair
    shelf[name] = savelist
    shelf.close() # you must close the shelve file!!!

In [58]:
def loadResults(name='all_default'):
    import shelve
    filename= name+'.shlf'
    shelf = shelve.open(outfolder+filename) 
    new = shelf[name]
    shelf.close()
    return new

In [69]:
''' Train with each embedding in the list, predict, add results to the list

Parameters: word_embeddings,    eg   WordEmbeddings('glove')
            modelname and modeldesc - text to be added in results
            savelist : list where results are appended. Can be empty or already including results
            epohcs: epochs to run

'''


def train_and_predict_single(embeddings, modelname, modeldesc, savelist, epochs=15, 
                             batch_size=32, embeddings_storage_mode='cpu'):
    
    print(modelname)
    start = time.time()
    
# PREPARE
    document_embeddings = DocumentRNNEmbeddings(word_embeddings, hidden_size=512,
                                           bidirectional = False,
                                           rnn_type='LSTM', 
                                           reproject_words=True, reproject_words_dimension=256                                                    
                                           )

    seed_everything(SEED)
    corpus = NLPTaskDataFetcher.load_classification_corpus(data_folder, test_file='flair_test.csv', dev_file='flair_dev.csv', train_file='flair_train.csv')

    seed_everything(SEED)
    label_dict = corpus.make_label_dictionary()

    seed_everything(SEED)
    classifier = TextClassifier(document_embeddings, label_dictionary=corpus.make_label_dictionary(),
                            multi_label=False)
    trainer = ModelTrainer(classifier, corpus)
    
# TRAINING
    seed_everything(SEED)
    trainer.train('./', 
              learning_rate=0.1,
              mini_batch_size=batch_size, # 32  # BERT OOM even with 16 batch -> need 8. others run on 32 or even more
              anneal_factor=0.5,
              patience=5,     
              max_epochs=epochs,  #15
              #embeddings_storage_mode='cpu',
              embeddings_storage_mode= embeddings_storage_mode,                  
              #embeddings_storage_mode='gpu'
              ) #max_epochs=150

    duration_train = time.time()-start

    # should this be set? embeddings_storage_mode='gpu'
    
    
# PREDICT - INDIVIDUAL, SLOWER, but more robust
    print('starting prediction')
    start_pred = time.time()

    # turn text into Flairs "Sentence object"
    test['flair_sentence'] = test['text'].apply(lambda x: Sentence(x))

    # discard output, result is put into object itself
    _ = test['flair_sentence'].apply(lambda x: classifier.predict(x))

    # sentence.labels returns a list containing flairs Label object that includes a dict. 
    # dig the values for predicted label + confidence from within the dict
    # the 'value' returns a str, cast it to int
    test['yhat'] = test['flair_sentence'].apply(lambda x: int(x.labels[0].to_dict()['value']))

    test['confidence'] = test['flair_sentence'].apply(lambda x: x.labels[0].to_dict()['confidence'])

    results = pd.DataFrame(test[['yhat', 'confidence']])
    results.columns=['label','confidence']
    results.head()
    
    
    
# ADD RESULTS TO LIST

    duration_predict = time.time() - start_pred
    #print(f'Duration {duration:.2f} s')

    savelist.append({'model': modelname,
                'labels': results['label'],
                'confidence': results['confidence'],
                'traintime': duration_train,
                'predtime3k': duration_predict,
                'modeldesc': modeldesc
               }
              )

In [60]:
''' BATCH VERSION OF PREDICTION
Train with each embedding in the list, predict, add results to the list

Parameters: word_embeddings,    eg   WordEmbeddings('glove')
            modelname and modeldesc - text to be added in results
            savelist : list where results are appended. Can be empty or already including results
            epohcs: epochs to run

'''


def train_and_predict(embeddings, modelname, modeldesc, savelist, epochs=15, batch_size=32):
    
    print(modelname)
    start = time.time()
    
# PREPARE
    document_embeddings = DocumentRNNEmbeddings(word_embeddings, hidden_size=512,
                                           bidirectional = False,
                                           rnn_type='LSTM', 
                                           reproject_words=True, reproject_words_dimension=256                                                    
                                           )

    seed_everything(SEED)
    corpus = NLPTaskDataFetcher.load_classification_corpus(data_folder, test_file='flair_test.csv', dev_file='flair_dev.csv', train_file='flair_train.csv')

    seed_everything(SEED)
    label_dict = corpus.make_label_dictionary()

    seed_everything(SEED)
    classifier = TextClassifier(document_embeddings, label_dictionary=corpus.make_label_dictionary(),
                            multi_label=False)
    trainer = ModelTrainer(classifier, corpus)
    
# TRAINING
    seed_everything(SEED)
    trainer.train('./', 
              learning_rate=0.1,
              mini_batch_size=batch_size, # 32  # BERT OOM even with 16 batch -> need 8. others run on 32 or even more
              anneal_factor=0.5,
              patience=5,     
              max_epochs=epochs,  #15
              #embeddings_storage_mode='gpu'
              embeddings_storage_mode='cpu'                  
              ) #max_epochs=150

    duration_train = time.time()-start

    # should this be set? embeddings_storage_mode='gpu'
    
    
#  PREDICT BATCH - FASTER
#  https://github.com/flairNLP/flair/issues/1443

    print('starting prediction')
    start_pred = time.time()
    
    # place for sentences
    sentences1 = deque() # deque is list with access on both ends
    
    # turn text into Flairs "Sentence object"
    test['flair_sentence'] = test['text'].apply(lambda x: Sentence(x))
    
    # add them to list
    for i in range(len(test)):
        sentences1.append(test.iloc[i]['flair_sentence'])
        
    # initialize values for loop
    scores = defaultdict(float)
    values = defaultdict(str)
    confidences = defaultdict(float)
    i = 0

    # predict for all sentences (mini_batch_size=32 worked with 1000 reviews on Colab)
    #classifier.predict(sentences1, mini_batch_size=32)    
    classifier.predict(sentences1, mini_batch_size=16)    
        
    # deque version to save memory
    while len(sentences1) > 0:
        sentence = sentences1.popleft()
        
        # is the dict value of 'score' same as 'condifence' in individual?
        #scores[i] = sentence.labels[0].score
        values[i] = int(sentence.labels[0].value)
        confidences[i] = sentence.labels[0].score
        i+=1

    # convert to dataframe
    results = pd.DataFrame({'label': values, 'confidence': confidences})        
        
    #test['yhat'] = values
    #test['confidence'] = confidences
        

    # discard output, result is put into object itself
    #_ = test['flair_sentence'].apply(lambda x: classifier.predict(x))

    # sentence.labels returns a list containing flairs Label object that includes a dict. 
    # dig the values for predicted label + confidence from within the dict
    # the 'value' returns a str, cast it to int
    #test['yhat'] = test['flair_sentence'].apply(lambda x: int(x.labels[0].to_dict()['value']))

    #test['confidence'] = test['flair_sentence'].apply(lambda x: x.labels[0].to_dict()['confidence'])

    #results = pd.DataFrame(test[['yhat', 'confidence']])
    #results.columns=['label','confidence']
    #results.head()


    
    
# ADD RESULTS TO LIST

    duration_predict = time.time() - start_pred
    #print(f'Duration {duration:.2f} s')

    savelist.append({'model': modelname,
                'labels': results['label'],
                'confidence': results['confidence'],
                'traintime': duration_train,
                'predtime3k': duration_predict,
                'modeldesc': modeldesc
               }
              )

Object `predict` not found.


### function mode


In [61]:
savelist = []
modeldesc = '512LSTM_15epoch_non-bi'
BATCH_SIZE=32
#BATCH_SIZE=64
EPOCHS = 15

#### To use ELMoEmbeddings, please first install with "pip install allennlp"

In [306]:
# test time with single
total_time = time.time()

word_embeddings = [ WordEmbeddings('glove'),              ]
modelname = 'glove'
train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS)

# word_embeddings = [ ELMoEmbeddings('original')              ]
# modelname = 'elmo'
# train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS)


print(time.time() - total_time)

# training time 2k char, test 3k sample , batch 64
# train, dev: 100 + 100 , time: 69 s glove
#                                  s elmo: very long - 10min+ then stopped with break

# test-set to 100 also, char to 1.5k
# elmo only 82 sec   embedding_storagemode = gpu  -> 84.7 sec. 3 sec more than cpu



# prediction takes longer time than training of model itself.

# test set back to 3k:   
# batch prediciton: 166 sec

glove
2020-05-17 10:08:14,933 Reading data from /home/max/git/newcombined/dataset_businessnews/input
2020-05-17 10:08:14,934 Train: /home/max/git/newcombined/dataset_businessnews/input/flair_train.csv
2020-05-17 10:08:14,934 Dev: /home/max/git/newcombined/dataset_businessnews/input/flair_dev.csv
2020-05-17 10:08:14,934 Test: /home/max/git/newcombined/dataset_businessnews/input/flair_test.csv


  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


2020-05-17 10:08:21,970 Computing label dictionary. Progress:


100%|██████████| 3000/3000 [00:00<00:00, 237162.85it/s]

2020-05-17 10:08:21,986 [b'0', b'3', b'1', b'4', b'2']
2020-05-17 10:08:21,987 Computing label dictionary. Progress:



100%|██████████| 3000/3000 [00:00<00:00, 214341.40it/s]

2020-05-17 10:08:22,005 [b'0', b'3', b'1', b'4', b'2']
2020-05-17 10:08:22,009 ----------------------------------------------------------------------------------------------------
2020-05-17 10:08:22,011 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings('glove')
    )
    (word_reprojection_map): Linear(in_features=100, out_features=256, bias=True)
    (rnn): LSTM(256, 512, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=512, out_features=5, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2020-05-17 10:08:22,012 ----------------------------------------------------------------------------------------------------
2020-05-17 10:08:22,013 Corpus: "Corpus: 3000 train + 500 dev + 500 test sentences"
2020-05-17 10:08:22,014 ----------------------------------------------------------------------------------------------------
2020-05-17 10:08:




2020-05-17 10:08:22,261 epoch 1 - iter 0/94 - loss 1.59496295 - samples/sec: 1267.10
2020-05-17 10:08:23,956 epoch 1 - iter 9/94 - loss 1.51577960 - samples/sec: 170.68
2020-05-17 10:08:25,313 epoch 1 - iter 18/94 - loss 1.50560645 - samples/sec: 213.37
2020-05-17 10:08:26,648 epoch 1 - iter 27/94 - loss 1.50031741 - samples/sec: 216.80
2020-05-17 10:08:28,262 epoch 1 - iter 36/94 - loss 1.48409058 - samples/sec: 179.31
2020-05-17 10:08:29,583 epoch 1 - iter 45/94 - loss 1.47157822 - samples/sec: 219.23
2020-05-17 10:08:30,890 epoch 1 - iter 54/94 - loss 1.46278243 - samples/sec: 221.62
2020-05-17 10:08:32,203 epoch 1 - iter 63/94 - loss 1.45844254 - samples/sec: 220.67
2020-05-17 10:08:33,484 epoch 1 - iter 72/94 - loss 1.45734484 - samples/sec: 226.05
2020-05-17 10:08:34,790 epoch 1 - iter 81/94 - loss 1.45714777 - samples/sec: 221.59
2020-05-17 10:08:36,095 epoch 1 - iter 90/94 - loss 1.45742075 - samples/sec: 221.96
2020-05-17 10:08:36,523 ------------------------------------------

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-17 10:08:43,930 ----------------------------------------------------------------------------------------------------
2020-05-17 10:08:44,038 epoch 2 - iter 0/94 - loss 1.43055642 - samples/sec: 2732.62
2020-05-17 10:08:44,918 epoch 2 - iter 9/94 - loss 1.40463394 - samples/sec: 330.23
2020-05-17 10:08:45,819 epoch 2 - iter 18/94 - loss 1.43121862 - samples/sec: 321.92
2020-05-17 10:08:46,707 epoch 2 - iter 27/94 - loss 1.43193663 - samples/sec: 327.14
2020-05-17 10:08:47,796 epoch 2 - iter 36/94 - loss 1.42722055 - samples/sec: 266.01
2020-05-17 10:08:48,892 epoch 2 - iter 45/94 - loss 1.42128887 - samples/sec: 264.56
2020-05-17 10:08:49,784 epoch 2 - iter 54/94 - loss 1.42119730 - samples/sec: 325.25
2020-05-17 10:08:50,686 epoch 2 - iter 63/94 - loss 1.41948917 - samples/sec: 321.82
2020-05-17 10:08:51,592 epoch 2 - iter 72/94 - loss 1.42336605 - samples/sec: 320.34
2020-05-17 10:08:52,495 epoch 2 - iter 81/94 - loss 1.42612645 - samples/sec: 321.62
2020-05-17 10:08:53,401 ep

2020-05-17 10:09:55,381 ----------------------------------------------------------------------------------------------------
2020-05-17 10:09:55,492 epoch 8 - iter 0/94 - loss 1.22088397 - samples/sec: 2657.98
2020-05-17 10:09:56,386 epoch 8 - iter 9/94 - loss 1.34370215 - samples/sec: 324.48
2020-05-17 10:09:57,283 epoch 8 - iter 18/94 - loss 1.31373597 - samples/sec: 323.65
2020-05-17 10:09:58,176 epoch 8 - iter 27/94 - loss 1.32437493 - samples/sec: 324.84
2020-05-17 10:09:59,072 epoch 8 - iter 36/94 - loss 1.33341572 - samples/sec: 323.99
2020-05-17 10:09:59,967 epoch 8 - iter 45/94 - loss 1.33867406 - samples/sec: 323.95
2020-05-17 10:10:00,872 epoch 8 - iter 54/94 - loss 1.34700135 - samples/sec: 320.69
2020-05-17 10:10:01,779 epoch 8 - iter 63/94 - loss 1.34930859 - samples/sec: 320.00
2020-05-17 10:10:02,684 epoch 8 - iter 72/94 - loss 1.35194235 - samples/sec: 320.48
2020-05-17 10:10:03,579 epoch 8 - iter 81/94 - loss 1.35200613 - samples/sec: 324.45
2020-05-17 10:10:04,475 ep

2020-05-17 10:11:09,875 BAD EPOCHS (no improvement): 0
2020-05-17 10:11:13,279 ----------------------------------------------------------------------------------------------------
2020-05-17 10:11:13,391 epoch 14 - iter 0/94 - loss 1.42073476 - samples/sec: 2628.20
2020-05-17 10:11:14,302 epoch 14 - iter 9/94 - loss 1.42645644 - samples/sec: 318.68
2020-05-17 10:11:15,199 epoch 14 - iter 18/94 - loss 1.37679923 - samples/sec: 323.17
2020-05-17 10:11:16,114 epoch 14 - iter 27/94 - loss 1.36944697 - samples/sec: 317.29
2020-05-17 10:11:17,015 epoch 14 - iter 36/94 - loss 1.34815915 - samples/sec: 321.79
2020-05-17 10:11:17,919 epoch 14 - iter 45/94 - loss 1.35436088 - samples/sec: 321.02
2020-05-17 10:11:18,821 epoch 14 - iter 54/94 - loss 1.35801115 - samples/sec: 321.86
2020-05-17 10:11:19,744 epoch 14 - iter 63/94 - loss 1.34375231 - samples/sec: 314.18
2020-05-17 10:11:20,651 epoch 14 - iter 72/94 - loss 1.32846655 - samples/sec: 320.06
2020-05-17 10:11:21,549 epoch 14 - iter 81/94 -

2020-05-17 10:12:22,727 DEV : loss 1.22150456905365 - score 0.508
2020-05-17 10:12:22,830 BAD EPOCHS (no improvement): 4
2020-05-17 10:12:22,832 ----------------------------------------------------------------------------------------------------
2020-05-17 10:12:22,935 epoch 20 - iter 0/94 - loss 1.20622087 - samples/sec: 2836.30
2020-05-17 10:12:23,863 epoch 20 - iter 9/94 - loss 1.27646676 - samples/sec: 312.81
2020-05-17 10:12:24,824 epoch 20 - iter 18/94 - loss 1.24721436 - samples/sec: 301.65
2020-05-17 10:12:25,740 epoch 20 - iter 27/94 - loss 1.26739571 - samples/sec: 316.62
2020-05-17 10:12:26,680 epoch 20 - iter 36/94 - loss 1.28710094 - samples/sec: 309.02
2020-05-17 10:12:27,662 epoch 20 - iter 45/94 - loss 1.29483097 - samples/sec: 295.41
2020-05-17 10:12:28,600 epoch 20 - iter 54/94 - loss 1.28565171 - samples/sec: 309.45
2020-05-17 10:12:29,488 epoch 20 - iter 63/94 - loss 1.28401827 - samples/sec: 326.81
2020-05-17 10:12:30,385 epoch 20 - iter 72/94 - loss 1.29490304 - s

In [46]:
total_time = time.time()

word_embeddings = [ WordEmbeddings('glove'),              ]
modelname = 'glove'
train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS)

word_embeddings = [ WordEmbeddings('en-crawl'),                 ]
modelname = 'fasttext web-crawl'
train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS)

word_embeddings = [ WordEmbeddings('en'),                 ]
modelname = 'fasttext news/wiki'
train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS)

word_embeddings = [ WordEmbeddings('en-twitter'),                 ]
modelname = 'en-twitter'
train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS)

word_embeddings = [ ELMoEmbeddings('original')              ]
modelname = 'elmo'
train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS)

print(time.time() - total_time)

# training times
#       3K char long datafields:  2687 sec
#       2K char long datafields:  2220 sec, 20% of time away with 1/3 away from text length.

#       1.5k char , only 100 test-set  373 sec
#       518 sec with batch=32 prediction  -> to small fraction


# older, 8k long, or hatespeech=?
# 1289 sec on 100 train (was 2x elmo)

# 2403 sec on 18k
# 2684 s   18 k

# tiny - 500char, 500 test-set: 295 sec,  with 60epochs 331 sec  (was 100,100 train-dev)


# 500char, 100,200 train-dev (test-500)  60 epo already 1022 secs

# 300char, 13k train 3267 sec

glove
2020-05-17 14:32:07,967 Reading data from /home/max/git/newcombined/dataset_businessnews/input
2020-05-17 14:32:07,968 Train: /home/max/git/newcombined/dataset_businessnews/input/flair_train.csv
2020-05-17 14:32:07,969 Dev: /home/max/git/newcombined/dataset_businessnews/input/flair_dev.csv
2020-05-17 14:32:07,970 Test: /home/max/git/newcombined/dataset_businessnews/input/flair_test.csv


  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


2020-05-17 14:32:19,461 Computing label dictionary. Progress:


100%|██████████| 13718/13718 [00:00<00:00, 268876.70it/s]

2020-05-17 14:32:19,515 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 14:32:19,516 Computing label dictionary. Progress:



100%|██████████| 13718/13718 [00:00<00:00, 267239.48it/s]

2020-05-17 14:32:19,571 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 14:32:19,575 ----------------------------------------------------------------------------------------------------
2020-05-17 14:32:19,576 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings('glove')
    )
    (word_reprojection_map): Linear(in_features=100, out_features=256, bias=True)
    (rnn): LSTM(256, 512, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=512, out_features=5, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2020-05-17 14:32:19,577 ----------------------------------------------------------------------------------------------------
2020-05-17 14:32:19,578 Corpus: "Corpus: 13718 train + 500 dev + 500 test sentences"
2020-05-17 14:32:19,578 ----------------------------------------------------------------------------------------------------
2020-05-17 14:32




2020-05-17 14:32:19,832 epoch 1 - iter 0/429 - loss 1.60230374 - samples/sec: 5605.66
2020-05-17 14:32:24,998 epoch 1 - iter 42/429 - loss 1.47056235 - samples/sec: 260.57
2020-05-17 14:32:30,367 epoch 1 - iter 84/429 - loss 1.46664594 - samples/sec: 250.67
2020-05-17 14:32:35,594 epoch 1 - iter 126/429 - loss 1.46051915 - samples/sec: 257.53
2020-05-17 14:32:42,115 epoch 1 - iter 168/429 - loss 1.45670071 - samples/sec: 206.32
2020-05-17 14:32:47,677 epoch 1 - iter 210/429 - loss 1.44830669 - samples/sec: 241.95
2020-05-17 14:32:53,029 epoch 1 - iter 252/429 - loss 1.44123269 - samples/sec: 251.49
2020-05-17 14:32:58,381 epoch 1 - iter 294/429 - loss 1.43721635 - samples/sec: 251.46
2020-05-17 14:33:03,925 epoch 1 - iter 336/429 - loss 1.43126203 - samples/sec: 242.78
2020-05-17 14:33:09,539 epoch 1 - iter 378/429 - loss 1.42520460 - samples/sec: 239.74
2020-05-17 14:33:15,205 epoch 1 - iter 420/429 - loss 1.41864967 - samples/sec: 237.53
2020-05-17 14:33:16,206 ----------------------

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-17 14:33:21,182 ----------------------------------------------------------------------------------------------------
2020-05-17 14:33:21,253 epoch 2 - iter 0/429 - loss 1.31067395 - samples/sec: 19857.91
2020-05-17 14:33:23,882 epoch 2 - iter 42/429 - loss 1.36308493 - samples/sec: 512.78
2020-05-17 14:33:26,674 epoch 2 - iter 84/429 - loss 1.36795719 - samples/sec: 482.45
2020-05-17 14:33:29,513 epoch 2 - iter 126/429 - loss 1.35717346 - samples/sec: 475.46
2020-05-17 14:33:33,742 epoch 2 - iter 168/429 - loss 1.35454781 - samples/sec: 318.36
2020-05-17 14:33:36,489 epoch 2 - iter 210/429 - loss 1.35187551 - samples/sec: 490.87
2020-05-17 14:33:39,473 epoch 2 - iter 252/429 - loss 1.34743849 - samples/sec: 451.46
2020-05-17 14:33:42,315 epoch 2 - iter 294/429 - loss 1.34670688 - samples/sec: 474.08
2020-05-17 14:33:45,266 epoch 2 - iter 336/429 - loss 1.35577561 - samples/sec: 457.18
2020-05-17 14:33:48,082 epoch 2 - iter 378/429 - loss 1.35480325 - samples/sec: 478.69
2020-05

2020-05-17 14:36:32,185 DEV : loss 1.1603419780731201 - score 0.52
2020-05-17 14:36:32,244 BAD EPOCHS (no improvement): 1
2020-05-17 14:36:32,246 ----------------------------------------------------------------------------------------------------
2020-05-17 14:36:32,316 epoch 8 - iter 0/429 - loss 0.96004170 - samples/sec: 19811.36
2020-05-17 14:36:35,144 epoch 8 - iter 42/429 - loss 1.14324293 - samples/sec: 476.62
2020-05-17 14:36:37,963 epoch 8 - iter 84/429 - loss 1.14310255 - samples/sec: 478.24
2020-05-17 14:36:40,809 epoch 8 - iter 126/429 - loss 1.14638663 - samples/sec: 473.50
2020-05-17 14:36:43,638 epoch 8 - iter 168/429 - loss 1.14470754 - samples/sec: 476.34
2020-05-17 14:36:46,469 epoch 8 - iter 210/429 - loss 1.13912979 - samples/sec: 476.17
2020-05-17 14:36:49,327 epoch 8 - iter 252/429 - loss 1.14251198 - samples/sec: 471.48
2020-05-17 14:36:52,155 epoch 8 - iter 294/429 - loss 1.14037308 - samples/sec: 476.59
2020-05-17 14:36:54,957 epoch 8 - iter 336/429 - loss 1.137

2020-05-17 14:39:35,128 ----------------------------------------------------------------------------------------------------
2020-05-17 14:39:35,129 EPOCH 13 done: loss 1.0364 - lr 0.1000
2020-05-17 14:39:35,935 DEV : loss 1.0511728525161743 - score 0.62
2020-05-17 14:39:35,996 BAD EPOCHS (no improvement): 0
2020-05-17 14:39:39,177 ----------------------------------------------------------------------------------------------------
2020-05-17 14:39:39,263 epoch 14 - iter 0/429 - loss 0.75632226 - samples/sec: 18592.78
2020-05-17 14:39:42,073 epoch 14 - iter 42/429 - loss 1.03423427 - samples/sec: 479.56
2020-05-17 14:39:44,951 epoch 14 - iter 84/429 - loss 1.01081554 - samples/sec: 468.23
2020-05-17 14:39:47,791 epoch 14 - iter 126/429 - loss 1.00596304 - samples/sec: 474.69
2020-05-17 14:39:50,629 epoch 14 - iter 168/429 - loss 1.01613706 - samples/sec: 474.81
2020-05-17 14:39:53,493 epoch 14 - iter 210/429 - loss 1.02071031 - samples/sec: 470.57
2020-05-17 14:39:56,481 epoch 14 - iter

  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


2020-05-17 14:41:02,857 Computing label dictionary. Progress:


100%|██████████| 13718/13718 [00:00<00:00, 266977.84it/s]

2020-05-17 14:41:02,911 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 14:41:02,912 Computing label dictionary. Progress:



100%|██████████| 13718/13718 [00:00<00:00, 236155.76it/s]

2020-05-17 14:41:02,975 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 14:41:02,979 ----------------------------------------------------------------------------------------------------
2020-05-17 14:41:02,980 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings('en-crawl')
    )
    (word_reprojection_map): Linear(in_features=300, out_features=256, bias=True)
    (rnn): LSTM(256, 512, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=512, out_features=5, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2020-05-17 14:41:02,982 ----------------------------------------------------------------------------------------------------
2020-05-17 14:41:02,983 Corpus: "Corpus: 13718 train + 500 dev + 500 test sentences"
2020-05-17 14:41:02,984 ----------------------------------------------------------------------------------------------------
2020-05-17 14




2020-05-17 14:41:03,127 epoch 1 - iter 0/429 - loss 1.61723661 - samples/sec: 10310.26
2020-05-17 14:41:10,386 epoch 1 - iter 42/429 - loss 1.47664226 - samples/sec: 185.36
2020-05-17 14:41:16,295 epoch 1 - iter 84/429 - loss 1.46764976 - samples/sec: 227.75
2020-05-17 14:41:22,416 epoch 1 - iter 126/429 - loss 1.45863838 - samples/sec: 219.82
2020-05-17 14:41:28,607 epoch 1 - iter 168/429 - loss 1.45404386 - samples/sec: 217.36
2020-05-17 14:41:34,542 epoch 1 - iter 210/429 - loss 1.44682326 - samples/sec: 226.78
2020-05-17 14:41:40,379 epoch 1 - iter 252/429 - loss 1.43909471 - samples/sec: 230.56
2020-05-17 14:41:46,215 epoch 1 - iter 294/429 - loss 1.43461333 - samples/sec: 230.57
2020-05-17 14:41:52,192 epoch 1 - iter 336/429 - loss 1.42887211 - samples/sec: 225.16
2020-05-17 14:41:59,853 epoch 1 - iter 378/429 - loss 1.42299350 - samples/sec: 175.62
2020-05-17 14:42:05,873 epoch 1 - iter 420/429 - loss 1.41584192 - samples/sec: 223.54
2020-05-17 14:42:06,947 ---------------------

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-17 14:42:19,401 ----------------------------------------------------------------------------------------------------
2020-05-17 14:42:19,478 epoch 2 - iter 0/429 - loss 1.30463672 - samples/sec: 18244.78
2020-05-17 14:42:22,351 epoch 2 - iter 42/429 - loss 1.34041547 - samples/sec: 468.97
2020-05-17 14:42:25,241 epoch 2 - iter 84/429 - loss 1.34866471 - samples/sec: 466.35
2020-05-17 14:42:28,295 epoch 2 - iter 126/429 - loss 1.33701020 - samples/sec: 441.69
2020-05-17 14:42:31,391 epoch 2 - iter 168/429 - loss 1.33589731 - samples/sec: 436.02
2020-05-17 14:42:34,421 epoch 2 - iter 210/429 - loss 1.32843123 - samples/sec: 444.73
2020-05-17 14:42:37,383 epoch 2 - iter 252/429 - loss 1.31560871 - samples/sec: 455.87
2020-05-17 14:42:40,386 epoch 2 - iter 294/429 - loss 1.31022782 - samples/sec: 448.81
2020-05-17 14:42:43,521 epoch 2 - iter 336/429 - loss 1.31360439 - samples/sec: 429.84
2020-05-17 14:42:46,590 epoch 2 - iter 378/429 - loss 1.30416620 - samples/sec: 439.34
2020-05

2020-05-17 14:46:03,298 DEV : loss 1.0661033391952515 - score 0.56
2020-05-17 14:46:03,359 BAD EPOCHS (no improvement): 2
2020-05-17 14:46:03,360 ----------------------------------------------------------------------------------------------------
2020-05-17 14:46:03,436 epoch 8 - iter 0/429 - loss 1.00424874 - samples/sec: 18220.31
2020-05-17 14:46:06,502 epoch 8 - iter 42/429 - loss 1.06998360 - samples/sec: 439.40
2020-05-17 14:46:09,579 epoch 8 - iter 84/429 - loss 1.06157779 - samples/sec: 438.26
2020-05-17 14:46:12,661 epoch 8 - iter 126/429 - loss 1.06626726 - samples/sec: 437.46
2020-05-17 14:46:15,911 epoch 8 - iter 168/429 - loss 1.06315096 - samples/sec: 414.86
2020-05-17 14:46:19,241 epoch 8 - iter 210/429 - loss 1.05659832 - samples/sec: 405.03
2020-05-17 14:46:22,569 epoch 8 - iter 252/429 - loss 1.05027437 - samples/sec: 405.18
2020-05-17 14:46:25,835 epoch 8 - iter 294/429 - loss 1.04967482 - samples/sec: 412.74
2020-05-17 14:46:29,054 epoch 8 - iter 336/429 - loss 1.048

2020-05-17 14:50:00,394 ----------------------------------------------------------------------------------------------------
2020-05-17 14:50:00,395 EPOCH 13 done: loss 0.9601 - lr 0.1000
2020-05-17 14:50:01,274 DEV : loss 0.9751898050308228 - score 0.584
2020-05-17 14:50:01,339 BAD EPOCHS (no improvement): 1
2020-05-17 14:50:01,340 ----------------------------------------------------------------------------------------------------
2020-05-17 14:50:01,420 epoch 14 - iter 0/429 - loss 0.76243532 - samples/sec: 17305.55
2020-05-17 14:50:04,616 epoch 14 - iter 42/429 - loss 0.92409004 - samples/sec: 421.85
2020-05-17 14:50:07,860 epoch 14 - iter 84/429 - loss 0.92303703 - samples/sec: 415.50
2020-05-17 14:50:11,106 epoch 14 - iter 126/429 - loss 0.93769974 - samples/sec: 415.44
2020-05-17 14:50:14,445 epoch 14 - iter 168/429 - loss 0.95245250 - samples/sec: 403.66
2020-05-17 14:50:17,805 epoch 14 - iter 210/429 - loss 0.96040384 - samples/sec: 401.07
2020-05-17 14:50:21,069 epoch 14 - ite

  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


2020-05-17 14:51:45,662 Computing label dictionary. Progress:


100%|██████████| 13718/13718 [00:00<00:00, 261832.65it/s]

2020-05-17 14:51:45,718 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 14:51:45,719 Computing label dictionary. Progress:



100%|██████████| 13718/13718 [00:00<00:00, 261893.43it/s]

2020-05-17 14:51:45,775 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 14:51:45,778 ----------------------------------------------------------------------------------------------------
2020-05-17 14:51:45,779 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings('en')
    )
    (word_reprojection_map): Linear(in_features=300, out_features=256, bias=True)
    (rnn): LSTM(256, 512, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=512, out_features=5, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2020-05-17 14:51:45,780 ----------------------------------------------------------------------------------------------------
2020-05-17 14:51:45,781 Corpus: "Corpus: 13718 train + 500 dev + 500 test sentences"
2020-05-17 14:51:45,782 ----------------------------------------------------------------------------------------------------
2020-05-17 14:51:45




2020-05-17 14:51:45,921 epoch 1 - iter 0/429 - loss 1.60830641 - samples/sec: 10628.46
2020-05-17 14:51:50,171 epoch 1 - iter 42/429 - loss 1.47772865 - samples/sec: 316.88
2020-05-17 14:51:54,474 epoch 1 - iter 84/429 - loss 1.46796460 - samples/sec: 313.15
2020-05-17 14:51:58,634 epoch 1 - iter 126/429 - loss 1.46031228 - samples/sec: 323.83
2020-05-17 14:52:02,741 epoch 1 - iter 168/429 - loss 1.45698598 - samples/sec: 328.05
2020-05-17 14:52:07,007 epoch 1 - iter 210/429 - loss 1.45197116 - samples/sec: 315.89
2020-05-17 14:52:13,229 epoch 1 - iter 252/429 - loss 1.44605934 - samples/sec: 216.30
2020-05-17 14:52:17,585 epoch 1 - iter 294/429 - loss 1.44421224 - samples/sec: 309.16
2020-05-17 14:52:21,892 epoch 1 - iter 336/429 - loss 1.43983900 - samples/sec: 312.78
2020-05-17 14:52:26,219 epoch 1 - iter 378/429 - loss 1.43629472 - samples/sec: 311.24
2020-05-17 14:52:30,551 epoch 1 - iter 420/429 - loss 1.43210892 - samples/sec: 310.88
2020-05-17 14:52:31,362 ---------------------

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-17 14:52:43,507 ----------------------------------------------------------------------------------------------------
2020-05-17 14:52:43,593 epoch 2 - iter 0/429 - loss 1.41913736 - samples/sec: 16421.32
2020-05-17 14:52:46,851 epoch 2 - iter 42/429 - loss 1.41625045 - samples/sec: 413.72
2020-05-17 14:52:50,062 epoch 2 - iter 84/429 - loss 1.42565840 - samples/sec: 419.75
2020-05-17 14:52:53,342 epoch 2 - iter 126/429 - loss 1.41891405 - samples/sec: 411.11
2020-05-17 14:52:56,630 epoch 2 - iter 168/429 - loss 1.41955020 - samples/sec: 409.88
2020-05-17 14:52:59,876 epoch 2 - iter 210/429 - loss 1.41767908 - samples/sec: 415.37
2020-05-17 14:53:03,143 epoch 2 - iter 252/429 - loss 1.41266675 - samples/sec: 412.73
2020-05-17 14:53:06,374 epoch 2 - iter 294/429 - loss 1.41396501 - samples/sec: 417.19
2020-05-17 14:53:09,752 epoch 2 - iter 336/429 - loss 1.41872265 - samples/sec: 398.99
2020-05-17 14:53:13,100 epoch 2 - iter 378/429 - loss 1.41854358 - samples/sec: 402.50
2020-05

2020-05-17 14:56:45,969 DEV : loss 1.1596801280975342 - score 0.532
2020-05-17 14:56:46,033 BAD EPOCHS (no improvement): 3
2020-05-17 14:56:56,870 ----------------------------------------------------------------------------------------------------
2020-05-17 14:56:56,963 epoch 8 - iter 0/429 - loss 1.19991672 - samples/sec: 15123.90
2020-05-17 14:57:00,226 epoch 8 - iter 42/429 - loss 1.33415430 - samples/sec: 413.15
2020-05-17 14:57:03,596 epoch 8 - iter 84/429 - loss 1.31554907 - samples/sec: 399.94
2020-05-17 14:57:06,851 epoch 8 - iter 126/429 - loss 1.31261658 - samples/sec: 414.07
2020-05-17 14:57:10,206 epoch 8 - iter 168/429 - loss 1.29076184 - samples/sec: 401.80
2020-05-17 14:57:13,599 epoch 8 - iter 210/429 - loss 1.27832038 - samples/sec: 397.15
2020-05-17 14:57:16,927 epoch 8 - iter 252/429 - loss 1.27460625 - samples/sec: 405.24
2020-05-17 14:57:20,333 epoch 8 - iter 294/429 - loss 1.27091674 - samples/sec: 395.76
2020-05-17 14:57:23,679 epoch 8 - iter 336/429 - loss 1.26

2020-05-17 15:00:39,716 ----------------------------------------------------------------------------------------------------
2020-05-17 15:00:39,717 EPOCH 13 done: loss 1.0673 - lr 0.0500
2020-05-17 15:00:40,643 DEV : loss 1.0098029375076294 - score 0.602
2020-05-17 15:00:40,707 BAD EPOCHS (no improvement): 0
2020-05-17 15:00:51,388 ----------------------------------------------------------------------------------------------------
2020-05-17 15:00:51,473 epoch 14 - iter 0/429 - loss 0.75870889 - samples/sec: 16624.43
2020-05-17 15:00:54,749 epoch 14 - iter 42/429 - loss 1.03701408 - samples/sec: 411.45
2020-05-17 15:00:58,054 epoch 14 - iter 84/429 - loss 1.04540966 - samples/sec: 407.76
2020-05-17 15:01:01,378 epoch 14 - iter 126/429 - loss 1.04872718 - samples/sec: 405.65
2020-05-17 15:01:04,655 epoch 14 - iter 168/429 - loss 1.05151839 - samples/sec: 411.22
2020-05-17 15:01:08,083 epoch 14 - iter 210/429 - loss 1.05908856 - samples/sec: 393.36
2020-05-17 15:01:11,446 epoch 14 - ite

  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


2020-05-17 15:02:48,374 Computing label dictionary. Progress:


100%|██████████| 13718/13718 [00:00<00:00, 256541.85it/s]

2020-05-17 15:02:48,431 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 15:02:48,432 Computing label dictionary. Progress:



100%|██████████| 13718/13718 [00:00<00:00, 217690.96it/s]

2020-05-17 15:02:48,498 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 15:02:48,501 ----------------------------------------------------------------------------------------------------
2020-05-17 15:02:48,503 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings('en-twitter')
    )
    (word_reprojection_map): Linear(in_features=100, out_features=256, bias=True)
    (rnn): LSTM(256, 512, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=512, out_features=5, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2020-05-17 15:02:48,504 ----------------------------------------------------------------------------------------------------
2020-05-17 15:02:48,505 Corpus: "Corpus: 13718 train + 500 dev + 500 test sentences"
2020-05-17 15:02:48,506 ----------------------------------------------------------------------------------------------------
2020-05-17 




2020-05-17 15:02:48,681 epoch 1 - iter 0/429 - loss 1.64963782 - samples/sec: 8407.05
2020-05-17 15:02:53,371 epoch 1 - iter 42/429 - loss 1.47375114 - samples/sec: 287.37
2020-05-17 15:02:57,779 epoch 1 - iter 84/429 - loss 1.47503687 - samples/sec: 305.95
2020-05-17 15:03:03,635 epoch 1 - iter 126/429 - loss 1.46430009 - samples/sec: 229.88
2020-05-17 15:03:07,727 epoch 1 - iter 168/429 - loss 1.46263328 - samples/sec: 329.10
2020-05-17 15:03:11,783 epoch 1 - iter 210/429 - loss 1.45666312 - samples/sec: 332.05
2020-05-17 15:03:15,829 epoch 1 - iter 252/429 - loss 1.44959366 - samples/sec: 332.87
2020-05-17 15:03:19,778 epoch 1 - iter 294/429 - loss 1.44477963 - samples/sec: 341.06
2020-05-17 15:03:23,794 epoch 1 - iter 336/429 - loss 1.43980878 - samples/sec: 335.45
2020-05-17 15:03:27,800 epoch 1 - iter 378/429 - loss 1.43464454 - samples/sec: 336.30
2020-05-17 15:03:31,773 epoch 1 - iter 420/429 - loss 1.42942987 - samples/sec: 339.04
2020-05-17 15:03:32,512 ----------------------

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-17 15:03:45,250 ----------------------------------------------------------------------------------------------------
2020-05-17 15:03:45,323 epoch 2 - iter 0/429 - loss 1.36812377 - samples/sec: 19290.89
2020-05-17 15:03:48,145 epoch 2 - iter 42/429 - loss 1.37751475 - samples/sec: 477.67
2020-05-17 15:03:51,058 epoch 2 - iter 84/429 - loss 1.39024114 - samples/sec: 462.98
2020-05-17 15:03:53,996 epoch 2 - iter 126/429 - loss 1.38031038 - samples/sec: 458.96
2020-05-17 15:03:56,947 epoch 2 - iter 168/429 - loss 1.37995564 - samples/sec: 457.27
2020-05-17 15:03:59,878 epoch 2 - iter 210/429 - loss 1.37563201 - samples/sec: 460.02
2020-05-17 15:04:02,836 epoch 2 - iter 252/429 - loss 1.37002675 - samples/sec: 455.76
2020-05-17 15:04:05,777 epoch 2 - iter 294/429 - loss 1.36687120 - samples/sec: 458.48
2020-05-17 15:04:08,712 epoch 2 - iter 336/429 - loss 1.37000204 - samples/sec: 459.49
2020-05-17 15:04:11,638 epoch 2 - iter 378/429 - loss 1.37014989 - samples/sec: 460.67
2020-05

2020-05-17 15:07:19,012 DEV : loss 1.104432225227356 - score 0.542
2020-05-17 15:07:19,075 BAD EPOCHS (no improvement): 4
2020-05-17 15:07:28,381 ----------------------------------------------------------------------------------------------------
2020-05-17 15:07:28,458 epoch 8 - iter 0/429 - loss 1.05615699 - samples/sec: 17982.53
2020-05-17 15:07:31,409 epoch 8 - iter 42/429 - loss 1.14649880 - samples/sec: 456.79
2020-05-17 15:07:34,590 epoch 8 - iter 84/429 - loss 1.15708696 - samples/sec: 423.86
2020-05-17 15:07:37,736 epoch 8 - iter 126/429 - loss 1.15409450 - samples/sec: 428.54
2020-05-17 15:07:40,985 epoch 8 - iter 168/429 - loss 1.15004397 - samples/sec: 414.98
2020-05-17 15:07:44,141 epoch 8 - iter 210/429 - loss 1.14667066 - samples/sec: 427.11
2020-05-17 15:07:47,184 epoch 8 - iter 252/429 - loss 1.14609123 - samples/sec: 442.90
2020-05-17 15:07:50,344 epoch 8 - iter 294/429 - loss 1.14332324 - samples/sec: 426.81
2020-05-17 15:07:53,493 epoch 8 - iter 336/429 - loss 1.144

2020-05-17 15:11:17,767 ----------------------------------------------------------------------------------------------------
2020-05-17 15:11:17,768 EPOCH 13 done: loss 1.0609 - lr 0.1000
2020-05-17 15:11:18,704 DEV : loss 1.0136406421661377 - score 0.586
2020-05-17 15:11:18,772 BAD EPOCHS (no improvement): 0
2020-05-17 15:11:28,552 ----------------------------------------------------------------------------------------------------
2020-05-17 15:11:28,634 epoch 14 - iter 0/429 - loss 0.71913862 - samples/sec: 17252.16
2020-05-17 15:11:31,900 epoch 14 - iter 42/429 - loss 1.03049056 - samples/sec: 412.74
2020-05-17 15:11:35,107 epoch 14 - iter 84/429 - loss 1.03517302 - samples/sec: 420.14
2020-05-17 15:11:38,498 epoch 14 - iter 126/429 - loss 1.03354533 - samples/sec: 397.54
2020-05-17 15:11:41,736 epoch 14 - iter 168/429 - loss 1.05120019 - samples/sec: 416.07
2020-05-17 15:11:45,170 epoch 14 - iter 210/429 - loss 1.04613699 - samples/sec: 392.48
2020-05-17 15:11:48,527 epoch 14 - ite

  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


2020-05-17 15:13:42,807 Computing label dictionary. Progress:


100%|██████████| 13718/13718 [00:00<00:00, 243334.70it/s]

2020-05-17 15:13:42,867 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 15:13:42,868 Computing label dictionary. Progress:



100%|██████████| 13718/13718 [00:00<00:00, 264362.60it/s]

2020-05-17 15:13:42,924 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 15:13:42,930 ----------------------------------------------------------------------------------------------------
2020-05-17 15:13:42,931 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): ELMoEmbeddings(model=0-elmo-original)
    )
    (word_reprojection_map): Linear(in_features=3072, out_features=256, bias=True)
    (rnn): LSTM(256, 512, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=512, out_features=5, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2020-05-17 15:13:42,932 ----------------------------------------------------------------------------------------------------
2020-05-17 15:13:42,933 Corpus: "Corpus: 13718 train + 500 dev + 500 test sentences"
2020-05-17 15:13:42,933 ----------------------------------------------------------------------------------------------------
2




2020-05-17 15:13:43,451 epoch 1 - iter 0/429 - loss 1.59219003 - samples/sec: 2677.00
2020-05-17 15:14:04,222 epoch 1 - iter 42/429 - loss 1.32942523 - samples/sec: 64.73
2020-05-17 15:14:25,115 epoch 1 - iter 84/429 - loss 1.26155745 - samples/sec: 64.36
2020-05-17 15:14:46,123 epoch 1 - iter 126/429 - loss 1.23122434 - samples/sec: 64.00
2020-05-17 15:15:06,699 epoch 1 - iter 168/429 - loss 1.20584597 - samples/sec: 65.35
2020-05-17 15:15:26,632 epoch 1 - iter 210/429 - loss 1.17989902 - samples/sec: 67.46
2020-05-17 15:15:46,573 epoch 1 - iter 252/429 - loss 1.15719575 - samples/sec: 67.43
2020-05-17 15:16:09,042 epoch 1 - iter 294/429 - loss 1.14250289 - samples/sec: 59.84
2020-05-17 15:16:29,137 epoch 1 - iter 336/429 - loss 1.12845745 - samples/sec: 66.93
2020-05-17 15:16:49,135 epoch 1 - iter 378/429 - loss 1.11862881 - samples/sec: 67.24
2020-05-17 15:17:09,160 epoch 1 - iter 420/429 - loss 1.10552740 - samples/sec: 67.15
2020-05-17 15:17:12,839 --------------------------------

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-17 15:17:21,198 ----------------------------------------------------------------------------------------------------
2020-05-17 15:17:21,272 epoch 2 - iter 0/429 - loss 0.81793642 - samples/sec: 19013.32
2020-05-17 15:17:24,568 epoch 2 - iter 42/429 - loss 0.94525675 - samples/sec: 409.57
2020-05-17 15:17:28,010 epoch 2 - iter 84/429 - loss 0.94374180 - samples/sec: 391.75
2020-05-17 15:17:31,497 epoch 2 - iter 126/429 - loss 0.93746258 - samples/sec: 386.38
2020-05-17 15:17:35,028 epoch 2 - iter 168/429 - loss 0.94032681 - samples/sec: 381.63
2020-05-17 15:17:38,524 epoch 2 - iter 210/429 - loss 0.93683939 - samples/sec: 385.41
2020-05-17 15:17:42,070 epoch 2 - iter 252/429 - loss 0.93591073 - samples/sec: 380.03
2020-05-17 15:17:45,561 epoch 2 - iter 294/429 - loss 0.94224333 - samples/sec: 386.03
2020-05-17 15:17:48,816 epoch 2 - iter 336/429 - loss 0.94593177 - samples/sec: 414.03
2020-05-17 15:17:52,387 epoch 2 - iter 378/429 - loss 0.94446101 - samples/sec: 377.22
2020-05

2020-05-17 15:21:09,107 DEV : loss 0.7821012735366821 - score 0.69
2020-05-17 15:21:09,170 BAD EPOCHS (no improvement): 2
2020-05-17 15:21:09,171 ----------------------------------------------------------------------------------------------------
2020-05-17 15:21:09,263 epoch 8 - iter 0/429 - loss 0.62723714 - samples/sec: 15014.21
2020-05-17 15:21:12,810 epoch 8 - iter 42/429 - loss 0.55534861 - samples/sec: 379.86
2020-05-17 15:21:16,368 epoch 8 - iter 84/429 - loss 0.55918250 - samples/sec: 378.86
2020-05-17 15:21:19,847 epoch 8 - iter 126/429 - loss 0.56417803 - samples/sec: 387.94
2020-05-17 15:21:23,435 epoch 8 - iter 168/429 - loss 0.56822268 - samples/sec: 375.63
2020-05-17 15:21:27,098 epoch 8 - iter 210/429 - loss 0.56650969 - samples/sec: 367.89
2020-05-17 15:21:30,723 epoch 8 - iter 252/429 - loss 0.56710586 - samples/sec: 371.67
2020-05-17 15:21:34,293 epoch 8 - iter 294/429 - loss 0.57293475 - samples/sec: 378.12
2020-05-17 15:21:37,908 epoch 8 - iter 336/429 - loss 0.574

2020-05-17 15:24:55,187 ----------------------------------------------------------------------------------------------------
2020-05-17 15:24:55,188 EPOCH 13 done: loss 0.3998 - lr 0.1000
2020-05-17 15:24:56,202 DEV : loss 0.7134566307067871 - score 0.79
2020-05-17 15:24:56,267 BAD EPOCHS (no improvement): 0
2020-05-17 15:24:57,432 ----------------------------------------------------------------------------------------------------
2020-05-17 15:24:57,523 epoch 14 - iter 0/429 - loss 0.23951581 - samples/sec: 15453.97
2020-05-17 15:25:01,111 epoch 14 - iter 42/429 - loss 0.33651725 - samples/sec: 375.40
2020-05-17 15:25:04,679 epoch 14 - iter 84/429 - loss 0.34918969 - samples/sec: 377.62
2020-05-17 15:25:08,282 epoch 14 - iter 126/429 - loss 0.35269781 - samples/sec: 374.07
2020-05-17 15:25:11,903 epoch 14 - iter 168/429 - loss 0.35979062 - samples/sec: 372.07
2020-05-17 15:25:15,542 epoch 14 - iter 210/429 - loss 0.36416262 - samples/sec: 370.30
2020-05-17 15:25:19,142 epoch 14 - iter

In [50]:
# Flair
# https://github.com/flairNLP/flair/blob/master/resources/docs/embeddings/FLAIR_EMBEDDINGS.md
total_time = time.time()
word_embeddings = [ # FlairEmbeddings('multi-forward'), # this is 300 languge, gave very low score
                  # FlairEmbeddings('multi-backward'), 
                   FlairEmbeddings('news-forward'),  #  	English 	Trained with 1 billion word corpus   
                  ]
#modelname = 'Flair-news-fwd'
modelname = 'Flair'
train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS)

# 3k and over, OOM, -> changed setting embedding to cpu instead of gpu. 
#train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS, batch_size=8 )

print(time.time() - total_time)

# training times 3K char long datafields
# 2280 sec

# 1,5K char,  100 test-set, 51 s


# older
# 167s in 100 train
# 620s for 18k

# tiny - 500char, 500 test-set:  33 sec

# large, 624 sec

Flair
2020-05-17 15:41:33,898 Reading data from /home/max/git/newcombined/dataset_businessnews/input
2020-05-17 15:41:33,899 Train: /home/max/git/newcombined/dataset_businessnews/input/flair_train.csv
2020-05-17 15:41:33,899 Dev: /home/max/git/newcombined/dataset_businessnews/input/flair_dev.csv
2020-05-17 15:41:33,900 Test: /home/max/git/newcombined/dataset_businessnews/input/flair_test.csv


  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


2020-05-17 15:41:43,553 Computing label dictionary. Progress:


100%|██████████| 13718/13718 [00:00<00:00, 265965.88it/s]

2020-05-17 15:41:43,608 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 15:41:43,609 Computing label dictionary. Progress:



100%|██████████| 13718/13718 [00:00<00:00, 259904.79it/s]

2020-05-17 15:41:43,665 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 15:41:43,669 ----------------------------------------------------------------------------------------------------
2020-05-17 15:41:43,670 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
          (decoder): Linear(in_features=2048, out_features=300, bias=True)
        )
      )
    )
    (word_reprojection_map): Linear(in_features=2048, out_features=256, bias=True)
    (rnn): LSTM(256, 512, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=512, out_features=5, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2020-05-17 15:41:43,671 ----------------------------------------------------------------------------------------




2020-05-17 15:41:43,950 epoch 1 - iter 0/429 - loss 1.62657619 - samples/sec: 5123.95
2020-05-17 15:41:59,583 epoch 1 - iter 42/429 - loss 1.48372576 - samples/sec: 86.06
2020-05-17 15:42:08,839 epoch 1 - iter 84/429 - loss 1.46120525 - samples/sec: 145.42
2020-05-17 15:42:17,865 epoch 1 - iter 126/429 - loss 1.44395296 - samples/sec: 149.12
2020-05-17 15:42:26,673 epoch 1 - iter 168/429 - loss 1.42902194 - samples/sec: 152.86
2020-05-17 15:42:35,777 epoch 1 - iter 210/429 - loss 1.40975951 - samples/sec: 147.89
2020-05-17 15:42:45,054 epoch 1 - iter 252/429 - loss 1.38686975 - samples/sec: 145.10
2020-05-17 15:42:54,383 epoch 1 - iter 294/429 - loss 1.37051187 - samples/sec: 144.33
2020-05-17 15:43:03,599 epoch 1 - iter 336/429 - loss 1.35483527 - samples/sec: 146.06
2020-05-17 15:43:12,774 epoch 1 - iter 378/429 - loss 1.34072678 - samples/sec: 146.72
2020-05-17 15:43:23,350 epoch 1 - iter 420/429 - loss 1.32819519 - samples/sec: 127.24
2020-05-17 15:43:24,962 -----------------------

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-17 15:43:28,467 ----------------------------------------------------------------------------------------------------
2020-05-17 15:43:28,550 epoch 2 - iter 0/429 - loss 1.13285327 - samples/sec: 16782.46
2020-05-17 15:43:31,911 epoch 2 - iter 42/429 - loss 1.21631105 - samples/sec: 401.51
2020-05-17 15:43:35,159 epoch 2 - iter 84/429 - loss 1.22235621 - samples/sec: 415.85
2020-05-17 15:43:38,268 epoch 2 - iter 126/429 - loss 1.20900943 - samples/sec: 434.14
2020-05-17 15:43:41,412 epoch 2 - iter 168/429 - loss 1.21145808 - samples/sec: 429.35
2020-05-17 15:43:44,535 epoch 2 - iter 210/429 - loss 1.21060712 - samples/sec: 432.44
2020-05-17 15:43:47,659 epoch 2 - iter 252/429 - loss 1.20469070 - samples/sec: 432.41
2020-05-17 15:43:50,764 epoch 2 - iter 294/429 - loss 1.20665145 - samples/sec: 435.92
2020-05-17 15:43:53,955 epoch 2 - iter 336/429 - loss 1.21015676 - samples/sec: 423.00
2020-05-17 15:43:57,214 epoch 2 - iter 378/429 - loss 1.20946703 - samples/sec: 414.20
2020-05

2020-05-17 15:47:02,017 DEV : loss 1.0649515390396118 - score 0.572
2020-05-17 15:47:02,085 BAD EPOCHS (no improvement): 0
2020-05-17 15:47:02,334 ----------------------------------------------------------------------------------------------------
2020-05-17 15:47:02,428 epoch 8 - iter 0/429 - loss 1.14055955 - samples/sec: 14670.25
2020-05-17 15:47:05,983 epoch 8 - iter 42/429 - loss 1.10108181 - samples/sec: 379.64
2020-05-17 15:47:09,284 epoch 8 - iter 84/429 - loss 1.09338141 - samples/sec: 408.76
2020-05-17 15:47:12,632 epoch 8 - iter 126/429 - loss 1.09076137 - samples/sec: 403.73
2020-05-17 15:47:15,979 epoch 8 - iter 168/429 - loss 1.09054697 - samples/sec: 403.36
2020-05-17 15:47:19,355 epoch 8 - iter 210/429 - loss 1.08356123 - samples/sec: 400.42
2020-05-17 15:47:22,672 epoch 8 - iter 252/429 - loss 1.07893989 - samples/sec: 407.37
2020-05-17 15:47:25,918 epoch 8 - iter 294/429 - loss 1.07537267 - samples/sec: 415.96
2020-05-17 15:47:29,310 epoch 8 - iter 336/429 - loss 1.07

2020-05-17 15:50:30,671 ----------------------------------------------------------------------------------------------------
2020-05-17 15:50:30,672 EPOCH 13 done: loss 1.0246 - lr 0.1000
2020-05-17 15:50:31,542 DEV : loss 1.0257632732391357 - score 0.568
2020-05-17 15:50:31,604 BAD EPOCHS (no improvement): 1
2020-05-17 15:50:31,605 ----------------------------------------------------------------------------------------------------
2020-05-17 15:50:31,688 epoch 14 - iter 0/429 - loss 0.69253856 - samples/sec: 16634.39
2020-05-17 15:50:34,926 epoch 14 - iter 42/429 - loss 1.00138802 - samples/sec: 416.96
2020-05-17 15:50:38,198 epoch 14 - iter 84/429 - loss 0.98482730 - samples/sec: 412.65
2020-05-17 15:50:41,427 epoch 14 - iter 126/429 - loss 0.99109571 - samples/sec: 418.44
2020-05-17 15:50:44,643 epoch 14 - iter 168/429 - loss 1.00574006 - samples/sec: 419.76
2020-05-17 15:50:47,890 epoch 14 - iter 210/429 - loss 1.00676128 - samples/sec: 415.69
2020-05-17 15:50:51,153 epoch 14 - ite

In [51]:
# BERT OOMs on 32 batch, use 8
total_time = time.time()

# Bert Cased - separating lower and upper case, uncased - ignoring case.
# Use cased

word_embeddings = [ BertEmbeddings('bert-base-cased'),                ]
modelname = 'bert-base-cased'
train_and_predict_single(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=15, batch_size=8)

print(time.time() - total_time)

#word_embeddings = [ BertEmbeddings('bert-base-uncased'),                ]
#modelname = 'bert-base-uncased'
#train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS, batch_size=8)


# 19:30 atleast was started
# 20:03 still doing
# 

# 450 s 18k


# on batch run: RuntimeError: CUDA error: device-side assert triggered
# prediction, with embedding sentences.
#
#   for embedding in self.embeddings:
#              embedding.embed(sentences)
#
# Possibly text too long for Bert embedding, length of the text the flair does not check beforehand

# 13k -> 2400 s

bert-base-cased
2020-05-17 16:03:39,687 Reading data from /home/max/git/newcombined/dataset_businessnews/input
2020-05-17 16:03:39,687 Train: /home/max/git/newcombined/dataset_businessnews/input/flair_train.csv
2020-05-17 16:03:39,688 Dev: /home/max/git/newcombined/dataset_businessnews/input/flair_dev.csv
2020-05-17 16:03:39,688 Test: /home/max/git/newcombined/dataset_businessnews/input/flair_test.csv


  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


2020-05-17 16:03:54,150 Computing label dictionary. Progress:


100%|██████████| 13718/13718 [00:00<00:00, 251258.59it/s]

2020-05-17 16:03:54,207 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 16:03:54,208 Computing label dictionary. Progress:



100%|██████████| 13718/13718 [00:00<00:00, 250388.23it/s]

2020-05-17 16:03:54,266 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 16:03:54,271 ----------------------------------------------------------------------------------------------------
2020-05-17 16:03:54,274 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): BertEmbeddings(
        (model): BertModel(
          (embeddings): BertEmbeddings(
            (word_embeddings): Embedding(28996, 768, padding_idx=0)
            (position_embeddings): Embedding(512, 768)
            (token_type_embeddings): Embedding(2, 768)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (encoder): BertEncoder(
            (layer): ModuleList(
              (0): BertLayer(
                (attention): BertAttention(
                  (self): BertSelfAttention(
                    (query): Linear(in_features=768, out_features=768

2020-05-17 16:03:54,276 ----------------------------------------------------------------------------------------------------
2020-05-17 16:03:54,277 Corpus: "Corpus: 13718 train + 500 dev + 500 test sentences"
2020-05-17 16:03:54,278 ----------------------------------------------------------------------------------------------------
2020-05-17 16:03:54,279 Parameters:
2020-05-17 16:03:54,281  - learning_rate: "0.1"
2020-05-17 16:03:54,282  - mini_batch_size: "8"
2020-05-17 16:03:54,282  - patience: "5"
2020-05-17 16:03:54,283  - anneal_factor: "0.5"
2020-05-17 16:03:54,283  - max_epochs: "15"
2020-05-17 16:03:54,284  - shuffle: "True"
2020-05-17 16:03:54,285  - train_with_dev: "False"
2020-05-17 16:03:54,285  - batch_growth_annealing: "False"
2020-05-17 16:03:54,286 ----------------------------------------------------------------------------------------------------
2020-05-17 16:03:54,286 Model training base path: "."
2020-05-17 16:03:54,287 --------------------------------------------




2020-05-17 16:03:55,221 epoch 1 - iter 0/1715 - loss 1.57973135 - samples/sec: 1478.86
2020-05-17 16:06:22,164 epoch 1 - iter 171/1715 - loss 1.59164723 - samples/sec: 9.32
2020-05-17 16:08:48,237 epoch 1 - iter 342/1715 - loss 1.50371604 - samples/sec: 9.38
2020-05-17 16:11:17,432 epoch 1 - iter 513/1715 - loss 1.44562274 - samples/sec: 9.18
2020-05-17 16:13:42,869 epoch 1 - iter 684/1715 - loss 1.40248368 - samples/sec: 9.42
2020-05-17 16:16:08,003 epoch 1 - iter 855/1715 - loss 1.37099723 - samples/sec: 9.44
2020-05-17 16:18:36,399 epoch 1 - iter 1026/1715 - loss 1.33394253 - samples/sec: 9.23
2020-05-17 16:21:02,087 epoch 1 - iter 1197/1715 - loss 1.31195148 - samples/sec: 9.40
2020-05-17 16:23:26,930 epoch 1 - iter 1368/1715 - loss 1.30067147 - samples/sec: 9.46
2020-05-17 16:25:50,898 epoch 1 - iter 1539/1715 - loss 1.28161208 - samples/sec: 9.51
2020-05-17 16:28:19,444 epoch 1 - iter 1710/1715 - loss 1.26349621 - samples/sec: 9.22
2020-05-17 16:28:22,639 ------------------------

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-17 16:29:16,363 ----------------------------------------------------------------------------------------------------
2020-05-17 16:29:16,396 epoch 2 - iter 0/1715 - loss 1.15191483 - samples/sec: 48307.40
2020-05-17 16:29:21,776 epoch 2 - iter 171/1715 - loss 1.15310769 - samples/sec: 261.02
2020-05-17 16:29:27,139 epoch 2 - iter 342/1715 - loss 1.15644752 - samples/sec: 262.76
2020-05-17 16:29:32,296 epoch 2 - iter 513/1715 - loss 1.15017900 - samples/sec: 274.59
2020-05-17 16:29:37,757 epoch 2 - iter 684/1715 - loss 1.15244119 - samples/sec: 257.79
2020-05-17 16:29:43,231 epoch 2 - iter 855/1715 - loss 1.15782246 - samples/sec: 257.65
2020-05-17 16:29:48,496 epoch 2 - iter 1026/1715 - loss 1.15163452 - samples/sec: 268.07
2020-05-17 16:29:54,049 epoch 2 - iter 1197/1715 - loss 1.15500254 - samples/sec: 253.45
2020-05-17 16:29:59,356 epoch 2 - iter 1368/1715 - loss 1.15963339 - samples/sec: 265.65
2020-05-17 16:30:05,113 epoch 2 - iter 1539/1715 - loss 1.16189792 - samples/sec

2020-05-17 16:34:59,084 EPOCH 7 done: loss 1.1108 - lr 0.1000
2020-05-17 16:35:00,215 DEV : loss 1.0914437770843506 - score 0.528
2020-05-17 16:35:00,281 BAD EPOCHS (no improvement): 3
2020-05-17 16:35:00,282 ----------------------------------------------------------------------------------------------------
2020-05-17 16:35:00,326 epoch 8 - iter 0/1715 - loss 0.92333120 - samples/sec: 36885.80
2020-05-17 16:35:05,784 epoch 8 - iter 171/1715 - loss 1.11660788 - samples/sec: 257.47
2020-05-17 16:35:11,286 epoch 8 - iter 342/1715 - loss 1.12041234 - samples/sec: 257.58
2020-05-17 16:35:17,084 epoch 8 - iter 513/1715 - loss 1.12835039 - samples/sec: 243.57
2020-05-17 16:35:22,812 epoch 8 - iter 684/1715 - loss 1.11371967 - samples/sec: 245.75
2020-05-17 16:35:28,588 epoch 8 - iter 855/1715 - loss 1.11134972 - samples/sec: 244.32
2020-05-17 16:35:34,294 epoch 8 - iter 1026/1715 - loss 1.10909714 - samples/sec: 247.34
2020-05-17 16:35:39,662 epoch 8 - iter 1197/1715 - loss 1.11494113 - samp

2020-05-17 16:40:27,100 epoch 13 - iter 1539/1715 - loss 1.01898701 - samples/sec: 257.34
2020-05-17 16:40:31,601 epoch 13 - iter 1710/1715 - loss 1.01477844 - samples/sec: 313.38
2020-05-17 16:40:31,849 ----------------------------------------------------------------------------------------------------
2020-05-17 16:40:31,850 EPOCH 13 done: loss 1.0142 - lr 0.0500
2020-05-17 16:40:32,879 DEV : loss 0.9682843089103699 - score 0.594
2020-05-17 16:40:32,942 BAD EPOCHS (no improvement): 0
2020-05-17 16:40:34,216 ----------------------------------------------------------------------------------------------------
2020-05-17 16:40:34,246 epoch 14 - iter 0/1715 - loss 0.94932169 - samples/sec: 53737.37
2020-05-17 16:40:39,012 epoch 14 - iter 171/1715 - loss 0.97430299 - samples/sec: 295.50
2020-05-17 16:40:44,592 epoch 14 - iter 342/1715 - loss 0.97538048 - samples/sec: 252.52
2020-05-17 16:40:49,738 epoch 14 - iter 513/1715 - loss 0.97889644 - samples/sec: 273.89
2020-05-17 16:40:54,856 epoc

In [52]:
#BPE - takes memory - reduce batch size radically!
total_time = time.time()

word_embeddings = [ BytePairEmbeddings('en'),   ]
modelname = 'BytePairEmbedding'
train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS, batch_size=8)               

print(time.time() - total_time)

BytePairEmbedding
2020-05-17 16:47:04,556 Reading data from /home/max/git/newcombined/dataset_businessnews/input
2020-05-17 16:47:04,557 Train: /home/max/git/newcombined/dataset_businessnews/input/flair_train.csv
2020-05-17 16:47:04,558 Dev: /home/max/git/newcombined/dataset_businessnews/input/flair_dev.csv
2020-05-17 16:47:04,558 Test: /home/max/git/newcombined/dataset_businessnews/input/flair_test.csv


  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


2020-05-17 16:47:21,424 Computing label dictionary. Progress:


100%|██████████| 13718/13718 [00:00<00:00, 234207.26it/s]

2020-05-17 16:47:21,486 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 16:47:21,487 Computing label dictionary. Progress:



100%|██████████| 13718/13718 [00:00<00:00, 257069.10it/s]

2020-05-17 16:47:21,544 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 16:47:21,547 ----------------------------------------------------------------------------------------------------
2020-05-17 16:47:21,548 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): BytePairEmbeddings(model=0-bpe-en-100000-50)
    )
    (word_reprojection_map): Linear(in_features=100, out_features=256, bias=True)
    (rnn): LSTM(256, 512, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=512, out_features=5, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2020-05-17 16:47:21,550 ----------------------------------------------------------------------------------------------------
2020-05-17 16:47:21,550 Corpus: "Corpus: 13718 train + 500 dev + 500 test sentences"
2020-05-17 16:47:21,551 ------------------------------------------------------------------------------------------------




2020-05-17 16:47:21,639 epoch 1 - iter 0/1715 - loss 1.64689314 - samples/sec: 19620.26
2020-05-17 16:47:32,675 epoch 1 - iter 171/1715 - loss 1.46176034 - samples/sec: 124.08
2020-05-17 16:47:43,392 epoch 1 - iter 342/1715 - loss 1.45667674 - samples/sec: 127.86
2020-05-17 16:47:54,166 epoch 1 - iter 513/1715 - loss 1.44526867 - samples/sec: 127.09
2020-05-17 16:48:04,864 epoch 1 - iter 684/1715 - loss 1.43354224 - samples/sec: 127.99
2020-05-17 16:48:15,315 epoch 1 - iter 855/1715 - loss 1.42067421 - samples/sec: 131.03
2020-05-17 16:48:25,977 epoch 1 - iter 1026/1715 - loss 1.40796098 - samples/sec: 128.43
2020-05-17 16:48:36,519 epoch 1 - iter 1197/1715 - loss 1.39630480 - samples/sec: 129.91
2020-05-17 16:48:49,584 epoch 1 - iter 1368/1715 - loss 1.38928959 - samples/sec: 104.80
2020-05-17 16:49:00,150 epoch 1 - iter 1539/1715 - loss 1.38341093 - samples/sec: 129.61
2020-05-17 16:49:10,785 epoch 1 - iter 1710/1715 - loss 1.37521932 - samples/sec: 128.76
2020-05-17 16:49:11,032 ---

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-17 16:49:15,020 ----------------------------------------------------------------------------------------------------
2020-05-17 16:49:15,059 epoch 2 - iter 0/1715 - loss 1.63396847 - samples/sec: 39174.75


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-17 16:49:19,629 epoch 2 - iter 171/1715 - loss 1.31060433 - samples/sec: 300.14
2020-05-17 16:49:24,242 epoch 2 - iter 342/1715 - loss 1.31338758 - samples/sec: 297.29
2020-05-17 16:49:28,819 epoch 2 - iter 513/1715 - loss 1.31107140 - samples/sec: 299.74
2020-05-17 16:49:33,402 epoch 2 - iter 684/1715 - loss 1.31032169 - samples/sec: 299.55
2020-05-17 16:49:37,960 epoch 2 - iter 855/1715 - loss 1.30592586 - samples/sec: 301.06
2020-05-17 16:49:42,609 epoch 2 - iter 1026/1715 - loss 1.30149488 - samples/sec: 295.33
2020-05-17 16:49:46,390 epoch 2 - iter 1197/1715 - loss 1.30320098 - samples/sec: 363.33
2020-05-17 16:49:50,987 epoch 2 - iter 1368/1715 - loss 1.30870206 - samples/sec: 298.55
2020-05-17 16:49:55,607 epoch 2 - iter 1539/1715 - loss 1.30752952 - samples/sec: 297.11
2020-05-17 16:50:00,276 epoch 2 - iter 1710/1715 - loss 1.30771399 - samples/sec: 293.82
2020-05-17 16:50:00,401 -------------------------------------------------------------------------------------------

2020-05-17 16:53:53,137 ----------------------------------------------------------------------------------------------------
2020-05-17 16:53:53,168 epoch 8 - iter 0/1715 - loss 0.82208282 - samples/sec: 48532.54
2020-05-17 16:53:57,776 epoch 8 - iter 171/1715 - loss 1.02148490 - samples/sec: 297.99
2020-05-17 16:54:02,325 epoch 8 - iter 342/1715 - loss 1.01442451 - samples/sec: 301.63
2020-05-17 16:54:06,907 epoch 8 - iter 513/1715 - loss 1.03322960 - samples/sec: 299.28
2020-05-17 16:54:10,521 epoch 8 - iter 684/1715 - loss 1.02058364 - samples/sec: 380.03
2020-05-17 16:54:14,717 epoch 8 - iter 855/1715 - loss 1.02355808 - samples/sec: 326.73
2020-05-17 16:54:19,210 epoch 8 - iter 1026/1715 - loss 1.01847311 - samples/sec: 305.39
2020-05-17 16:54:23,744 epoch 8 - iter 1197/1715 - loss 1.01613042 - samples/sec: 302.80
2020-05-17 16:54:28,365 epoch 8 - iter 1368/1715 - loss 1.01305997 - samples/sec: 297.09
2020-05-17 16:54:32,978 epoch 8 - iter 1539/1715 - loss 1.01232175 - samples/sec

2020-05-17 16:58:30,824 EPOCH 13 done: loss 0.8560 - lr 0.1000
2020-05-17 16:58:31,735 DEV : loss 0.8043615818023682 - score 0.692
2020-05-17 16:58:31,801 BAD EPOCHS (no improvement): 0
2020-05-17 16:58:32,522 ----------------------------------------------------------------------------------------------------
2020-05-17 16:58:32,550 epoch 14 - iter 0/1715 - loss 0.56504428 - samples/sec: 54411.56
2020-05-17 16:58:36,511 epoch 14 - iter 171/1715 - loss 0.77782936 - samples/sec: 346.64
2020-05-17 16:58:41,052 epoch 14 - iter 342/1715 - loss 0.81480094 - samples/sec: 302.35
2020-05-17 16:58:45,578 epoch 14 - iter 513/1715 - loss 0.80179396 - samples/sec: 303.18
2020-05-17 16:58:50,121 epoch 14 - iter 684/1715 - loss 0.81738869 - samples/sec: 301.96
2020-05-17 16:58:54,656 epoch 14 - iter 855/1715 - loss 0.81996810 - samples/sec: 302.53
2020-05-17 16:58:59,144 epoch 14 - iter 1026/1715 - loss 0.82458848 - samples/sec: 306.05
2020-05-17 16:59:03,694 epoch 14 - iter 1197/1715 - loss 0.828879

In [71]:
# GPT-1

total_time = time.time()
#Do we need batchsize 8 here?

word_embeddings = [ OpenAIGPTEmbeddings(),                ]
modelname = 'gpt-1'
# train_and_predict_single(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS, batch_size=8)
train_and_predict_single(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS, batch_size=4)

print(time.time() - total_time)

# 302 sec 18k

# on batch mode gave: IndexError: index 0 is out of bounds for dimension 0 with size 0

# tiny 500,500 94 sec

# single mode also
# size: 1000, 200, 500   IndexError: index 0 is out of bounds for dimension 0 with size 0
#
# possible error in flair tokenization
# https://github.com/flairNLP/flair/issues/1366
#
# or unrecognized character given empty embedding instead of unknown-token
# https://github.com/flairNLP/flair/issues/1221



gpt-1
2020-05-18 16:17:10,310 Reading data from /home/max/git/newcombined/dataset_businessnews/input
2020-05-18 16:17:10,312 Train: /home/max/git/newcombined/dataset_businessnews/input/flair_train.csv
2020-05-18 16:17:10,312 Dev: /home/max/git/newcombined/dataset_businessnews/input/flair_dev.csv
2020-05-18 16:17:10,313 Test: /home/max/git/newcombined/dataset_businessnews/input/flair_test.csv


  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


2020-05-18 16:17:14,455 Computing label dictionary. Progress:


100%|██████████| 200/200 [00:00<00:00, 90972.87it/s]

2020-05-18 16:17:14,461 [b'1', b'0', b'4', b'3', b'2']
2020-05-18 16:17:14,462 Computing label dictionary. Progress:



100%|██████████| 200/200 [00:00<00:00, 248994.00it/s]

2020-05-18 16:17:14,467 [b'1', b'0', b'4', b'3', b'2']
2020-05-18 16:17:14,473 ----------------------------------------------------------------------------------------------------
2020-05-18 16:17:14,476 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): OpenAIGPTEmbeddings(
        model=0-openai-gpt
        (model): OpenAIGPTModel(
          (tokens_embed): Embedding(40478, 768)
          (positions_embed): Embedding(512, 768)
          (drop): Dropout(p=0.1, inplace=False)
          (h): ModuleList(
            (0): Block(
              (attn): Attention(
                (c_attn): Conv1D()
                (c_proj): Conv1D()
                (attn_dropout): Dropout(p=0.1, inplace=False)
                (resid_dropout): Dropout(p=0.1, inplace=False)
              )
              (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (mlp): MLP(
                (c_fc): Conv1D()
     

2020-05-18 16:17:14,478 Corpus: "Corpus: 200 train + 500 dev + 500 test sentences"
2020-05-18 16:17:14,478 ----------------------------------------------------------------------------------------------------
2020-05-18 16:17:14,479 Parameters:
2020-05-18 16:17:14,479  - learning_rate: "0.1"
2020-05-18 16:17:14,480  - mini_batch_size: "4"
2020-05-18 16:17:14,481  - patience: "5"
2020-05-18 16:17:14,481  - anneal_factor: "0.5"
2020-05-18 16:17:14,482  - max_epochs: "15"
2020-05-18 16:17:14,483  - shuffle: "True"
2020-05-18 16:17:14,484  - train_with_dev: "False"
2020-05-18 16:17:14,485  - batch_growth_annealing: "False"
2020-05-18 16:17:14,485 ----------------------------------------------------------------------------------------------------
2020-05-18 16:17:14,486 Model training base path: "."
2020-05-18 16:17:14,487 ----------------------------------------------------------------------------------------------------
2020-05-18 16:17:14,490 Device: cuda:0
2020-05-18 16:17:14,491 -------




2020-05-18 16:17:15,023 epoch 1 - iter 0/50 - loss 1.57646501 - samples/sec: 38.11
2020-05-18 16:17:17,764 epoch 1 - iter 5/50 - loss 1.66154005 - samples/sec: 7.60
2020-05-18 16:17:20,372 epoch 1 - iter 10/50 - loss 1.65476380 - samples/sec: 7.96
2020-05-18 16:17:22,956 epoch 1 - iter 15/50 - loss 1.61488535 - samples/sec: 8.02
2020-05-18 16:17:25,502 epoch 1 - iter 20/50 - loss 1.59133152 - samples/sec: 8.15
2020-05-18 16:17:27,999 epoch 1 - iter 25/50 - loss 1.59074516 - samples/sec: 8.31
2020-05-18 16:17:30,742 epoch 1 - iter 30/50 - loss 1.60192063 - samples/sec: 7.57
2020-05-18 16:17:33,309 epoch 1 - iter 35/50 - loss 1.58490951 - samples/sec: 8.09
2020-05-18 16:17:35,824 epoch 1 - iter 40/50 - loss 1.62698720 - samples/sec: 8.24
2020-05-18 16:17:38,355 epoch 1 - iter 45/50 - loss 1.61725956 - samples/sec: 8.19
2020-05-18 16:17:41,058 ----------------------------------------------------------------------------------------------------
2020-05-18 16:17:41,059 EPOCH 1 done: loss 1.6

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-18 16:18:43,890 ----------------------------------------------------------------------------------------------------
2020-05-18 16:18:43,951 epoch 2 - iter 0/50 - loss 1.22715783 - samples/sec: 342.27
2020-05-18 16:18:44,350 epoch 2 - iter 5/50 - loss 1.27280040 - samples/sec: 67.04
2020-05-18 16:18:44,714 epoch 2 - iter 10/50 - loss 1.41183447 - samples/sec: 74.33
2020-05-18 16:18:45,090 epoch 2 - iter 15/50 - loss 1.42460554 - samples/sec: 71.58
2020-05-18 16:18:45,450 epoch 2 - iter 20/50 - loss 1.44614340 - samples/sec: 73.86
2020-05-18 16:18:45,829 epoch 2 - iter 25/50 - loss 1.42678155 - samples/sec: 70.07
2020-05-18 16:18:46,244 epoch 2 - iter 30/50 - loss 1.43389569 - samples/sec: 62.48
2020-05-18 16:18:46,621 epoch 2 - iter 35/50 - loss 1.41338287 - samples/sec: 69.91
2020-05-18 16:18:46,981 epoch 2 - iter 40/50 - loss 1.38628011 - samples/sec: 73.72
2020-05-18 16:18:47,375 epoch 2 - iter 45/50 - loss 1.36595527 - samples/sec: 66.40
2020-05-18 16:18:47,725 ------------

2020-05-18 16:19:40,709 epoch 8 - iter 30/50 - loss 0.55062598 - samples/sec: 70.29
2020-05-18 16:19:41,082 epoch 8 - iter 35/50 - loss 0.54324865 - samples/sec: 70.83
2020-05-18 16:19:41,465 epoch 8 - iter 40/50 - loss 0.49572363 - samples/sec: 69.94
2020-05-18 16:19:41,860 epoch 8 - iter 45/50 - loss 0.49190628 - samples/sec: 67.94
2020-05-18 16:19:42,185 ----------------------------------------------------------------------------------------------------
2020-05-18 16:19:42,186 EPOCH 8 done: loss 0.4756 - lr 0.1000
2020-05-18 16:19:46,662 DEV : loss 2.42663311958313 - score 0.296
Epoch     7: reducing learning rate of group 0 to 5.0000e-02.
2020-05-18 16:19:46,936 BAD EPOCHS (no improvement): 6
2020-05-18 16:19:46,937 ----------------------------------------------------------------------------------------------------
2020-05-18 16:19:46,992 epoch 9 - iter 0/50 - loss 0.02202588 - samples/sec: 386.82
2020-05-18 16:19:47,369 epoch 9 - iter 5/50 - loss 0.28313484 - samples/sec: 70.01
20

2020-05-18 16:20:35,510 EPOCH 14 done: loss 0.1435 - lr 0.0500
2020-05-18 16:20:39,853 DEV : loss 2.82846999168396 - score 0.276
2020-05-18 16:20:40,131 BAD EPOCHS (no improvement): 5
2020-05-18 16:20:40,133 ----------------------------------------------------------------------------------------------------
2020-05-18 16:20:40,193 epoch 15 - iter 0/50 - loss 0.06993622 - samples/sec: 349.11
2020-05-18 16:20:40,559 epoch 15 - iter 5/50 - loss 0.25170111 - samples/sec: 72.84
2020-05-18 16:20:40,933 epoch 15 - iter 10/50 - loss 0.16812468 - samples/sec: 70.76
2020-05-18 16:20:41,309 epoch 15 - iter 15/50 - loss 0.16616407 - samples/sec: 71.31
2020-05-18 16:20:41,685 epoch 15 - iter 20/50 - loss 0.18981475 - samples/sec: 69.75
2020-05-18 16:20:42,058 epoch 15 - iter 25/50 - loss 0.20163059 - samples/sec: 71.23
2020-05-18 16:20:42,425 epoch 15 - iter 30/50 - loss 0.18205608 - samples/sec: 73.01
2020-05-18 16:20:42,792 epoch 15 - iter 35/50 - loss 0.17685768 - samples/sec: 72.19
2020-05-18 1

In [53]:
# Mock UP - replace failing GPT-1
total_time = time.time()

word_embeddings = [ WordEmbeddings('glove'),              ]
modelname = 'gpt-1'
train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS)

# word_embeddings = [ ELMoEmbeddings('original')              ]
# modelname = 'elmo'
# train_and_predict(word_embeddings, modelname, modeldesc, savelist=savelist, epochs=EPOCHS)


print(time.time() - total_time)


gpt-1
2020-05-17 17:00:11,377 Reading data from /home/max/git/newcombined/dataset_businessnews/input
2020-05-17 17:00:11,380 Train: /home/max/git/newcombined/dataset_businessnews/input/flair_train.csv
2020-05-17 17:00:11,383 Dev: /home/max/git/newcombined/dataset_businessnews/input/flair_dev.csv
2020-05-17 17:00:11,385 Test: /home/max/git/newcombined/dataset_businessnews/input/flair_test.csv


  train_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  test_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc
  dev_file, tokenizer=tokenizer, max_tokens_per_doc=max_tokens_per_doc


2020-05-17 17:00:26,508 Computing label dictionary. Progress:


100%|██████████| 13718/13718 [00:00<00:00, 247933.63it/s]

2020-05-17 17:00:26,567 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 17:00:26,568 Computing label dictionary. Progress:



100%|██████████| 13718/13718 [00:00<00:00, 258815.18it/s]

2020-05-17 17:00:26,624 [b'3', b'0', b'4', b'1', b'2']
2020-05-17 17:00:26,627 ----------------------------------------------------------------------------------------------------
2020-05-17 17:00:26,628 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings('glove')
    )
    (word_reprojection_map): Linear(in_features=100, out_features=256, bias=True)
    (rnn): LSTM(256, 512, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=512, out_features=5, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2020-05-17 17:00:26,629 ----------------------------------------------------------------------------------------------------
2020-05-17 17:00:26,630 Corpus: "Corpus: 13718 train + 500 dev + 500 test sentences"
2020-05-17 17:00:26,631 ----------------------------------------------------------------------------------------------------
2020-05-17 17:00




2020-05-17 17:00:26,775 epoch 1 - iter 0/429 - loss 1.65062249 - samples/sec: 10297.96
2020-05-17 17:00:31,272 epoch 1 - iter 42/429 - loss 1.46980667 - samples/sec: 299.43
2020-05-17 17:00:35,642 epoch 1 - iter 84/429 - loss 1.46700921 - samples/sec: 308.15
2020-05-17 17:00:39,770 epoch 1 - iter 126/429 - loss 1.46125768 - samples/sec: 326.19
2020-05-17 17:00:45,454 epoch 1 - iter 168/429 - loss 1.45774611 - samples/sec: 236.80
2020-05-17 17:00:49,555 epoch 1 - iter 210/429 - loss 1.45048466 - samples/sec: 328.38
2020-05-17 17:00:53,619 epoch 1 - iter 252/429 - loss 1.44441523 - samples/sec: 331.45
2020-05-17 17:00:57,674 epoch 1 - iter 294/429 - loss 1.44080300 - samples/sec: 332.23
2020-05-17 17:01:01,719 epoch 1 - iter 336/429 - loss 1.43433913 - samples/sec: 332.90
2020-05-17 17:01:05,813 epoch 1 - iter 378/429 - loss 1.42742018 - samples/sec: 328.98
2020-05-17 17:01:09,878 epoch 1 - iter 420/429 - loss 1.42145558 - samples/sec: 331.26
2020-05-17 17:01:10,612 ---------------------

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020-05-17 17:01:14,939 ----------------------------------------------------------------------------------------------------
2020-05-17 17:01:15,014 epoch 2 - iter 0/429 - loss 1.27847493 - samples/sec: 18661.16
2020-05-17 17:01:17,882 epoch 2 - iter 42/429 - loss 1.36794381 - samples/sec: 470.18
2020-05-17 17:01:20,782 epoch 2 - iter 84/429 - loss 1.37524241 - samples/sec: 464.71
2020-05-17 17:01:23,712 epoch 2 - iter 126/429 - loss 1.36403941 - samples/sec: 460.42
2020-05-17 17:01:26,614 epoch 2 - iter 168/429 - loss 1.35945516 - samples/sec: 464.46
2020-05-17 17:01:29,549 epoch 2 - iter 210/429 - loss 1.35797206 - samples/sec: 459.35
2020-05-17 17:01:32,495 epoch 2 - iter 252/429 - loss 1.35003504 - samples/sec: 457.62
2020-05-17 17:01:35,374 epoch 2 - iter 294/429 - loss 1.35093566 - samples/sec: 468.26
2020-05-17 17:01:40,221 epoch 2 - iter 336/429 - loss 1.35778247 - samples/sec: 277.84
2020-05-17 17:01:43,174 epoch 2 - iter 378/429 - loss 1.36515168 - samples/sec: 456.37
2020-05

2020-05-17 17:04:35,638 DEV : loss 1.166002869606018 - score 0.508
2020-05-17 17:04:35,705 BAD EPOCHS (no improvement): 1
2020-05-17 17:04:35,706 ----------------------------------------------------------------------------------------------------
2020-05-17 17:04:35,779 epoch 8 - iter 0/429 - loss 0.96752071 - samples/sec: 18927.52
2020-05-17 17:04:38,742 epoch 8 - iter 42/429 - loss 1.14836320 - samples/sec: 454.97
2020-05-17 17:04:41,696 epoch 8 - iter 84/429 - loss 1.15324045 - samples/sec: 456.23
2020-05-17 17:04:44,674 epoch 8 - iter 126/429 - loss 1.15512529 - samples/sec: 452.77
2020-05-17 17:04:47,643 epoch 8 - iter 168/429 - loss 1.15131626 - samples/sec: 454.06
2020-05-17 17:04:50,632 epoch 8 - iter 210/429 - loss 1.15033506 - samples/sec: 451.05
2020-05-17 17:04:53,588 epoch 8 - iter 252/429 - loss 1.14472843 - samples/sec: 456.38
2020-05-17 17:04:56,523 epoch 8 - iter 294/429 - loss 1.14182044 - samples/sec: 459.24
2020-05-17 17:04:59,473 epoch 8 - iter 336/429 - loss 1.136

2020-05-17 17:07:54,367 ----------------------------------------------------------------------------------------------------
2020-05-17 17:07:54,367 EPOCH 13 done: loss 1.0234 - lr 0.1000
2020-05-17 17:07:55,165 DEV : loss 0.9866554141044617 - score 0.612
2020-05-17 17:07:55,231 BAD EPOCHS (no improvement): 0
2020-05-17 17:07:58,302 ----------------------------------------------------------------------------------------------------
2020-05-17 17:07:58,380 epoch 14 - iter 0/429 - loss 0.70325166 - samples/sec: 17978.86
2020-05-17 17:08:01,386 epoch 14 - iter 42/429 - loss 1.00515173 - samples/sec: 448.51
2020-05-17 17:08:04,415 epoch 14 - iter 84/429 - loss 0.98868121 - samples/sec: 445.46
2020-05-17 17:08:07,379 epoch 14 - iter 126/429 - loss 0.99210523 - samples/sec: 455.14
2020-05-17 17:08:10,391 epoch 14 - iter 168/429 - loss 0.99926491 - samples/sec: 447.41
2020-05-17 17:08:13,421 epoch 14 - iter 210/429 - loss 1.00353137 - samples/sec: 444.87
2020-05-17 17:08:16,408 epoch 14 - ite

In [None]:
# For info about model, load the module, then type ?? to end of name
# OpenAIGPT2Embeddings??

### SAVE

In [54]:
#name='all_flair_512LSTM_15ep_8model'
name='FINAL_flair_all_trainsz_'+str(trainsize)
print(len(savelist))
saveResults(savelist, name=name)

9


In [55]:
len(savelist)

9

In [56]:
trainsize

13718

### DONE

In [None]:
# Code for combining results from 2 training times into 1 list etc.

In [None]:
# add the 2 last
name='all_flair_512LSTM_15ep_model'
name = 'FINAL_flair_all_trainsz_'+str(trainsize)
te = loadResults(name)

In [145]:
#newlist = te+savelist
newlist = te
len(newlist)

9

In [146]:
# cut doubles away
# newlist = newlist[5:]  

In [147]:
# save combined
name='FINAL_flair_all_trainsz_'+str(trainsize)
print(len(newlist))
saveResults(newlist, name=name)

9


In [None]:
# move list item 5 to end
# te[5]
# te.append(te.pop(5))

In [253]:
len(savelist)

9

In [69]:
te = savelist[0:5] + [savelist[9]] + savelist[6:9]
len(te)

9

In [258]:
for i in savelist:
    print(i['model'])

glove
fasttext web-crawl
fasttext news/wiki
en-twitter
elmo
Flair
bert-base-cased
BytePairEmbedding
gpt-1


In [None]:
# remove list item 0 from list
savelist.pop(0)

In [254]:
# remove list item n from list
savelist.pop(7)

{'model': 'gpt-1', 'labels': 0      3
 1      0
 2      0
 3      3
 4      3
       ..
 495    3
 496    4
 497    0
 498    3
 499    3
 Name: label, Length: 500, dtype: int64, 'confidence': 0      0.998186
 1      0.914472
 2      0.928485
 3      0.959929
 4      0.650925
          ...   
 495    0.963905
 496    0.890513
 497    0.797013
 498    0.586767
 499    0.941501
 Name: confidence, Length: 500, dtype: float64, 'traintime': 199.73805332183838, 'predtime3k': 3.4320037364959717, 'modeldesc': '512LSTM_15epoch_non-bi'}

In [139]:
for i in te:
    print(i['model'])

glove
fasttext web-crawl
fasttext news/wiki
en-twitter
elmo
Flair
bert-base-uncased
BytePairEmbedding
gpt-1


In [246]:
savelist[8]

{'model': 'gpt-1', 'labels': 0      3
 1      0
 2      0
 3      3
 4      3
       ..
 495    3
 496    4
 497    0
 498    3
 499    3
 Name: label, Length: 500, dtype: int64, 'confidence': 0      0.998186
 1      0.914472
 2      0.928485
 3      0.959929
 4      0.650925
          ...   
 495    0.963905
 496    0.890513
 497    0.797013
 498    0.586767
 499    0.941501
 Name: confidence, Length: 500, dtype: float64, 'traintime': 199.73805332183838, 'predtime3k': 3.4320037364959717, 'modeldesc': '512LSTM_15epoch_non-bi'}

In [307]:
savelist[8]['model']='gpt-1'

In [128]:
for i in savelist:
    

2

In [132]:
trainsize

1000

In [140]:
te[5]

{'model': 'Flair', 'labels': 0       1
 1       1
 2       1
 3       1
 4       1
        ..
 2995    1
 2996    1
 2997    1
 2998    1
 2999    1
 Name: label, Length: 3000, dtype: int64, 'confidence': 0       0.805744
 1       0.546344
 2       0.802081
 3       0.714745
 4       0.753530
           ...   
 2995    0.759887
 2996    0.893057
 2997    0.859725
 2998    0.813483
 2999    0.864573
 Name: confidence, Length: 3000, dtype: float64, 'traintime': 81.63569831848145, 'predtime3k': 1589378399.4414687, 'modeldesc': '512LSTM_15epoch_non-bi'}

In [138]:
te[5] = savelist[0]

In [142]:
te[6] = savelist[1]

In [149]:
for i in newlist:
    print(i['model'])

glove
fasttext web-crawl
fasttext news/wiki
en-twitter
elmo
Flair
bert-base-uncased
BytePairEmbedding
gpt-1


In [None]:
savelist[1]