# Important Imports

In [1]:
from platform import python_version

print(python_version())

3.11.0


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
import re

In [4]:
import nltk
nltk.download("stopwords")
nltk.download("punkt")  # Punkt Sentence Tokenizer
nltk.download("averaged_perceptron_tagger")  # Part of Speech Tagger
nltk.download("wordnet")  # a lexical database of English; useful for synonyms, hyponyms, etc.

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/marneusz/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/marneusz/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/marneusz/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /home/marneusz/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [5]:
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist

In [6]:
import random

In [7]:
stop = set(stopwords.words('english'))
random.sample(stopwords.words('english'), 10)

['isn',
 'm',
 "wouldn't",
 'again',
 "doesn't",
 'not',
 'own',
 'he',
 "needn't",
 'both']

Consider removing some stop words like _no_, _yes_, etc.

# Loading Data

In [8]:
DATASETS = {
    "FakeNews": "",
    "ISOT": ""
}

CUR_DATASET = "FakeNews"

In [9]:
train_dataset = pd.read_csv(f"./data/{CUR_DATASET}/train.csv.zip")
test_dataset = pd.read_csv(f"./data/{CUR_DATASET}/test.csv.zip")

In [10]:
train_dataset.head()

Unnamed: 0,id,title,author,text,label
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1


In [11]:
labels = train_dataset["label"].values

In [12]:
whole_dataset = pd.concat([train_dataset, test_dataset])

# Some More EDA

In [13]:
train_dataset.isnull().sum()

id           0
title      558
author    1957
text        39
label        0
dtype: int64

In [14]:
train_dataset.isnull().sum() / train_dataset.shape[0]

id        0.000000
title     0.026827
author    0.094087
text      0.001875
label     0.000000
dtype: float64

In [15]:
whole_dataset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 26000 entries, 0 to 5199
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   id      26000 non-null  int64  
 1   title   25320 non-null  object 
 2   author  23540 non-null  object 
 3   text    25954 non-null  object 
 4   label   20800 non-null  float64
dtypes: float64(1), int64(1), object(3)
memory usage: 1.2+ MB


# Data Preprocessing and Data Preparation

In [16]:
train_dataset = train_dataset.fillna("null data")
test_dataset = test_dataset.fillna("null data")

In [17]:
preprocessing_text_fn = {
    "no_punctuation": lambda txt: re.sub(r'[^\w\s]','', txt),
    "no_special_symbols": lambda txt: re.sub('[$,#,&]', '', txt),
    "no_digits": lambda txt: re.sub('\d*', '', txt),
    "no_www": lambda txt: re.sub('w{3}', '', txt),
    "no_urls": lambda txt: re.sub('http\S+', '', txt),
    "no_spaces": lambda txt: re.sub('\s+', ' ', txt),
    "no_single_chars": lambda txt: re.sub(r'\s+[a-zA-Z]\s+', '', txt)
}

In [18]:
def preprocess_text(text, pipeline = preprocessing_text_fn):
    text = str(text)
    for fn in pipeline.keys():
        text = pipeline[fn](text)
        
    return text

Consider removing some of the stopwords.

In [19]:
stop_words = stopwords.words('english')
STOP_WORDS = [preprocessing_text_fn["no_punctuation"](word) for word in stop_words]
random.sample(stop_words, 20)

["haven't",
 'them',
 'our',
 'wouldn',
 'those',
 'they',
 'off',
 "don't",
 'itself',
 "that'll",
 'same',
 'an',
 'weren',
 'where',
 'aren',
 'then',
 'shouldn',
 'doesn',
 'doing',
 't']

In [20]:
def tokenize_without_stopwords(text, stop_words=STOP_WORDS):
    word_tokens = word_tokenize(text)
    filtered_sequence = [word for word in word_tokens if not word.lower() in stop_words]
    return filtered_sequence

In [21]:
train_dataset["title"] = train_dataset["title"].apply(preprocess_text)
train_dataset["text"] = train_dataset["text"].apply(preprocess_text)
train_dataset.head(10)

Unnamed: 0,id,title,author,text,label
0,0,House Dem Aide We Didnt Even See Comeys Letter...,Darrell Lucus,House Dem Aide We Didnt Even See Comeys Letter...,1
1,1,FLYNN Hillary Clinton Big Woman on Campus Brei...,Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,Why the Truth Might Get You Fired October The ...,1
3,3,Civilians Killed In Single US Airstrike Have ...,Jessica Purkiss,Videos Civilians Killed In Single US Airstrike...,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print An Iranian woman has been sentenced to s...,1
5,5,Jackie Mason Hollywood Would Love Trump if He ...,Daniel Nussbaum,In these trying times Jackie Mason is the Voic...,0
6,6,Life Life Of Luxury Elton Johns Favorite Shark...,null data,Ever wonder how Britains most iconic pop piani...,1
7,7,Benoît Hamon Wins French Socialist Partys Pres...,Alissa J. Rubin,PARIS France chose an idealistic traditional c...,0
8,8,Excerpts FromDraft Script for Donald Trumps Qa...,null data,DonaldTrump is scheduled to makehighly anticip...,0
9,9,A BackChannel Plan for Ukraine and Russia Cour...,Megan Twohey and Scott Shane,A week before MichaelFlynn resigned as nationa...,0


In [22]:
test_dataset["title"] = test_dataset["title"].apply(preprocess_text)
test_dataset["text"] = test_dataset["text"].apply(preprocess_text)
test_dataset.head(10)

Unnamed: 0,id,title,author,text
0,20800,Specter of Trump Loosens Tongues if Not Purse ...,David Streitfeld,PALO ALTO Calif After years of scorning the po...
1,20801,Russian warships ready to strike terrorists ne...,null data,Russian warships ready to strike terrorists ne...
2,20802,NoDAPL Native American Leaders Vow to Stay All...,Common Dreams,Videos NoDAPL Native American Leaders Vow to S...
3,20803,Tim Tebow Will Attempt Another Comeback This T...,Daniel Victor,If at first you dont succeed trydifferent spor...
4,20804,Keiser Report Meme Wars E,Truth Broadcast Network,mins ago Views Comments Likes For the first t...
5,20805,Trump is USAs antique hero Clinton will be nex...,null data,Trump is USAs antique hero Clinton will be nex...
6,20806,Pelosi Calls for FBI Investigation to Find Out...,Pam Key,Sunday on NBCs Meet the Press House Minority L...
7,20807,Weekly Featured Profile Randy Shannon,Trevor Loudon,You are here Home Articles of the Bound Weekly...
8,20808,Urban Population Booms Will Make Climate Chang...,null data,Urban Population Booms Will Make Climate Chang...
9,20809,null data,cognitive dissident,dont we have the receipt


In [23]:
# train_dataset["title"] = train_dataset["title"].apply(tokenize_without_stopwords)
# train_dataset["text"] = train_dataset["text"].apply(tokenize_without_stopwords)
# train_dataset.head(10)

In [24]:
# test_dataset["title"] = test_dataset["title"].apply(tokenize_without_stopwords)
# test_dataset["text"] = test_dataset["text"].apply(tokenize_without_stopwords)
# test_dataset.head(10)

In [25]:
# train_text = train_dataset['text'].values
train_text = (train_dataset['author'] + " " + train_dataset['title']).values
test_text = (test_dataset['author'] + " " + test_dataset['title']).values

In [26]:
labels = train_dataset['label'].values

In the Kaggle competition the best scores were obtained by using only 'author' and 'title' features. Let's take a look if it's possible to train BERT using text.

# Model Initialization

In [27]:
from tqdm import tqdm

In [28]:
import torch
if torch.cuda.is_available():    
    device = torch.device('cuda')    
print(device)

  from .autonotebook import tqdm as notebook_tqdm


cuda


In [29]:
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig, get_linear_schedule_with_warmup
from torch.optim import AdamW
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler

In [72]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, precision_score, recall_score

In [30]:
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased', do_lower_case=True)

In [31]:
print(' Original: ', train_text[0])
print('Tokenized: ', tokenizer.tokenize(train_text[0]))
print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(train_text[0])))

 Original:  Darrell Lucus House Dem Aide We Didnt Even See Comeys Letter Until Jason Chaffetz Tweeted It
Tokenized:  ['darrell', 'luc', '##us', 'house', 'dem', 'aide', 'we', 'didn', '##t', 'even', 'see', 'come', '##ys', 'letter', 'until', 'jason', 'cha', '##ffe', '##tz', 't', '##wee', '##ted', 'it']
Token IDs:  [23158, 12776, 2271, 2160, 17183, 14895, 2057, 2134, 2102, 2130, 2156, 2272, 7274, 3661, 2127, 4463, 15775, 16020, 5753, 1056, 28394, 3064, 2009]


In [32]:
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased', # Use the 124-layer, 1024-hidden, 16-heads, 340M parameters BERT model with an uncased vocab.
    num_labels = 2, 
    output_attentions = False, 
    output_hidden_states = False, 
)
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [33]:
max_len = 0
for text in tqdm(train_text):
    input_ids = tokenizer.encode(text, add_special_tokens=True)
    max_len = max(max_len, len(input_ids))
print('Max sentence length: ', max_len)

100%|████████████████████████████████████████████████████████████████████████████████████| 20800/20800 [00:03<00:00, 5885.54it/s]

Max sentence length:  107





In [34]:
# https://www.kaggle.com/code/jeongwonkim10516/nlp-fake-news-with-bert-99-55-top1/notebook

def tokenize_map(sentence,labs='None'):
    
    """A function for tokenize all of the sentences and map the tokens to their word IDs."""
    
    global labels
    
    input_ids = []
    attention_masks = []

    # For every sentence...
    
    for text in sentence:
        #   "encode_plus" will:
        
        #   (1) Tokenize the sentence.
        #   (2) Prepend the `[CLS]` token to the start.
        #   (3) Append the `[SEP]` token to the end.
        #   (4) Map tokens to their IDs.
        #   (5) Pad or truncate the sentence to `max_length`
        #   (6) Create attention masks for [PAD] tokens.
        
        encoded_dict = tokenizer.encode_plus(
                            text,                      # Sentence to encode.
                            add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                            truncation='longest_first', # Activate and control truncation
                            max_length = 107,           # Max length according to our text data.
                            padding = 'max_length', # Pad & truncate all sentences.
                            return_attention_mask = True,   # Construct attn. masks.
                            return_tensors = 'pt',     # Return pytorch tensors.
                       )

        # Add the encoded sentence to the id list. 
        
        input_ids.append(encoded_dict['input_ids'])

        # And its attention mask (simply differentiates padding from non-padding).
        
        attention_masks.append(encoded_dict['attention_mask'])
        
    # Convert the lists into tensors.
    
    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    
    if labs != 'None': # Setting this for using this definition for both train and test data so labels won't be a problem in our outputs.
        labels = torch.tensor(labels)
        return input_ids, attention_masks, labels
    
    return input_ids, attention_masks

In [35]:
train_text.shape, test_text.shape

((20800,), (5200,))

In [36]:
input_ids, attention_masks, labels = tokenize_map(train_text, labels)
test_input_ids, test_attention_masks= tokenize_map(test_text)

  if labs != 'None': # Setting this for using this definition for both train and test data so labels won't be a problem in our outputs.


## Train and Validation Dataset

In [37]:
dataset = TensorDataset(input_ids, attention_masks, labels)

In [38]:
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_size, val_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

In [39]:
# Train DataLoader
batch_size = 8

train_dataloader = DataLoader(
            train_dataset,  
            sampler = RandomSampler(train_dataset), 
            batch_size = batch_size 
        )

# Validation DataLoader
validation_dataloader = DataLoader(
            val_dataset, 
            sampler = SequentialSampler(val_dataset), 
            batch_size = batch_size 
)

In [40]:
# Test DataLoader

test_data = TensorDataset(test_input_ids, test_attention_masks)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

## Hyperparameters

In [41]:
optimizer = AdamW(model.parameters(),
                  lr = 1e-5, # args.learning_rate
                  eps = 1e-8 # args.adam_epsilon
            )

In [82]:
num_epochs = 5
total_num_steps = len(train_dataloader) * num_epochs

## Metrics

In [77]:
def accuracy(predictions, labels):
    pred_flat = np.argmax(predictions, axis=1).flatten()
    labels_flat = labels.flatten()
    
    return accuracy_score(labels_flat, pred_flat)

def flat_f1_score(predictions, labels):
    pred_flat = np.argmax(predictions, axis=1).flatten()
    labels_flat = labels.flatten()
    

    return f1_score(labels_flat, pred_flat)

## Training

In [44]:
import time

In [84]:
training_stats = []
validations_labels_ep = []
actual_labels_ep = []

total_t0 = time.time()
for i in range(0, num_epochs):
    print('')
    print('Training...')
    print('----- Epoch {:} / {:} -----'.format(i + 1, num_epochs))

    t0 = time.time()
    total_train_loss = 0
    model.train()

    for step, batch in enumerate(train_dataloader):

        if step % 40 == 0 and not step == 0:
            elapsed = time.time() - t0
            # Report progress.
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        b_input_ids = batch[0].to(device).to(torch.int64)
        b_input_mask = batch[1].to(device).to(torch.int64)
        b_labels = batch[2].to(device).to(torch.int64)
        
        model.zero_grad()        

        loss = model(b_input_ids, 
                     token_type_ids=None, 
                     attention_mask=b_input_mask,
                     labels=b_labels)[0]
        logits = model(b_input_ids, 
                       token_type_ids=None, 
                       attention_mask=b_input_mask,
                       labels=b_labels)[1]

        total_train_loss += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        # scheduler.step()

    avg_train_loss = total_train_loss / len(train_dataloader)            
    training_time = time.time() - t0

    print('')
    print('  Average training loss: {0:.2f}'.format(avg_train_loss))
    print('  Training epoc h took: {:}'.format(training_time))
    
    print('')
    print('Running Validation...')

    t0 = time.time()
    
    model.eval()

    total_eval_accuracy = 0
    total_eval_loss = 0
    total_eval_f1 = 0
    nb_eval_steps = 0
    
    for batch in validation_dataloader:
        
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        
        with torch.no_grad():        

            loss = model(b_input_ids, 
                                   token_type_ids=None, 
                                   attention_mask=b_input_mask,
                                   labels=b_labels)[0]

            logits = model(b_input_ids, 
                                   token_type_ids=None, 
                                   attention_mask=b_input_mask,
                                   labels=b_labels)[1]
            
        # Accumulate the validation loss.
        
        total_eval_loss += loss.item()

        # Move logits and labels to CPU:
        
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Calculate the accuracy for this batch of test sentences, and accumulate it over all batches:
        
        total_eval_accuracy += accuracy(logits, label_ids)
        total_eval_f1 += flat_f1_score(logits, label_ids)
        

    # Report the final accuracy for this validation run.
    
    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
    print('  Accuracy: {0:.5f}'.format(avg_val_accuracy))
    
    # Report the final f1 score for this validation run.
    
    avg_val_f1 = total_eval_f1 / len(validation_dataloader)
    print('  F1: {0:.5f}'.format(avg_val_f1))

    # Calculate the average loss over all of the batches.
    
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    
    
    
    # Measure how long the validation run took:
    
    validation_time = time.time() - t0
    
    print('  Validation Loss: {0:.5f}'.format(avg_val_loss))
    print('  Validation took: {:}'.format(validation_time))

    # Record all statistics from this epoch.
    
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Val_F1' : avg_val_f1,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )


Training...
----- Epoch 1 / 5 -----
  Batch    40  of  2,080.    Elapsed: 7.569205045700073.
  Batch    80  of  2,080.    Elapsed: 14.930202722549438.
  Batch   120  of  2,080.    Elapsed: 22.323073625564575.
  Batch   160  of  2,080.    Elapsed: 29.761128902435303.
  Batch   200  of  2,080.    Elapsed: 37.256072998046875.
  Batch   240  of  2,080.    Elapsed: 44.73497271537781.
  Batch   280  of  2,080.    Elapsed: 52.26723623275757.
  Batch   320  of  2,080.    Elapsed: 59.7916533946991.
  Batch   360  of  2,080.    Elapsed: 67.35209608078003.
  Batch   400  of  2,080.    Elapsed: 74.9380567073822.
  Batch   440  of  2,080.    Elapsed: 82.5787570476532.
  Batch   480  of  2,080.    Elapsed: 90.19231677055359.
  Batch   520  of  2,080.    Elapsed: 97.85612797737122.
  Batch   560  of  2,080.    Elapsed: 105.5098352432251.
  Batch   600  of  2,080.    Elapsed: 113.17989873886108.
  Batch   640  of  2,080.    Elapsed: 120.85448169708252.
  Batch   680  of  2,080.    Elapsed: 128.537738

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.99808
  F1: 0.98839
  Validation Loss: 0.02242
  Validation took: 40.76230764389038

Training...
----- Epoch 2 / 5 -----
  Batch    40  of  2,080.    Elapsed: 7.899852752685547.
  Batch    80  of  2,080.    Elapsed: 15.77142882347107.
  Batch   120  of  2,080.    Elapsed: 23.598982095718384.
  Batch   160  of  2,080.    Elapsed: 31.45575451850891.
  Batch   200  of  2,080.    Elapsed: 39.346439361572266.
  Batch   240  of  2,080.    Elapsed: 47.16419982910156.
  Batch   280  of  2,080.    Elapsed: 55.053699016571045.
  Batch   320  of  2,080.    Elapsed: 62.82506203651428.
  Batch   360  of  2,080.    Elapsed: 70.7035710811615.
  Batch   400  of  2,080.    Elapsed: 78.53822374343872.
  Batch   440  of  2,080.    Elapsed: 86.41547727584839.
  Batch   480  of  2,080.    Elapsed: 94.25663185119629.
  Batch   520  of  2,080.    Elapsed: 102.20321726799011.
  Batch   560  of  2,080.    Elapsed: 110.05482149124146.
  Batch   600  of  2,080.    Elapsed: 117.85931444168091.
  Bat

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.99760
  F1: 0.98753
  Validation Loss: 0.03299
  Validation took: 41.11458468437195

Training...
----- Epoch 3 / 5 -----
  Batch    40  of  2,080.    Elapsed: 8.206607818603516.
  Batch    80  of  2,080.    Elapsed: 16.38247513771057.
  Batch   120  of  2,080.    Elapsed: 24.434902667999268.
  Batch   160  of  2,080.    Elapsed: 32.47109818458557.
  Batch   200  of  2,080.    Elapsed: 40.530410051345825.
  Batch   240  of  2,080.    Elapsed: 48.441991567611694.
  Batch   280  of  2,080.    Elapsed: 56.553415298461914.
  Batch   320  of  2,080.    Elapsed: 64.60986590385437.
  Batch   360  of  2,080.    Elapsed: 72.62580800056458.
  Batch   400  of  2,080.    Elapsed: 80.69424486160278.
  Batch   440  of  2,080.    Elapsed: 88.74522542953491.
  Batch   480  of  2,080.    Elapsed: 96.79158139228821.
  Batch   520  of  2,080.    Elapsed: 104.80403590202332.
  Batch   560  of  2,080.    Elapsed: 112.80157375335693.
  Batch   600  of  2,080.    Elapsed: 120.77660870552063.
  B

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.99832
  F1: 0.98872
  Validation Loss: 0.01859
  Validation took: 40.93830060958862

Training...
----- Epoch 4 / 5 -----
  Batch    40  of  2,080.    Elapsed: 8.113901853561401.
  Batch    80  of  2,080.    Elapsed: 16.139320135116577.
  Batch   120  of  2,080.    Elapsed: 24.164429187774658.
  Batch   160  of  2,080.    Elapsed: 32.19780731201172.
  Batch   200  of  2,080.    Elapsed: 40.285669565200806.
  Batch   240  of  2,080.    Elapsed: 48.30656099319458.
  Batch   280  of  2,080.    Elapsed: 56.366872787475586.
  Batch   320  of  2,080.    Elapsed: 64.48837733268738.
  Batch   360  of  2,080.    Elapsed: 72.59440493583679.
  Batch   400  of  2,080.    Elapsed: 80.82812237739563.
  Batch   440  of  2,080.    Elapsed: 88.98242545127869.
  Batch   480  of  2,080.    Elapsed: 97.14892411231995.
  Batch   520  of  2,080.    Elapsed: 105.33432054519653.
  Batch   560  of  2,080.    Elapsed: 113.50136280059814.
  Batch   600  of  2,080.    Elapsed: 121.74960494041443.
  B

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.99832
  F1: 0.98872
  Validation Loss: 0.02278
  Validation took: 41.20144200325012

Training...
----- Epoch 5 / 5 -----
  Batch    40  of  2,080.    Elapsed: 8.1109037399292.
  Batch    80  of  2,080.    Elapsed: 16.235691785812378.
  Batch   120  of  2,080.    Elapsed: 24.343839168548584.
  Batch   160  of  2,080.    Elapsed: 32.362966537475586.
  Batch   200  of  2,080.    Elapsed: 40.380187034606934.
  Batch   240  of  2,080.    Elapsed: 48.43663239479065.
  Batch   280  of  2,080.    Elapsed: 56.47460126876831.
  Batch   320  of  2,080.    Elapsed: 64.47854566574097.
  Batch   360  of  2,080.    Elapsed: 72.49240350723267.
  Batch   400  of  2,080.    Elapsed: 80.50635981559753.
  Batch   440  of  2,080.    Elapsed: 88.50810241699219.
  Batch   480  of  2,080.    Elapsed: 96.47233080863953.
  Batch   520  of  2,080.    Elapsed: 104.56666278839111.
  Batch   560  of  2,080.    Elapsed: 112.56584453582764.
  Batch   600  of  2,080.    Elapsed: 120.62103796005249.
  Bat

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.99832
  F1: 0.98872
  Validation Loss: 0.02389
  Validation took: 41.05749845504761
