In [1]:
import lucem_illud_2020 #pip install -U git+git://github.com/Computational-Content-Analysis-2020/lucem_illud_2020.git

#All these packages need to be installed from pip
#For ML
import sklearn
import sklearn.naive_bayes
import sklearn.tree
import sklearn.ensemble
import sklearn.neural_network
import sklearn.decomposition

import nltk #For tokenizing and normalizing
import numpy as np #arrays
import matplotlib.pyplot as plt #Plots
import matplotlib.colors # For nice colours
import seaborn #Makes plots look nice, also heatmaps
import scipy as sp #for interp

#These are mostly from the standard library (pandas and requests are third-party)
import collections
import os
import os.path
import random
import re
import glob
import pandas as pd
import requests
import json
import math

#This 'magic' command makes the plots work better
#in the notebook, don't use it outside of a notebook.
#Also you can ignore the warning
%matplotlib inline

## Lyrics-Based Music Genre/Artist Classification

## Scenario 4

In [2]:
# Load the pre-cleaned lyrics corpus. The displayed frame has one row per song
# with its year, artist, genre, raw lyrics and a normalized token list.
lyrics_1 = pd.read_csv("cleaned_lyrics_year.csv")
lyrics_1

Unnamed: 0,year,artist,genre,lyrics,normalized
0,2009,beyonce-knowles,Pop,"oh baby, how you doing? you know i'm gonna cut...","['oh', 'baby', 'know', 'be', 'gon', 'na', 'cut..."
1,2009,beyonce-knowles,Pop,"playin' everything so easy, it's like you seem...","['playin', 'easy', 'like', 'sure', 'way', 'not..."
2,2009,beyonce-knowles,Pop,if you search for tenderness it isn't hard to ...,"['search', 'tenderness', 'hard', 'find', 'love..."
3,2009,beyonce-knowles,Pop,"oh oh oh i, oh oh oh i if i wrote a book abo...","['oh', 'oh', 'oh', 'oh', 'oh', 'oh', 'write', ..."
4,2009,beyonce-knowles,Pop,"party the people, the people the party it's po...","['party', 'people', 'people', 'party', 'pop', ..."
...,...,...,...,...,...
210637,2012,edens-edge,Country,"i gotta say boy, after only just a couple of d...","['get', 'ta', 'boy', 'couple', 'date', 'hand',..."
210638,2012,edens-edge,Country,i helped you find her diamond ring you made me...,"['help', 'find', 'diamond', 'ring', 'try', 'to..."
210639,2012,edens-edge,Country,look at the couple in the corner booth looks a...,"['look', 'couple', 'corner', 'booth', 'look', ..."
210640,2012,edens-edge,Country,when i fly off this mortal earth and i'm measu...,"['fly', 'mortal', 'earth', 'be', 'measure', 'd..."


In [3]:
# Give every genre name an integer id, numbered in order of first appearance.
genre_dict = {name: idx for idx, name in enumerate(pd.unique(lyrics_1['genre']))}
genre_dict

{'Pop': 0,
 'Hip-Hop': 1,
 'Rock': 2,
 'Metal': 3,
 'Country': 4,
 'Jazz': 5,
 'Electronic': 6,
 'Folk': 7,
 'R&B': 8,
 'Indie': 9}

In [4]:
# Replace genre names with their integer ids from `genre_dict`.
# The guard keeps the cell idempotent: once the column is already integer-coded,
# running the cell again is a no-op instead of a KeyError on the integer values.
if not pd.api.types.is_integer_dtype(lyrics_1['genre']):
    lyrics_1['genre'] = lyrics_1['genre'].apply(lambda x: genre_dict[x])

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the songs as the test set; the fixed random_state makes the
# split repeatable across runs.
lyrics_1_train, lyrics_1_test = train_test_split(lyrics_1, test_size=0.3, random_state=0)

In [6]:
# Imports for the deep learning models (Keras LSTMs and a PyTorch/transformers BERT)
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding
from transformers import BertTokenizer, BertConfig # pip install transformers==2.4.1
from transformers import AdamW, BertForSequenceClassification
from keras.preprocessing.sequence import pad_sequences
import torch

Using TensorFlow backend.


In [7]:
# Use a CUDA device when PyTorch can see one, otherwise fall back to the CPU.
gpu = torch.cuda.is_available()
device = torch.device("cuda" if gpu else "cpu")
if gpu:
    n_gpu = torch.cuda.device_count()
    torch.cuda.get_device_name(0)
device

device(type='cpu')

In [8]:
# Wrap every lyric in BERT's sentence markers and pull out the integer genre
# labels, first for the training split and then for the test split.
train_lyrics = [" ".join(("[CLS]", text, "[SEP]")) for text in lyrics_1_train['lyrics']]
test_lyrics = [" ".join(("[CLS]", text, "[SEP]")) for text in lyrics_1_test['lyrics']]
train_labels = lyrics_1_train['genre'].values
test_labels = lyrics_1_test['genre'].values

In [9]:
# Load the pretrained lower-casing BERT tokenizer.
deep_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
from multiprocessing import Pool
# Worker pool used to tokenize in parallel. It is not closed here because later
# cells reuse `p` for id conversion and attention-mask building.
p = Pool()
# One token list per lyric, in the same order as the input lists.
train_tokenized = p.map(deep_tokenizer.tokenize, train_lyrics)
test_tokenized = p.map(deep_tokenizer.tokenize, test_lyrics)

In [11]:
# Map each token list to its vocabulary ids, again in parallel on the shared pool.
train_ids = p.map(deep_tokenizer.convert_tokens_to_ids, train_tokenized)
test_ids = p.map(deep_tokenizer.convert_tokens_to_ids, test_tokenized)

In [12]:
# Fixed sequence length fed to every model below.
MAX_LEN = 512
# Pad short sequences at the end and cut long ones at the end so every row has
# exactly MAX_LEN ids.
# NOTE(review): because truncation is "post", lyrics longer than MAX_LEN tokens
# lose the trailing [SEP] that was appended earlier - confirm this is acceptable
# for the BERT inputs.
train_ids = pad_sequences(train_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")
test_ids = pad_sequences(test_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")

In [14]:
# Hyperparameters for the LSTM models.
embedding_dim = 32  # width of the learned token embedding
unit = 100  # number of LSTM units in the first model
no_labels = len(np.unique(train_labels))  # number of distinct genre ids in the training labels
batch_size = 32  # mini-batch size passed to fit(); assigned again before the BERT dataloaders

In [15]:
no_labels

10

In [16]:
# adjustment for imbalanced dataset
# Split the training frame into one sub-frame per genre id.
# The grouping key is passed as a scalar, not a one-element list: with a list,
# newer pandas yields 1-tuple group keys such as (0,), which would no longer
# match the integer labels expected by the class-weight dict built from this.
train_groups = dict(list(lyrics_1_train.groupby('genre')))

In [17]:
# Inverse-frequency weight per genre: total training songs divided by the
# number of training songs in that genre.
class_weights = {
    genre_id: len(train_lyrics) / len(frame)
    for genre_id, frame in train_groups.items()
}
class_weights

{0: 6.2238402768984,
 1: 9.545478086359811,
 2: 2.147430202583633,
 3: 10.074405575293795,
 4: 14.898353036273619,
 5: 29.31974547623782,
 6: 32.40637362637363,
 7: 125.16893039049236,
 8: 64.4726716222125,
 9: 71.1970062771608}

In [18]:
# Five evenly spaced offsets spanning the smallest to the largest class weight.
_weight_values = list(class_weights.values())
bases = np.linspace(min(_weight_values), max(_weight_values), 5)
bases

array([  2.1474302 ,  32.90280525,  63.6581803 ,  94.41355534,
       125.16893039])

In [19]:
# One candidate class-weight dict per offset: every genre weight shifted up by
# that offset.
class_weight_paras = [
    {genre_id: weight + offset for genre_id, weight in class_weights.items()}
    for offset in bases
]
class_weight_paras

[{0: 8.371270479482034,
  1: 11.692908288943444,
  2: 4.294860405167266,
  3: 12.221835777877429,
  4: 17.04578323885725,
  5: 31.46717567882145,
  6: 34.553803828957264,
  7: 127.316360593076,
  8: 66.62010182479614,
  9: 73.34443647974443},
 {0: 39.126645526459214,
  1: 42.44828333592062,
  2: 35.05023545214445,
  3: 42.97721082485461,
  4: 47.80115828583443,
  5: 62.222550725798634,
  6: 65.30917887593444,
  7: 158.0717356400532,
  8: 97.37547687177332,
  9: 104.09981152672161},
 {0: 69.88202057343639,
  1: 73.20365838289781,
  2: 65.80561049912163,
  3: 73.73258587183179,
  4: 78.55653333281163,
  5: 92.97792577277582,
  6: 96.06455392291163,
  7: 188.82711068703037,
  8: 128.1308519187505,
  9: 134.8551865736988},
 {0: 100.63739562041359,
  1: 103.95903342987499,
  2: 96.56098554609882,
  3: 104.48796091880898,
  4: 109.3119083797888,
  5: 123.733300819753,
  6: 126.81992896988882,
  7: 219.58248573400755,
  8: 158.8862269657277,
  9: 165.61056162067598},
 {0: 131.39277066739078,


In [20]:
# Model 1: learned token embedding -> one LSTM layer -> softmax over the genres.
model_lstm_1 = Sequential([
    Embedding(deep_tokenizer.vocab_size, embedding_dim, input_length=MAX_LEN),
    LSTM(unit),
    Dense(no_labels, activation='softmax'),
])
# Labels are integer ids, hence the sparse categorical cross-entropy loss.
model_lstm_1.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy',
                     metrics=['accuracy'])
model_lstm_1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 512, 32)           976704    
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1010      
Total params: 1,030,914
Trainable params: 1,030,914
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Train model 1 with the third candidate class-weight dict to counter the genre
# imbalance. No `workers` / `use_multiprocessing`: Keras applies those only to
# generator or Sequence inputs, and these inputs are in-memory arrays.
history_lstm_1 = model_lstm_1.fit(train_ids, train_labels,
                                  epochs=15, batch_size=batch_size,
                                  class_weight=class_weight_paras[2])

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [23]:
# Predicted genre id = arg-max over the softmax outputs. This is what
# `predict_classes` computed for a multi-class softmax head, written with
# `predict` so it also runs on Keras versions that no longer ship
# `predict_classes`.
predict_1 = np.argmax(model_lstm_1.predict(test_ids, batch_size=32), axis=-1)
from sklearn.metrics import matthews_corrcoef
matthews_corrcoef(test_labels, predict_1)

0.3931704979773331

In [24]:
from sklearn.metrics import accuracy_score,roc_auc_score
# Fraction of test songs whose predicted genre id equals the true one (model 1).
accuracy_score(test_labels, predict_1)

0.5759656924026395

In [25]:
from sklearn import metrics
# Balanced accuracy for model 1: the mean of per-genre recall, so rare genres
# count as much as common ones.
metrics.balanced_accuracy_score(test_labels, predict_1)

0.3445176586623711

In [26]:
from sklearn.metrics import roc_auc_score
# Multi-class ROC AUC for model 1, one-vs-one, computed from the softmax
# probabilities returned by predict().
roc_auc_score(test_labels, model_lstm_1.predict(test_ids, batch_size=32),multi_class='ovo')

0.7625341870411606

In [27]:
# Model 2: same embedding as model 1, but two stacked LSTM layers. The first
# returns its full output sequence so the second has a sequence to consume.
model_lstm_2 = Sequential([
    Embedding(deep_tokenizer.vocab_size, embedding_dim, input_length=MAX_LEN),
    LSTM(100, return_sequences=True),
    LSTM(100),
    Dense(no_labels, activation='softmax'),
])
model_lstm_2.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy',
                     metrics=['accuracy'])
model_lstm_2.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 512, 32)           976704    
_________________________________________________________________
lstm_2 (LSTM)                (None, 512, 100)          53200     
_________________________________________________________________
lstm_3 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1010      
Total params: 1,111,314
Trainable params: 1,111,314
Non-trainable params: 0
_________________________________________________________________


In [28]:
# Train model 2 (no class weighting). No `workers` / `use_multiprocessing`:
# Keras applies those only to generator or Sequence inputs, and these inputs
# are in-memory arrays.
history_lstm_2 = model_lstm_2.fit(train_ids, train_labels,
                                  epochs=10, batch_size=batch_size)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [29]:
# Predicted genre id = arg-max over the softmax outputs. This is what
# `predict_classes` computed for a multi-class softmax head, written with
# `predict` so it also runs on Keras versions that no longer ship
# `predict_classes`.
predict_2 = np.argmax(model_lstm_2.predict(test_ids, batch_size=32), axis=-1)
from sklearn.metrics import matthews_corrcoef
matthews_corrcoef(test_labels, predict_2)

0.4214566149349209

In [30]:
from sklearn.metrics import accuracy_score,roc_auc_score
# Fraction of test songs whose predicted genre id equals the true one (model 2).
accuracy_score(test_labels, predict_2)

0.5977402560410172

In [31]:
from sklearn.metrics import roc_auc_score
# Multi-class ROC AUC for model 2, one-vs-one, computed from the softmax
# probabilities returned by predict().
roc_auc_score(test_labels, model_lstm_2.predict(test_ids, batch_size=32),multi_class='ovo')

0.7781341769462767

In [32]:
# Save the trained two-layer LSTM (model 2) to an HDF5 file in the working directory.
model_lstm_2.save('lstm_model.h5')

In [35]:
# Model 3: the two-layer LSTM of model 2 with a wider (64-dimensional) embedding.
model_lstm_3 = Sequential([
    Embedding(deep_tokenizer.vocab_size, output_dim=64, input_length=MAX_LEN),
    LSTM(100, return_sequences=True),
    LSTM(100),
    Dense(no_labels, activation='softmax'),
])
model_lstm_3.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy',
                     metrics=['accuracy'])
model_lstm_3.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 512, 64)           1953408   
_________________________________________________________________
lstm_4 (LSTM)                (None, 512, 100)          66000     
_________________________________________________________________
lstm_5 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1010      
Total params: 2,100,818
Trainable params: 2,100,818
Non-trainable params: 0
_________________________________________________________________


In [36]:
# Train model 3 (no class weighting). No `workers` / `use_multiprocessing`:
# Keras applies those only to generator or Sequence inputs, and these inputs
# are in-memory arrays.
history_lstm_3 = model_lstm_3.fit(train_ids, train_labels,
                                  epochs=10, batch_size=batch_size)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Predicted genre id = arg-max over the softmax outputs. This is what
# `predict_classes` computed for a multi-class softmax head, written with
# `predict` so it also runs on Keras versions that no longer ship
# `predict_classes`.
predict_3 = np.argmax(model_lstm_3.predict(test_ids, batch_size=32), axis=-1)
from sklearn.metrics import matthews_corrcoef
matthews_corrcoef(test_labels, predict_3)

In [None]:
# Fraction of test songs whose predicted genre id equals the true one (model 3).
# Relies on `accuracy_score` having been imported by an earlier cell.
accuracy_score(test_labels, predict_3)

In [None]:
# Multi-class ROC AUC for model 3, one-vs-one, computed from the softmax
# probabilities returned by predict(). Relies on `roc_auc_score` having been
# imported by an earlier cell.
roc_auc_score(test_labels, model_lstm_3.predict(test_ids, batch_size=32),multi_class='ovo')

## Bert Preprocessing

In [None]:
%%writefile mask.py
def mask(x):
    """Build an attention mask for one sequence of token ids.

    Returns a list with one float per id in ``x``: 1.0 where the id is
    greater than zero and 0.0 everywhere else.
    """
    flags = []
    for token_id in x:
        flags.append(float(token_id > 0))
    return flags

In [None]:
# `mask` is imported from the mask.py file written by the previous cell and
# applied to every padded id row on the shared worker pool `p`.
from mask import mask 
train_masks = p.map(mask,train_ids) 
test_masks = p.map(mask,test_ids)

In [None]:
# Carve a 10% validation set out of the training data. Ids, labels and masks are
# split in a single call so all three are guaranteed to be partitioned with the
# same row indices, rather than relying on two calls sharing a random_state.
(final_train_inputs, validation_inputs,
 final_train_labels, validation_labels,
 final_train_masks, validation_masks) = train_test_split(
    train_ids, train_labels, train_masks, random_state=2020, test_size=0.1)

In [73]:
import torch # pip install torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertConfig # pip install transformers==2.4.1
from transformers import AdamW, BertForSequenceClassification
from tqdm import tqdm, trange

In [75]:
# Convert every split to an int64 tensor. `torch.as_tensor` passes an existing
# tensor of the right dtype straight through, so re-running this cell is
# harmless and does not raise PyTorch's copy-construct UserWarning the way
# `torch.tensor(existing_tensor)` does.
final_train_inputs = torch.as_tensor(final_train_inputs, dtype=torch.long)
validation_inputs = torch.as_tensor(validation_inputs, dtype=torch.long)
final_train_labels = torch.as_tensor(final_train_labels, dtype=torch.long)
validation_labels = torch.as_tensor(validation_labels, dtype=torch.long)
final_train_masks = torch.as_tensor(final_train_masks, dtype=torch.long)
validation_masks = torch.as_tensor(validation_masks, dtype=torch.long)

  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
  """
  


In [76]:
# Mini-batch size for BERT fine-tuning (the BERT authors recommend 16 or 32).
batch_size = 32

# Wrap the (ids, mask, label) tensors so the loaders below hand them out one
# mini-batch at a time.

# Training batches are drawn in random order.
final_train_data = TensorDataset(final_train_inputs, final_train_masks, final_train_labels)
final_train_sampler = RandomSampler(final_train_data)
final_train_dataloader = DataLoader(dataset=final_train_data,
                                    batch_size=batch_size,
                                    sampler=final_train_sampler)

# Validation batches are read in their stored order.
validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(dataset=validation_data,
                                   batch_size=batch_size,
                                   sampler=validation_sampler)

In [77]:
# Load pretrained BERT with a classification head sized to the number of genres
# (`no_labels`), and place it on `device`. The training loop moves every batch
# to `device`, so the model must live there too or the forward pass fails with a
# device mismatch whenever a GPU is available.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=no_labels).to(device)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=361.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




In [78]:
# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    """Return the fraction of rows whose arg-max prediction equals the label.

    preds:  2-D array of per-class scores, one row per example.
    labels: array of true class ids; it is flattened before comparison.
    """
    predicted = np.argmax(preds, axis=1).flatten()
    expected = labels.flatten()
    hits = (predicted == expected).sum()
    return hits / expected.shape[0]

In [79]:
import time
import datetime

def format_time(elapsed):
    """Format a duration given in seconds as an ``h:mm:ss`` string.

    The duration is rounded to the nearest whole second first; the text is
    the string form of the corresponding ``datetime.timedelta``.
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [80]:
from transformers import get_linear_schedule_with_warmup
# Number of fine-tuning epochs (the BERT authors recommend between 2 and 4).
epochs = 4

# The scheduler is stepped once per batch, so the schedule spans
# (batches per epoch) * (epochs) steps.
total_steps = len(final_train_dataloader) * epochs

# AdamW over all model parameters, with the learning rate and epsilon set
# explicitly.
optimizer = AdamW(model.parameters(), lr=5e-5, eps=1e-8)

# Learning-rate schedule over `total_steps` with no warm-up phase.
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=total_steps)

## Bert Training

In [None]:
import random

# This training code is based on the `run_glue.py` script here:
# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128

# Seed every random number generator in play (Python, NumPy, PyTorch CPU and
# CUDA) so that runs are repeatable.
seed_val = 42

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# Average training loss of each epoch, kept so the learning curve can be plotted.
loss_values = []

# For each epoch: one full training pass, then one validation pass.
for epoch_i in range(0, epochs):

    # ========================================
    #               Training
    # ========================================

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Start of the training pass, used for the elapsed-time reports.
    t0 = time.time()

    # Sum of the per-batch losses for this epoch.
    total_loss = 0

    # Switch to training mode. This only changes the mode (dropout is active);
    # it does not perform any training by itself.
    model.train()

    for step, batch in enumerate(final_train_dataloader):

        # Progress update every 40 batches.
        if step % 40 == 0 and not step == 0:
            # Elapsed time since the epoch started, formatted as h:mm:ss.
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(final_train_dataloader), elapsed))

        # `batch` holds three tensors, copied to the compute device here:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: labels
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # Clear gradients left over from the previous step; PyTorch accumulates
        # them otherwise.
        model.zero_grad()

        # Forward pass. Because `labels` is supplied, the first element of the
        # returned tuple is the loss.
        outputs = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask,
                        labels=b_labels)
        loss = outputs[0]

        # `.item()` turns the single-value loss tensor into a Python number.
        total_loss += loss.item()

        # Backward pass to compute the gradients.
        loss.backward()

        # Clip the gradient norm to 1.0 to guard against exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Apply the parameter update, then advance the learning-rate schedule.
        optimizer.step()
        scheduler.step()

    # Average loss over all training batches of this epoch.
    avg_train_loss = total_loss / len(final_train_dataloader)

    # Store the loss value for plotting the learning curve.
    loss_values.append(avg_train_loss)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epcoh took: {:}".format(format_time(time.time() - t0)))

    # ========================================
    #               Validation
    # ========================================
    # After each training epoch, measure performance on the validation set.

    print("")
    print("Running Validation...")

    t0 = time.time()

    # Evaluation mode: dropout layers behave differently than during training.
    model.eval()

    # Tracking variables. `eval_accuracy` accumulates the number of correctly
    # classified examples and `nb_eval_examples` the number of examples seen,
    # so the reported accuracy is exact even when the last batch is smaller
    # than the others (a plain mean of per-batch accuracies would over-weight
    # that short batch).
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0

    for batch in validation_dataloader:

        # Move the batch to the compute device and unpack it.
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        # No gradients are needed for validation; skipping them saves memory
        # and time.
        with torch.no_grad():
            # No labels are passed, so the first element of the returned tuple
            # is the logits rather than the loss.
            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask)

        # Logits are the raw per-class scores before any softmax.
        logits = outputs[0]

        # Move logits and labels to the CPU as NumPy arrays.
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Fraction correct in this batch, weighted by the batch size.
        tmp_eval_accuracy = flat_accuracy(logits, label_ids)
        batch_examples = len(label_ids)
        eval_accuracy += tmp_eval_accuracy * batch_examples
        nb_eval_examples += batch_examples

        # Track the number of batches
        nb_eval_steps += 1

    # Report the accuracy over all validation examples. The max() guard avoids
    # a ZeroDivisionError if the validation loader is empty.
    print("  Accuracy: {0:.2f}".format(eval_accuracy / max(nb_eval_examples, 1)))
    print("  Validation took: {:}".format(format_time(time.time() - t0)))

print("")
print("Training complete!")


Training...
  Batch    40  of  4,675.    Elapsed: 1:59:19.
  Batch    80  of  4,675.    Elapsed: 3:56:54.
  Batch   120  of  4,675.    Elapsed: 5:52:28.
  Batch   160  of  4,675.    Elapsed: 8:17:31.
  Batch   200  of  4,675.    Elapsed: 8:46:03.
  Batch   240  of  4,675.    Elapsed: 9:11:58.
  Batch   280  of  4,675.    Elapsed: 9:39:15.
  Batch   320  of  4,675.    Elapsed: 10:07:51.
  Batch   360  of  4,675.    Elapsed: 10:36:43.
  Batch   400  of  4,675.    Elapsed: 11:03:40.
  Batch   440  of  4,675.    Elapsed: 11:32:11.
  Batch   480  of  4,675.    Elapsed: 11:59:16.
  Batch   520  of  4,675.    Elapsed: 12:27:46.
  Batch   560  of  4,675.    Elapsed: 12:56:28.
  Batch   600  of  4,675.    Elapsed: 13:24:35.
  Batch   640  of  4,675.    Elapsed: 13:51:51.
  Batch   680  of  4,675.    Elapsed: 14:19:24.
  Batch   720  of  4,675.    Elapsed: 14:46:35.
  Batch   760  of  4,675.    Elapsed: 15:12:56.
  Batch   800  of  4,675.    Elapsed: 15:39:09.
  Batch   840  of  4,675.    Elaps