In [25]:
# First run this cell
!pip install terminaltables rouge
from datetime import datetime
import argparse
import random
import pickle
import codecs
import json
import os
import nltk
import torch
import numpy as np
from pprint import pprint
import torch
import torch.nn.functional as F
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from terminaltables import AsciiTable
from rouge import Rouge

import pandas as pd



#### Preprocess data

In [26]:
dataset_file = './data/reviews.csv'

In [27]:
data = pd.read_csv("./data/Reviews.csv")[:25000]

In [28]:
data.head()

Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...


In [29]:
data = data.dropna()
data = data.reset_index(drop=True)
data = data.loc[:, ['Summary', 'Text']]
data.head()

Unnamed: 0,Summary,Text
0,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,"""Delight"" says it all",This is a confection that has been around a fe...
3,Cough Medicine,If you are looking for the secret ingredient i...
4,Great taffy,Great taffy at a great price. There was a wid...


In [30]:
training_data = data[:20000]
test_data = data[20000:]

training_data = training_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

In [31]:
source_w2i = {}
source_i2w = []
target_w2i = {}
target_i2w = []

# The padding symbol will be used to ensure that all tensors in a batch
# have equal length.
PADDING_SYMBOL = ' '
source_w2i[PADDING_SYMBOL] = 0
source_i2w.append( PADDING_SYMBOL )
target_w2i[PADDING_SYMBOL] = 0
target_i2w.append( PADDING_SYMBOL )

START_SYMBOL = '<START>'
END_SYMBOL = '<END>'
UNK_SYMBOL = '<UNK>'
source_w2i[START_SYMBOL] = 1
source_i2w.append( START_SYMBOL )
target_w2i[START_SYMBOL] = 1
target_i2w.append( START_SYMBOL )
source_w2i[END_SYMBOL] = 2
source_i2w.append( END_SYMBOL )
target_w2i[END_SYMBOL] = 2
target_i2w.append( END_SYMBOL )
source_w2i[UNK_SYMBOL] = 3
source_i2w.append( UNK_SYMBOL )
target_w2i[UNK_SYMBOL] = 3
target_i2w.append( UNK_SYMBOL )

# Max number of words to be predicted if <END> symbol is not reached
MAX_PREDICTIONS = 20

In [32]:
class AmazonDataset(Dataset) :
    """
    A dataset with source sentences and their respective translations
    into the target language.

    Each sentence is represented as a list of word IDs. 
    """
    def __init__( self, data, record_symbols=True ) :
        try :
            nltk.word_tokenize("hi there.")
        except LookupError:
            nltk.download('punkt')
        self.source_list = []
        self.target_list = []
        # Read the datafile
        
        for i in range(len(data)):
            s = data.Text[i]
            t = data.Summary[i]
            source_sentence = []
            for w in nltk.word_tokenize(s) :
                w = w.lower()
                if w not in source_i2w and record_symbols :
                    source_w2i[w] = len(source_i2w)
                    source_i2w.append( w )
                source_sentence.append( source_w2i.get(w, source_w2i[UNK_SYMBOL]) )
            source_sentence.append(source_w2i[END_SYMBOL])
            self.source_list.append( source_sentence )
            target_sentence = []
            for w in nltk.word_tokenize(t) :
                w = w.lower()
                if w not in target_i2w and record_symbols :
                    target_w2i[w] = len(target_i2w)
                    target_i2w.append( w )
                target_sentence.append( target_w2i.get(w, target_w2i[UNK_SYMBOL]) )
            target_sentence.append(target_w2i[END_SYMBOL])
            self.target_list.append( target_sentence )

    def __len__(self) :
        return len(self.source_list)

    def __getitem__(self, idx) :
        return self.source_list[idx], self.target_list[idx]

In [33]:
# torch.save(dataset, 'amazon_dataset_py')

In [34]:
training_dataset = AmazonDataset(training_data)

In [35]:
test_dataset = AmazonDataset(test_data, record_symbols=False)

In [36]:
# loaded_dataset = torch.load('./data/amazon_dataset_py')

In [37]:
# len(loaded_dataset)

In [38]:
# training_dataset, test_dataset = torch.utils.data.random_split(loaded_dataset, [0.8,0.2])

In [39]:
def pad_sequence(batch, pad_source=source_w2i[PADDING_SYMBOL], pad_target=target_w2i[PADDING_SYMBOL]):
    source, target = zip(*batch)
    max_source_len = max(map(len, source))
    max_target_len = max(map(len, target))
    padded_source = [[b[i] if i < len(b) else pad_source for i in range(max_source_len)] for b in source]
    padded_target = [[l[i] if i < len(l) else pad_target for i in range(max_target_len)] for l in target]
    return padded_source, padded_target

In [40]:
def load_glove_embeddings(embedding_file):
    """
    Reads pre-made embeddings from a file
    """
    N = len(source_w2i)
    embeddings = [0]*N
    with codecs.open(embedding_file, 'r', 'utf-8') as f:
        for line in f:
            data = line.split()
            word = data[0].lower()
            if word not in source_w2i:
                source_w2i[word] = N
                source_i2w.append(word)
                N += 1
                embeddings.append(0)
            vec = [float(x) for x in data[1:]]
            D = len(vec)
            embeddings[source_w2i[word]] = vec
    # Add a '0' embedding for the padding symbol
    embeddings[0] = [0]*D
    # Check if there are words that did not have a ready-made Glove embedding
    # For these words, add a random vector
    for word in source_w2i:
        index = source_w2i[word]
        if embeddings[index] == 0:
            embeddings[index] = (np.random.random(D)-0.5).tolist()
    return D, embeddings

In [41]:
# ==================== Encoder ==================== #

class EncoderRNN(nn.Module):
    """
    Encodes a batch of source sentences. 
    """
    
    def __init__(self, no_of_input_symbols, embeddings=None, embedding_size=16, hidden_size=25,
        encoder_bidirectional=False, device='cpu', use_gru=False, tune_embeddings=False) :
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.is_bidirectional = encoder_bidirectional
        self.embedding = nn.Embedding(no_of_input_symbols,embedding_size)
        if embeddings !=  None :
            self.embedding.weight = nn.Parameter( torch.tensor(embeddings, dtype=torch.float), requires_grad=tune_embeddings )
        if use_gru:
            self.rnn = nn.GRU(embedding_size, hidden_size, batch_first=True, bidirectional=self.is_bidirectional)
        else:
            self.rnn = nn.RNN(embedding_size, hidden_size, batch_first=True, bidirectional=self.is_bidirectional)
        self.device = device
        self.to(device)

    def set_embeddings(self, embeddings):
        self.embedding.weight = torch.tensor(embeddings, dtype=torch.float)

    def forward(self, x):
        """
        x is a list of lists of size (batch_size,max_seq_length)
        Each inner list contains word IDs and represents one sentence.
        The whole list-of-lists represents a batch of sentences.
       
        Returns:
        the output from the encoder RNN: a pair of two tensors, one containing all hidden states, and one 
        containing the last hidden state (see https://pytorch.org/docs/stable/generated/torch.nn.RNN.html)
        """

        x_tensor = torch.tensor(x).to(self.device)
        
        # FOR TASK (a), REPLACE THE FOLLOWING LINE WITH YOUR CODE
        embed_x = self.embedding(x_tensor)
        return self.rnn(embed_x)

In [42]:
# ==================== Decoder ==================== #

class DecoderRNN(nn.Module) :

    def __init__(self, no_of_output_symbols, embedding_size=16, hidden_size=25, use_attention=True,
        display_attention=False, device='cpu', use_gru=False) :
        super(DecoderRNN, self).__init__()
        self.embedding = nn.Embedding(no_of_output_symbols,embedding_size)
        self.no_of_output_symbols = no_of_output_symbols
        self.W = nn.Parameter(torch.rand(hidden_size, hidden_size)-0.5) # shouldn't W be 2*hidden_size
        self.U = nn.Parameter(torch.rand(hidden_size, hidden_size)-0.5)
        self.v = nn.Parameter(torch.rand(hidden_size, 1)-0.5)
        self.use_attention = use_attention
        self.display_attention = display_attention
        if use_gru:
            self.rnn = nn.GRU(embedding_size, hidden_size, batch_first=True)
        else:
            self.rnn = nn.RNN(embedding_size, hidden_size, batch_first=True)
        self.output = nn.Linear( hidden_size, no_of_output_symbols )
        self.device = device
        self.to(device)

    def forward(self, inp, hidden, encoder_outputs) :
        """
        'input' is a list of length batch_size, containing the current word
        of each sentence in the batch

        'hidden' is a tensor containing the last hidden state of the decoder, 
        for each sequence in the batch
        hidden.shape = (1, batch_size, hidden_size)

        'encoder_outputs' is a tensor containing all hidden states from the
        encoder (used in problem c)
        encoder_outputs.shape = (batch_size, max_seq_length, hidden_size)

        Note that 'max_seq_length' above refers to the max_seq_length
        of the encoded sequence (not the decoded sequence).

        Returns:
        If use_attention and display_attention are both True (task (c)), return a triple
        (logits for the predicted next word, hidden state, attention weights alpha)

        Otherwise (task (b)), return a pair
        (logits for the predicted next word, hidden state).
        """
        inp_tensor = torch.tensor(inp).to(self.device)

        # FOR (b) and (c) REPLACE THE FOLLOWING LINE WITH YOUR CODE
        embed_inp = self.embedding(inp_tensor).unsqueeze(1) # (64 x 1 x 50)
        if self.use_attention:
            softmax = torch.nn.Softmax(dim=1)
            first = encoder_outputs @ self.W
            second = hidden.squeeze(0) @ self.U
            summ = first + second.unsqueeze(1)
            e = torch.tanh(summ) @ self.v
            alphas = softmax(e)
            context = torch.sum(alphas * encoder_outputs, dim=1).unsqueeze(0)
            final, new_hidden = self.rnn(embed_inp, context)
        else:
            final, new_hidden = self.rnn(embed_inp, hidden)
        if self.display_attention:
            return self.output(final), new_hidden, alphas
        else:
            return self.output(final), new_hidden

In [51]:
# This function will be used for evaluation of both the dev set (during training)
# and the test set (after training is finished).
def evaluate(ds, encoder, decoder):
    predicted_summaries = []
    correct_summaries = []
    for x, y in ds :
        correct_summary = ""
        predicted_summary = ""
        outputs, hidden = encoder( [x] )
        if encoder.is_bidirectional :
            hidden = hidden.permute((1,0,2)).reshape(1,-1).unsqueeze(0)
        predicted_symbol = target_w2i[START_SYMBOL]
        for correct in y :
            correct_summary +=  target_i2w[correct] + " "
            predictions, hidden = decoder( [predicted_symbol], hidden, outputs )
            _, predicted_tensor = predictions.topk(1)
            predicted_symbol = predicted_tensor.detach().item()
            predicted_summary +=  target_i2w[predicted_symbol] + " "
        predicted_summaries.append(predicted_summary)
        correct_summaries.append(correct_summary)
    rouge = Rouge()
    print(rouge.get_scores(predicted_summaries, correct_summaries, avg=True))   

In [55]:
# Use 'Run all cells' to do the training.

# ================ Hyper-parameters ================ #

use_attention = True     
use_gru = True         # Use Gated Recurrent Units (rather than plain RNNs)
bidirectional = True   # Use a bidirectional encoder
use_embeddings = True      # Use pre-loaded Glove embeddings
tune_embeddings = True # Fine-tune the Glove embeddings
batch_size = 64
hidden_size = 25       # Number of dimensions in the hidden state
learning_rate = 0.001
epochs = 50            # We will train for this many epochs
save = False           # Do not save the model

# ==================== Training ==================== #
# Reproducibility
# Read a bit more here -- https://pytorch.org/docs/stable/notes/randomness.html
# random.seed(5719)
# np.random.seed(5719)
#torch.manual_seed(5719)
#torch.use_deterministic_algorithms(True)

# Can we run on GPU?
if torch.cuda.is_available():
    print("Current device: {}".format(torch.cuda.get_device_name(0)))
else:
    print('Running on CPU')
print()
device = 'cuda' if torch.cuda.is_available() else 'cpu'


print( "Number of source words: ", len(source_i2w) )
print( "Number of target words: ", len(target_i2w) )
print( "Number of training sentences: ", len(training_dataset) )
print()

# If we have pre-computed word embeddings, then make sure these are used
if use_embeddings:
    embedding_size, embeddings = load_glove_embeddings('/datasets/dd2417/glove.6B.50d.txt')
else :
    embedding_size = args.hidden_size
    embeddings = None

training_loader = DataLoader(training_dataset, batch_size=batch_size, collate_fn=pad_sequence)
dev_loader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=pad_sequence)
        
criterion = nn.CrossEntropyLoss()

encoder = EncoderRNN(
    len(source_i2w),
    embeddings=embeddings,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
    encoder_bidirectional=bidirectional,
    tune_embeddings=tune_embeddings,
    use_gru=use_gru,
    device=device
)
decoder = DecoderRNN(
    len(target_i2w),
    embedding_size=embedding_size,
    hidden_size=hidden_size*(bidirectional+1),
    use_attention=use_attention,
    use_gru=use_gru,
    device=device
)

encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

encoder.train()
decoder.train()
print( datetime.now().strftime("%H:%M:%S"), "Starting training." )

for epoch in range( epochs ) :
    total_loss = 0
    for source, target in training_loader: #tqdm(training_loader, desc="Epoch {}".format(epoch + 1)):
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        loss = 0
        # hidden is (D * num_layers, B, H)
        outputs, hidden = encoder( source )
        if bidirectional:
            # (2, B, H) -> (B, 2 * H) -> (1, B, 2 * H)
            hidden = torch.cat([hidden[0,:, :], hidden[1,:,:]], dim=1).unsqueeze(0)
                    
        # The probability of doing teacher forcing will decrease
        # from 1 to 0 over the range of epochs. This could be implemented
        # like this:
        # teacher_forcing_ratio = 1- epoch/args.epochs
        # But, for now we will always use teacher forcing
        teacher_forcing_ratio = 1

        # The input to the decoder in the first time step will be
        # the boundary symbol, regardless if we are using teacher
        # forcing or not.
        idx = [target_w2i[START_SYMBOL] for sublist in target]
        predicted_symbol = [target_w2i[START_SYMBOL] for sublist in target]

        target_length = len(target[0])
        for i in range(target_length) :
            use_teacher_forcing = (random.random() < teacher_forcing_ratio)
            if use_teacher_forcing :
                predictions, hidden = decoder( idx, hidden, outputs )
            else:
                # Here we input the previous prediction rather than the
                # correct symbol.
                predictions, hidden = decoder( predicted_symbol, hidden, outputs )
            _, predicted_tensor = predictions.topk(1)
            predicted_symbol = predicted_tensor.squeeze().tolist()

            # The targets will be the ith symbol of all the target
            # strings. They will also be used as inputs for the next
            # time step if we use teacher forcing.
            idx = [sublist[i] for sublist in target]
            loss += criterion( predictions.squeeze(), torch.tensor(idx).to(device) )
        loss /= (target_length * batch_size)
        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()
        total_loss += loss
    print( datetime.now().strftime("%H:%M:%S"), "Epoch", epoch, "loss:", total_loss.detach().item() )
    total_loss = 0

    if epoch % 10 == 0:
        print("Evaluating on the dev data...")
        evaluate(test_dataset, encoder, decoder)

# ==================== Save the model  ==================== #

if ( save ) :
    dt = str(datetime.now()).replace(' ','_').replace(':','_').replace('.','_')
    newdir = 'model_' + dt
    os.mkdir( newdir )
    torch.save( encoder.state_dict(), os.path.join(newdir, 'encoder.model') )
    torch.save( decoder.state_dict(), os.path.join(newdir, 'decoder.model') )
    with open( os.path.join(newdir, 'source_w2i'), 'wb' ) as f :
        pickle.dump( source_w2i, f )
        f.close()
    with open( os.path.join(newdir, 'source_i2w'), 'wb' ) as f :
        pickle.dump( source_i2w, f )
        f.close()
    with open( os.path.join(newdir, 'target_w2i'), 'wb' ) as f :
        pickle.dump( target_w2i, f )
        f.close()
    with open( os.path.join(newdir, 'target_i2w'), 'wb' ) as f :
        pickle.dump( target_i2w, f )
        f.close()

    settings = {
        'training_set': training_file,
        'test_set': test_file,
        'epochs': epochs,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'hidden_size': hidden_size,
        'attention': attention,
        'bidirectional': bidirectional,
        'embedding_size': embedding_size,
        'use_gru': use_gru,
        'tune_embeddings': tune_embeddings
    }
    with open( os.path.join(newdir, 'settings.json'), 'w' ) as f:
        json.dump(settings, f)

# ==================== Evaluation ==================== #

encoder.eval()
decoder.eval()
print( "Evaluating on the test data..." )

print( "Number of test sentences: ", len(test_dataset) )
print()

evaluate(test_dataset, encoder, decoder)


Current device: NVIDIA H100 80GB HBM3 MIG 1g.10gb

Number of source words:  412673
Number of target words:  7262
Number of training sentences:  20000

19:15:27 Starting training.
19:15:37 Epoch 0 loss: 14.669292449951172
Evaluating on the dev data...
{'rouge-1': {'r': 0.24542983288787787, 'p': 0.5589, 'f': 0.3261917213130668}, 'rouge-2': {'r': 0.003996666666666666, 'p': 0.0074, 'f': 0.004710649321581403}, 'rouge-l': {'r': 0.24542983288787787, 'p': 0.5589, 'f': 0.3261917213130668}}
19:16:00 Epoch 1 loss: 9.700180053710938
19:16:11 Epoch 2 loss: 9.252373695373535
19:16:22 Epoch 3 loss: 8.94954776763916
19:16:32 Epoch 4 loss: 8.71351432800293
19:16:43 Epoch 5 loss: 8.515838623046875
19:16:54 Epoch 6 loss: 8.341333389282227
19:17:04 Epoch 7 loss: 8.186420440673828
19:17:15 Epoch 8 loss: 8.047307968139648
19:17:25 Epoch 9 loss: 7.919774055480957
19:17:36 Epoch 10 loss: 7.799694538116455
Evaluating on the dev data...
{'rouge-1': {'r': 0.2311809388193151, 'p': 0.3496540476190371, 'f': 0.26899

In [None]:
# ==================== User interaction ==================== #

decoder.display_attention = True
while( True ) :
    text = input( "> " )
    if text == "" :
        continue
    try :
        source_sentence = [source_w2i[w] for w in nltk.word_tokenize(text)]
    except KeyError :
        print( "Erroneous input string" )
        continue
    outputs, hidden = encoder( [source_sentence] )
    if encoder.is_bidirectional :
        hidden = hidden.permute((1,0,2)).reshape(1,-1).unsqueeze(0)
        
    predicted_symbol = target_w2i[START_SYMBOL]
    target_sentence = []
    attention_probs = []    
    num_attempts = 0
    while num_attempts < MAX_PREDICTIONS:
        if use_attention :
            predictions, hidden, alpha = decoder( [predicted_symbol], hidden, outputs )
            attention_probs.append( alpha.permute(0,2,1).squeeze().detach().tolist() )
        else :
            predictions, hidden = decoder( [predicted_symbol], hidden, outputs )
            
        _, predicted_tensor = predictions.topk(1)
        predicted_symbol = predicted_tensor.detach().item()
        target_sentence.append( predicted_symbol )

        num_attempts += 1

        if predicted_symbol == target_w2i[END_SYMBOL] :
            break

    for i in target_sentence :
        print( target_i2w[i].encode('utf-8').decode(), end=' ' )
    print()

    if use_attention :
        # Construct the attention table
        ap = torch.tensor(attention_probs).T
        if len(ap.shape) == 1:
            ap = ap.unsqueeze(0)
        attention_probs = ap.tolist()
            
        for i in range(len(attention_probs)) :
            for j in range(len(attention_probs[i])) :
                attention_probs[i][j] = "{val:.2f}".format(val=attention_probs[i][j])
        for i in range(len(attention_probs)) :
            if i<len(text) :
                attention_probs[i].insert(0,source_i2w[source_sentence[i]])
            else :
                attention_probs[i].insert(0,' ')
        first_row = ["Source/Result"]
        for w in target_sentence :
            first_row.append(target_i2w[w])
        attention_probs.insert(0,first_row)
        t = AsciiTable( attention_probs )
        print( t.table )


>  bought several vitality canned dog food products found good quality product looks like stew processed meat smells better labrador finicky appreciates product better.


great product , but pricey as the market <END> 
+---------------+-------+---------+------+------+--------+------+------+--------+-------+
| Source/Result | great | product | ,    | but  | pricey | as   | the  | market | <END> |
+---------------+-------+---------+------+------+--------+------+------+--------+-------+
| bought        | 0.01  | 0.00    | 0.00 | 0.00 | 0.00   | 0.00 | 0.00 | 0.00   | 0.00  |
| several       | 0.05  | 0.04    | 0.00 | 0.00 | 0.03   | 0.01 | 0.00 | 0.00   | 0.00  |
| vitality      | 0.02  | 0.01    | 0.17 | 0.00 | 0.02   | 0.00 | 0.00 | 0.05   | 0.00  |
| canned        | 0.01  | 0.00    | 0.05 | 0.00 | 0.01   | 0.01 | 0.00 | 0.00   | 0.00  |
| dog           | 0.00  | 0.16    | 0.00 | 0.00 | 0.00   | 0.00 | 0.00 | 0.00   | 0.00  |
| food          | 0.00  | 0.00    | 0.01 | 0.00 | 0.00   | 0.01 | 0.00 | 0.00   | 0.00  |
| products      | 0.00  | 0.02    | 0.01 | 0.00 | 0.00   | 0.02 | 0.00 | 0.00   | 0.00  |
| found         | 0.01  | 0.00    | 0.01 | 0.00 | 0.

>  product arrived labeled jumbo salted peanuts peanuts actually small sized unsalted sure error vendor intended represent product jumbo.


not what i love the best thing since sliced a lot of the best thing since sliced a lot of 
+---------------+------+------+------+------+------+------+-------+-------+--------+------+------+------+------+------+-------+-------+--------+------+------+------+
| Source/Result | not  | what | i    | love | the  | best | thing | since | sliced | a    | lot  | of   | the  | best | thing | since | sliced | a    | lot  | of   |
+---------------+------+------+------+------+------+------+-------+-------+--------+------+------+------+------+------+-------+-------+--------+------+------+------+
| product       | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00  | 0.01  | 0.00   | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00  | 0.02  | 0.00   | 0.00 | 0.00 | 0.00 |
| arrived       | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00  | 0.00  | 0.00   | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00  | 0.00  | 0.00   | 0.00 | 0.00 | 0.00 |
| labeled       | 0.00 | 0.02 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00  | 0.00  | 0.0

>  confection around centuries light pillowy citrus gelatin nuts case filberts cut tiny squares liberally coated powdered sugar tiny mouthful heaven chewy flavorful highly recommend yummy treat familiar story c lewis lion witch wardrobe treat seduces edmund selling brother sisters witch


my dog loves these ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! 
+---------------+------+------+-------+-------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
| Source/Result | my   | dog  | loves | these | !    | !    | !    | !    | !    | !    | !    | !    | !    | !    | !    | !    | !    | !    | !    | !    |
+---------------+------+------+-------+-------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
| confection    | 0.00 | 0.00 | 0.00  | 0.00  | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| around        | 0.00 | 0.00 | 0.00  | 0.00  | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| centuries     | 0.00 | 0.00 | 0.00  | 0.00  | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.

>  confection around centuries light pillowy citrus gelatin nuts case filberts cut tiny squares liberally coated powdered sugar tiny mouthful heaven chewy flavorful highly recommend yummy treat familiar story c lewis lion witch wardrobe treat seduces edmund selling brother sisters witch.


great product , but not a great product , but not a great product , but not a great product 
+---------------+-------+---------+------+------+------+------+-------+---------+------+------+------+------+-------+---------+------+------+------+------+-------+---------+
| Source/Result | great | product | ,    | but  | not  | a    | great | product | ,    | but  | not  | a    | great | product | ,    | but  | not  | a    | great | product |
+---------------+-------+---------+------+------+------+------+-------+---------+------+------+------+------+-------+---------+------+------+------+------+-------+---------+
| confection    | 0.00  | 0.00    | 0.00 | 0.00 | 0.00 | 0.00 | 0.00  | 0.00    | 0.00 | 0.00 | 0.00 | 0.00 | 0.00  | 0.00    | 0.00 | 0.00 | 0.00 | 0.00 | 0.00  | 0.00    |
| around        | 0.00  | 0.00    | 0.00 | 0.00 | 0.00 | 0.00 | 0.00  | 0.00    | 0.00 | 0.00 | 0.00 | 0.00 | 0.00  | 0.00    | 0.00 | 0.00 | 0.00 | 0.00 | 0.00  | 0.00    |
| centuries     | 0.00  | 0.00    | 0

>  looking secret ingredient robitussin believe found got addition root beer extract ordered good made cherry soda flavor medicinal.


great product . <END> 
+---------------+-------+---------+------+-------+
| Source/Result | great | product | .    | <END> |
+---------------+-------+---------+------+-------+
| looking       | 0.00  | 0.00    | 0.00 | 0.00  |
| secret        | 0.00  | 0.31    | 0.00 | 0.00  |
| ingredient    | 0.06  | 0.03    | 0.00 | 0.48  |
| robitussin    | 0.00  | 0.63    | 0.00 | 0.00  |
| believe       | 0.01  | 0.00    | 0.00 | 0.00  |
| found         | 0.00  | 0.00    | 0.00 | 0.00  |
| got           | 0.28  | 0.00    | 0.00 | 0.03  |
| addition      | 0.00  | 0.00    | 0.00 | 0.00  |
| root          | 0.12  | 0.03    | 0.97 | 0.21  |
| beer          | 0.01  | 0.00    | 0.00 | 0.00  |
| extract       | 0.00  | 0.01    | 0.00 | 0.00  |
| ordered       | 0.00  | 0.00    | 0.00 | 0.00  |
| good          | 0.31  | 0.00    | 0.01 | 0.27  |
| made          | 0.01  | 0.00    | 0.00 | 0.00  |
| cherry        | 0.07  | 0.00    | 0.01 | 0.01  |
| soda          | 0.00  | 0.00    | 0.00 | 0.00  |
| flavor

>  great taffy great price wide assortment yummy taffy delivery quick taffy lover deal.


great service <END> 
+---------------+-------+---------+-------+
| Source/Result | great | service | <END> |
+---------------+-------+---------+-------+
| great         | 0.00  | 0.00    | 0.00  |
| taffy         | 0.00  | 0.00    | 0.00  |
| great         | 0.01  | 0.01    | 0.01  |
| price         | 0.00  | 0.00    | 0.00  |
| wide          | 0.00  | 0.00    | 0.00  |
| assortment    | 0.00  | 0.02    | 0.00  |
| yummy         | 0.00  | 0.00    | 0.00  |
| taffy         | 0.00  | 0.00    | 0.00  |
| delivery      | 0.00  | 0.00    | 0.00  |
| quick         | 0.00  | 0.00    | 0.00  |
| taffy         | 0.00  | 0.00    | 0.00  |
| lover         | 0.99  | 0.97    | 0.99  |
| deal          | 0.00  | 0.00    | 0.00  |
| .             | 0.00  | 0.00    | 0.00  |
+---------------+-------+---------+-------+


>  great dog food product.


great product <END> 
+---------------+-------+---------+-------+
| Source/Result | great | product | <END> |
+---------------+-------+---------+-------+
| great         | 0.98  | 0.22    | 0.78  |
| dog           | 0.01  | 0.74    | 0.00  |
| food          | 0.02  | 0.01    | 0.22  |
| product       | 0.00  | 0.02    | 0.00  |
| .             | 0.00  | 0.00    | 0.00  |
+---------------+-------+---------+-------+


>  great dog food product for a cheap price.


great product <END> 
+---------------+-------+---------+-------+
| Source/Result | great | product | <END> |
+---------------+-------+---------+-------+
| great         | 0.92  | 0.04    | 0.46  |
| dog           | 0.00  | 0.56    | 0.01  |
| food          | 0.01  | 0.01    | 0.05  |
| product       | 0.02  | 0.34    | 0.01  |
| for           | 0.01  | 0.01    | 0.09  |
| a             | 0.00  | 0.01    | 0.03  |
| cheap         | 0.04  | 0.03    | 0.31  |
| price         | 0.00  | 0.01    | 0.04  |
| .             | 0.00  | 0.00    | 0.00  |
+---------------+-------+---------+-------+
