<a href="https://colab.research.google.com/github/ericsdata/colinsbeer/blob/main/src/WriteReviews.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 5.2 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 47.5 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 42.8 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 34.7 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 6.2 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: p

In [3]:
## Packages
import os
import pandas as pd
import torch
import pickle
import numpy as np
import random
import datetime
import time


from torch.utils.data import random_split, RandomSampler, SequentialSampler
from torch.utils.data import Dataset, DataLoader

from transformers import GPT2LMHeadModel,  GPT2Tokenizer, GPT2Config,  AdamW, get_linear_schedule_with_warmup


In [4]:
## Class to hold beer text
class generative_BD(torch.utils.data.Dataset):
  '''Sequence text tokens
      This means it adds tags to start and end of texts

      Reads in text dataset, & tokenizes

      !!! NEED TO ADD PADDING TOKEN
  '''

  def __init__(self,text_list, tokenizer, text_tags, gpt2_type="gpt2", max_length=768):
    #self.text_list = text_list
    self.tokenizer = tokenizer
    self.input_ids = []
    self.attn_masks = []
    self.text_tags = [text_tags]

    ## In definitoin
    for txt in text_list:

        encodings_dict = tokenizer('<%s>'%(text_tags[0])+ txt + '<%s>'%(text_tags[1]), truncation=True, max_length=max_length, padding="max_length")

        self.input_ids.append(torch.tensor(encodings_dict['input_ids']))
        self.attn_masks.append(torch.tensor(encodings_dict['attention_mask']))

  def __getitem__(self, idx):
    return self.input_ids[idx], self.attn_masks[idx]

  def __len__(self):
    return len(self.input_ids)

## Func to do some date formatting

def format_time(time_elapsed):
    return str(datetime.timedelta(seconds = int(round(time_elapsed))))


In [5]:
### Data Read

with open("train_data", "rb") as fp:   # Unpickling
   revs = pickle.load(fp)

In [6]:
# Split into training and validation sets
train_size = int(0.8 * len(revs))
val_size = len(revs) - train_size

revs_train, revs_val = random_split(revs, [train_size, val_size])

print('{:>5,} training samples'.format(train_size))
print('{:>5,} validation samples'.format(val_size))


1,907 training samples
  477 validation samples


In [7]:
### Init tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

## Add padding token
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

### Set up the data in datasets
train_dataset = generative_BD(text_list= revs_train, tokenizer = tokenizer, text_tags = ['CLS', 'SEP'])
val_dataset = generative_BD(text_list= revs_val, tokenizer = tokenizer, text_tags = ['CLS', 'SEP'])

Downloading:   0%|          | 0.00/0.99M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

Using pad_token, but it is not set yet.


In [8]:
## Init paramters
batch_size = 2


epochs = 5
learning_rate = 5e-4
warmup_steps = 1e2
epsilon = 1e-8

# this produces sample output every 100 steps
sample_every = 50

In [9]:
# Create the DataLoaders for our training and validation datasets.
# We'll take training samples in random order. 
train_dataloader = DataLoader(
            train_dataset,  # The training samples.
            sampler = RandomSampler(train_dataset), # Select batches randomly
            batch_size = batch_size # Trains with this batch size.
        )

# For validation the order doesn't matter, so we'll just read them sequentially.
validation_dataloader = DataLoader(
            val_dataset, # The validation samples.
            sampler = SequentialSampler(val_dataset), # Pull out batches sequentially.
            batch_size = batch_size # Evaluate with this batch size.
        )

In [10]:
## Init Model

# I'm not really doing anything with the config 
configuration = GPT2Config.from_pretrained('gpt2', output_hidden_states=False)

model = GPT2LMHeadModel.from_pretrained("gpt2", config=configuration)

# this step is necessary because I've added some tokens (bos_token, etc) to the embeddings
# otherwise the tokenizer and model tensors won't match up
model.resize_token_embeddings(len(tokenizer))

Downloading:   0%|          | 0.00/523M [00:00<?, ?B/s]

Embedding(50258, 768)

In [11]:
### SEt model how to run
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Tell pytorch to run this model on the GPU.
device = torch.device("cuda")
model.cuda()

# Set the seed value all over the place to make this reproducible.
seed_val = 42

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [12]:
optimizer = AdamW(model.parameters(),
            lr = learning_rate,
            eps = epsilon)



In [13]:
### Figure out how many training steps

total_steps = len(revs_train) * epochs

## A scheduler adjusts the learning rate as the training loop progresses

scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps = warmup_steps,
                                            num_training_steps = total_steps)

In [14]:
#start training
total_t0 = time.time()
## store records of each run
training_stats = []

### Manual set for each epoch of training
for epoch_i in range(0, epochs):
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    ## Store process starting time
    startT = time.time()
    ## Init loss across training
    total_train_loss = 0
    #begin model training
    model.train()

    for step, batch in enumerate(train_dataset):
        b_input_ids =batch[0].to(device) ## send input ids to model
        b_labels = batch[0].to(device) ## send labels to model (labs are smae as input in gen text)
        b_masks = batch[1].to(device) ## send attention layer to model

        #init gradient at 0
        model.zero_grad()        
        ## feed labels, inputs, and masks to model
        outputs = model(  b_input_ids,
                          labels=b_labels, 
                          attention_mask = b_masks,
                          token_type_ids=None
                        )
        ## Calc loss
        loss = outputs[0]  
        ### loss on batch
        batch_loss = loss.item()
        ## Add it to total training loss
        total_train_loss += batch_loss
        ### Reporting step
        if step % sample_every == 0 and not step == 0:
          elapsed = format_time(time.time() - startT)
          print('  Batch {:>5,}  of  {:>5,}. Loss: {:>5,}.   Elapsed: {:}.'.format(step, len(train_dataset), batch_loss, elapsed))

          model.eval()
          ### Output some samples so you know its working
          sample_outputs = model.generate(
                                bos_token_id = random.randint(1,30000)
                                ,do_sample = True
                                ,top_k = 50
                                ,max_lenght = 200
                                ,top_p = 0.095
                                ,num_return_sequences = 1
          )

          for i, sample_output in enumerate(sample_outputs):
                  print("{}: {}".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))

          model.train()

        loss.backward()

        optimizer.step()

        scheduler.step()
    ## Calcu average loss over all epochs
    avg_train_loss = total_train_loss / len(train_dataset)

    ## Ouput how long epoch took
    training_time = format_time(time.time() - startT)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(training_time))

     # ========================================
    #               Validation
    # ========================================

    print("")
    print("Running Validation...")

    t0 = time.time()

    model.eval()

    total_eval_loss = 0
    nb_eval_steps = 0

    # Evaluate data for one epoch
    for batch in validation_dataloader:
        
        b_input_ids = batch[0].to(device)
        b_labels = batch[0].to(device)
        b_masks = batch[1].to(device)
        
        with torch.no_grad():        

            outputs  = model(b_input_ids, 
#                            token_type_ids=None, 
                             attention_mask = b_masks,
                            labels=b_labels)
          
            loss = outputs[0]  
            
        batch_loss = loss.item()
        total_eval_loss += batch_loss        

    avg_val_loss = total_eval_loss / len(validation_dataloader)
    
    validation_time = format_time(time.time() - t0)    

    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    # Record all statistics from this epoch.
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

print("")
print("Training complete!")
print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))


Training...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch    50  of  1,907. Loss: 0.4144846200942993.   Elapsed: 0:00:28.
0:  bipartisan and


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   100  of  1,907. Loss: 0.763882040977478.   Elapsed: 0:00:56.
0:  increasing the, and the amount of


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   150  of  1,907. Loss: 0.25168395042419434.   Elapsed: 0:01:23.
0: dayCLS>3202 1 Bottle of Bottle of Bottle of Bottle of Bottle of Bottle of Bottle


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   200  of  1,907. Loss: 0.1421884447336197.   Elapsed: 0:01:51.
0:  HangCLS>46471 1 Bottle of Bottle of Bottle of Bottle of Bottle of Bottle of Bottle


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   250  of  1,907. Loss: 0.7142435908317566.   Elapsed: 0:02:19.
0:  foodsCLS>46471 1 Bottle of coffee.  Aroma is light and sweet with a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   300  of  1,907. Loss: 0.4597827196121216.   Elapsed: 0:02:46.
0:  trailCLS>117319 1 Bottle.  Aroma of caramel, caramel, caramel,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   350  of  1,907. Loss: 0.24330691993236542.   Elapsed: 0:03:14.
0: intendCLS>3202 1 Bottle.  Pours a dark brown with a thin head.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   400  of  1,907. Loss: 0.15504448115825653.   Elapsed: 0:03:42.
0:  surroundCLS>117319 1 Bottle.  Aroma is of caramel, chocolate, and


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   450  of  1,907. Loss: 1.3873591423034668.   Elapsed: 0:04:09.
0:  reflexCLS>4302 1 Aroma: 									


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   500  of  1,907. Loss: 0.28531956672668457.   Elapsed: 0:04:37.
0:  displayCLS>29028 1 Bottle.  Aroma is of roasted malt, caramel, and


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   550  of  1,907. Loss: 0.5123282670974731.   Elapsed: 0:05:05.
0:  pastorCLS>117319 1 Bottle.  Aroma is sweet caramel, caramel, caramel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   600  of  1,907. Loss: 0.43285268545150757.   Elapsed: 0:05:32.
0:  illicitCLS>4302 1 Bottle.  Aroma of coffee, fig, fig, fig


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   650  of  1,907. Loss: 0.2823176980018616.   Elapsed: 0:06:00.
0:  LiberationCLS>117319 1 Bottle.  Pours a clear amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   700  of  1,907. Loss: 0.34294506907463074.   Elapsed: 0:06:28.
0:  NamCLS>117319 1 12 oz bottle.  Aroma of caramel, caramel,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   750  of  1,907. Loss: 0.29556670784950256.   Elapsed: 0:06:55.
0: IONCLS>42935 1 Bottle.  Pours a clear dark brown with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   800  of  1,907. Loss: 0.3187301754951477.   Elapsed: 0:07:23.
0:  glimpseCLS>42935 1 Bottle.  Aroma of caramel, caramel, caramel, caramel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   850  of  1,907. Loss: 0.9229152202606201.   Elapsed: 0:07:50.
0:  LaureCLS>42935 1 Bottle.  Pours a dark brown with a medium sized head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   900  of  1,907. Loss: 0.3078581988811493.   Elapsed: 0:08:18.
0: ismCLS>117319 1 12oz bottle.  Pours a dark brown with a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   950  of  1,907. Loss: 0.23028533160686493.   Elapsed: 0:08:46.
0: ounCLS>42935 1 Bottle.  Pours a clear golden brown with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,000  of  1,907. Loss: 1.284628987312317.   Elapsed: 0:09:13.
0:  electionCLS>4302 1 Bottle.  Pours a clear dark brown color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,050  of  1,907. Loss: 0.2655346989631653.   Elapsed: 0:09:41.
0:  crazyCLS>56242 1 Bottle shared with GregClow.						


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,100  of  1,907. Loss: 0.459739089012146.   Elapsed: 0:10:09.
0:  benchCLS>42935 1 Bottle.  Pours a clear golden color with a medium-


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,150  of  1,907. Loss: 0.14691103994846344.   Elapsed: 0:10:36.
0:  incorporatedCLS>46471 1 Bottle.  Pours a dark brown with a thin head.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,200  of  1,907. Loss: 0.21402154862880707.   Elapsed: 0:11:04.
0: PeterCLS>3202 1 Bottle.  Pours a deep brown with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,250  of  1,907. Loss: 0.511879563331604.   Elapsed: 0:11:31.
0: uringCLS>43176 1 Bottle.  Pours a dark brown with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,300  of  1,907. Loss: 0.4843144416809082.   Elapsed: 0:11:59.
0:  reproductiveCLS>2519 1 Bottle.  Pours a dark brown color with a medium off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,350  of  1,907. Loss: 0.3060160279273987.   Elapsed: 0:12:27.
0:  zoneCLS>46471 1 Bottle.  Pours a clear amber with a small off-


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,400  of  1,907. Loss: 0.6282542943954468.   Elapsed: 0:12:54.
0:  commitsCLS>42935 1 Pours a deep copper color with a small white head. 


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,450  of  1,907. Loss: 0.21058666706085205.   Elapsed: 0:13:22.
0:  ironyCLS>56242 1 Bottle.  Pours a dark brown with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,500  of  1,907. Loss: 0.17619769275188446.   Elapsed: 0:13:49.
0:  SahCLS>42935 1 Bottle.  Pours a dark brown with a thin beige


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,550  of  1,907. Loss: 0.2596525549888611.   Elapsed: 0:14:17.
0:  BryanCLS>117319 1 Bottle.  Pours a dark brown with a small head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,600  of  1,907. Loss: 0.2051253765821457.   Elapsed: 0:14:45.
0:  spiritsCLS>65888 1 Bottle.  Pours a dark brown color with a medium tan


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,650  of  1,907. Loss: 0.5159775018692017.   Elapsed: 0:15:12.
0:  seesCLS>43176 1 Bottle shared with blankboy. Poured a deep amber color with


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,700  of  1,907. Loss: 0.2835063636302948.   Elapsed: 0:15:40.
0:  hungryCLS>29028 1 Bottle.  Pours a dark brown with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,750  of  1,907. Loss: 0.13938911259174347.   Elapsed: 0:16:08.
0:  PTCLS>117319 1 Bottle.  Pours a clear copper color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,800  of  1,907. Loss: 0.34989896416664124.   Elapsed: 0:16:35.
0: üCLS>42935 1 Bottle.  Pours a deep golden with a small, dense


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,850  of  1,907. Loss: 0.6099055409431458.   Elapsed: 0:17:03.
0: ruceCLS>42935 1 Bottle.  Pours a clear golden color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,900  of  1,907. Loss: 0.10041928291320801.   Elapsed: 0:17:30.
0:  derivativesCLS>65888 1 Pours a clear, clear, clear, clear, clear,

  Average training loss: 0.45
  Training epoch took: 0:17:34

Running Validation...
  Validation Loss: 0.35
  Validation took: 0:01:18

Training...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch    50  of  1,907. Loss: 0.23271061480045319.   Elapsed: 0:00:27.
0: CLS>56242 1 Bottle. Pours a hazy dark brown with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   100  of  1,907. Loss: 0.5169233679771423.   Elapsed: 0:00:55.
0:  rememberingCLS>43176 1 Bottle. Pours a cloudy amber color with a small off-


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   150  of  1,907. Loss: 0.20878896117210388.   Elapsed: 0:01:23.
0:  SourcesCLS>3202 1 Bottle shared by jerc.  Pours a dark brown color


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   200  of  1,907. Loss: 0.11735793203115463.   Elapsed: 0:01:50.
0: emsCLS>46471 1 Bottle.  Poured a dark brown color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   250  of  1,907. Loss: 0.5416931509971619.   Elapsed: 0:02:18.
0: tzCLS>3202 1 Bottle.  Pours a deep brown color with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   300  of  1,907. Loss: 0.3441542983055115.   Elapsed: 0:02:46.
0: maticCLS>2519 1 Bottle.  Pours a clear amber color with a small,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   350  of  1,907. Loss: 0.19712711870670319.   Elapsed: 0:03:13.
0:  syndCLS>56242 1 UPDATED: APR 21, 2011  Pours a dark brown


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   400  of  1,907. Loss: 0.14221850037574768.   Elapsed: 0:03:41.
0:  gamCLS>56242 1 Bottle.  Pours a dark brown color with a thin white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   450  of  1,907. Loss: 0.5739588141441345.   Elapsed: 0:04:08.
0:  injuryCLS>46471 1 Bottle.  Pours a dark brown color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   500  of  1,907. Loss: 0.2552048861980438.   Elapsed: 0:04:36.
0: azaCLS>29028 1 Bottle.  Poured a dark brown with a thin white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   550  of  1,907. Loss: 0.42489421367645264.   Elapsed: 0:05:04.
0:  membraneCLS>43176 1 Bottle.  Pours a deep amber color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   600  of  1,907. Loss: 0.34759458899497986.   Elapsed: 0:05:31.
0: ijingCLS>2519 1 Bottle. Pours a clear amber color with a small off white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   650  of  1,907. Loss: 0.23807725310325623.   Elapsed: 0:05:59.
0:  castCLS>46471 1 Pours a clear amber with a small white head.  A


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   700  of  1,907. Loss: 0.2941376566886902.   Elapsed: 0:06:26.
0:  purchCLS>2519 1 Bottle.  Pours a dark brown color with a small tan


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   750  of  1,907. Loss: 0.2571309208869934.   Elapsed: 0:06:54.
0:  shouldersCLS>42935 1 Bottle.  Pours a clear amber with a small off-


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   800  of  1,907. Loss: 0.283415287733078.   Elapsed: 0:07:22.
0:  builtCLS>46471 1 Bottle.  Pours a clear amber with a medium off-


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   850  of  1,907. Loss: 0.7712092995643616.   Elapsed: 0:07:49.
0:  openlyCLS>2519 1 Bottle.  Pours a dark brown color with a medium sized


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   900  of  1,907. Loss: 0.26819807291030884.   Elapsed: 0:08:17.
0:  haltedCLS>4302 1 12oz bottle. Pours a dark brown with a medium sized


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   950  of  1,907. Loss: 0.2065950334072113.   Elapsed: 0:08:44.
0:  NikCLS>42935 1 Bottle shared by tupalev on 04/27/2009


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,000  of  1,907. Loss: 1.096210241317749.   Elapsed: 0:09:12.
0:  tinCLS>2519 1 Bottle.  Pours a clear amber color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,050  of  1,907. Loss: 0.22632178664207458.   Elapsed: 0:09:40.
0:  clinicalCLS>42935 1 Bottle.  Pours a clear amber color with a medium off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,100  of  1,907. Loss: 0.4021240770816803.   Elapsed: 0:10:07.
0: lectionsCLS>42935 1 Bottle.  Pours a clear golden color with a thin white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,150  of  1,907. Loss: 0.1289854645729065.   Elapsed: 0:10:35.
0: elsCLS>2519 1 Bottle.  Pours a clear amber with a thin, creamy


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,200  of  1,907. Loss: 0.17752739787101746.   Elapsed: 0:11:02.
0: labCLS>3202 1 Bottle.  Pours a deep dark brown with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,250  of  1,907. Loss: 0.4375587999820709.   Elapsed: 0:11:30.
0:  tripleCLS>2519 1 Bottle.  Pours a clear amber color with a small,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,300  of  1,907. Loss: 0.42316508293151855.   Elapsed: 0:11:58.
0: 220CLS>2519 1 Bottle.  Pours a clear amber color with a medium off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,350  of  1,907. Loss: 0.23868823051452637.   Elapsed: 0:12:25.
0:  SeeCLS>46471 1 Bottle.  Pours a clear, clear, amber with a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,400  of  1,907. Loss: 0.5238339304924011.   Elapsed: 0:12:53.
0: @@CLS>65888 1 Pours a nice copper color with a nice white head. 


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,450  of  1,907. Loss: 0.19236530363559723.   Elapsed: 0:13:20.
0:  hostCLS>56242 1 750 ml bottle from Cavie. Pours a cloudy orange with


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,500  of  1,907. Loss: 0.15120135247707367.   Elapsed: 0:13:48.
0: roleCLS>42935 1 Bottle.  Pours a dark amber color with a thin white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,550  of  1,907. Loss: 0.20910444855690002.   Elapsed: 0:14:16.
0: iacCLS>117319 1 Bottle.  Dark brown with a small head.  Ro


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,600  of  1,907. Loss: 0.16130878031253815.   Elapsed: 0:14:43.
0:  LDCLS>117319 1 Bottle.  Pours a dark brown color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,650  of  1,907. Loss: 0.4434838593006134.   Elapsed: 0:15:11.
0:  ListenCLS>43176 1 Bottle shared with HogTownHarry, HogTownHarry, HogTown


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,700  of  1,907. Loss: 0.2560074031352997.   Elapsed: 0:15:38.
0:  dyCLS>42935 1 Bottle from Trader Joes.  Pours a clear amber color


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,750  of  1,907. Loss: 0.13022498786449432.   Elapsed: 0:16:06.
0:  DomesticCLS>46471 1 Bottle. Pours a clear golden color with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,800  of  1,907. Loss: 0.3037470877170563.   Elapsed: 0:16:34.
0:  beneficiariesCLS>42935 1 Bottle.  Pours a deep golden with a thin, off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,850  of  1,907. Loss: 0.4844663143157959.   Elapsed: 0:17:01.
0:  TitleCLS>42935 1 Bottle.  Pours a clear golden color with a medium white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,900  of  1,907. Loss: 0.10398497432470322.   Elapsed: 0:17:29.
0:  μCLS>65888 1 Pours a clear dark brown with a medium tan head. 

  Average training loss: 0.31
  Training epoch took: 0:17:32

Running Validation...
  Validation Loss: 0.36
  Validation took: 0:01:18

Training...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch    50  of  1,907. Loss: 0.22557483613491058.   Elapsed: 0:00:27.
0:  sellingCLS>56242 1 Bottle. Pours a hazy amber color with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   100  of  1,907. Loss: 0.45443427562713623.   Elapsed: 0:00:54.
0:  migrantCLS>43176 1 Bottle. Pours a cloudy amber color with a small off-


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   150  of  1,907. Loss: 0.3687625527381897.   Elapsed: 0:01:22.
0: ivelyCLCL3202 1 Nice amber color with a medium brown, with a small head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   200  of  1,907. Loss: 0.2279045730829239.   Elapsed: 0:01:49.
0:  orderCLS>>>>>>>>>>>>>>>>>


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   250  of  1,907. Loss: 0.9349898099899292.   Elapsed: 0:02:17.
0:  VPNCLSSSSSSSSSSSSSSSSSS


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   300  of  1,907. Loss: 0.5760278105735779.   Elapsed: 0:02:44.
0:  explanationCLS>429 1 Bottle.            


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   350  of  1,907. Loss: 0.283032089471817.   Elapsed: 0:03:12.
0:  BachCLS>                


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   400  of  1,907. Loss: 0.2022159993648529.   Elapsed: 0:03:39.
0:  folderCLS>42935 1 Bottle.  Pours a dark brown with a light brown with


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   450  of  1,907. Loss: 0.9536663293838501.   Elapsed: 0:04:07.
0:  buildingCLS>117319 1 Bottle.  Pours a dark brown head.  A


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   500  of  1,907. Loss: 0.2872278392314911.   Elapsed: 0:04:34.
0:  BabylonCLS>117319 1 Bottle.  Poured a dark brown with a light tan


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   550  of  1,907. Loss: 0.49991950392723083.   Elapsed: 0:05:02.
0: perialCLS>42935 1 Bottle.  Pours a dark brown with a small head.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   600  of  1,907. Loss: 0.3687889873981476.   Elapsed: 0:05:29.
0:  rentsCLS>117319 1 Bottle.  Pours a dark brown with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   650  of  1,907. Loss: 0.24251601099967957.   Elapsed: 0:05:57.
0:  RegCLS>29028 1 Bottle.  Pours a clear amber color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   700  of  1,907. Loss: 0.302315354347229.   Elapsed: 0:06:24.
0: olasCLS>3202 1 Bottle.  Pours a dark brown with a small tan head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   750  of  1,907. Loss: 0.26365405321121216.   Elapsed: 0:06:52.
0:  responsesCLS>117319 1 Bottle. Pours a dark brown with a small off white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   800  of  1,907. Loss: 0.27619343996047974.   Elapsed: 0:07:19.
0:  attendanceCLS>117319 1 Bottle. Pours a dark brown with a small tan head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   850  of  1,907. Loss: 0.7499394416809082.   Elapsed: 0:07:47.
0:  rigidCLS>42935 1 Bottle.  Pours a clear amber color with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   900  of  1,907. Loss: 0.25463417172431946.   Elapsed: 0:08:15.
0: groCLS>42935 1 Bottle. Pours a clear amber with a medium sized off-


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   950  of  1,907. Loss: 0.19599904119968414.   Elapsed: 0:08:42.
0:  GreCLS>42935 1 Bottle. Golden yellow color with a small white head. Nose is


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,000  of  1,907. Loss: 1.0763479471206665.   Elapsed: 0:09:10.
0: uraCLS>42935 1 Bottle.  Pours a clear amber color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,050  of  1,907. Loss: 0.22038400173187256.   Elapsed: 0:09:37.
0:  2020CLS>42935 1 Bottle from Trader Joes. Pours a clear golden color with


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,100  of  1,907. Loss: 0.3733348548412323.   Elapsed: 0:10:05.
0:  chargingCLS>42935 1 Bottle.  Pours a clear golden color with a thin white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,150  of  1,907. Loss: 0.12994422018527985.   Elapsed: 0:10:32.
0:  SarCLS>46471 1 Bottle.  Pours a clear amber with a thin white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,200  of  1,907. Loss: 0.14039850234985352.   Elapsed: 0:11:00.
0:  JasCLS>65888 1 Bottle.  Pours a dark brown with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,250  of  1,907. Loss: 0.4071336090564728.   Elapsed: 0:11:28.
0:  permitCLS>65888 1 Bottle.  Pours a dark brown with a thin white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,300  of  1,907. Loss: 0.3948700428009033.   Elapsed: 0:11:55.
0:  AdministratorCLS>2519 1 Bottle.  Pours a dark brown with a small, off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,350  of  1,907. Loss: 0.1938847154378891.   Elapsed: 0:12:23.
0:  EVENTSCLS>42935 1 Bottle.  Pours a clear amber color with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,400  of  1,907. Loss: 0.47709107398986816.   Elapsed: 0:12:50.
0:  MentalCLS>42935 1 Pours a deep golden color with a small white head. 


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,450  of  1,907. Loss: 0.17250825464725494.   Elapsed: 0:13:18.
0: idsCLS>56242 1 Bottle.  Pours a cloudy, amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,500  of  1,907. Loss: 0.13852518796920776.   Elapsed: 0:13:46.
0: ceansCLS>42935 1 Bottle.  Pours a clear amber color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,550  of  1,907. Loss: 0.17843769490718842.   Elapsed: 0:14:13.
0:  geneticallyCLS>42935 1 Bottle.  Pours a clear golden with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,600  of  1,907. Loss: 0.16455593705177307.   Elapsed: 0:14:41.
0:  adviceCLS>65888 1 Bottle.  Pours a dark brown color with a medium head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,650  of  1,907. Loss: 0.38611164689064026.   Elapsed: 0:15:08.
0:  incompleteCLS>46471 1 Bottle. Pours a clear golden color with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,700  of  1,907. Loss: 0.23906829953193665.   Elapsed: 0:15:36.
0: commentCLS>42935 1 Bottle.  Pours a clear golden color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,750  of  1,907. Loss: 0.1126246526837349.   Elapsed: 0:16:04.
0:  pilotCLS>29028 1 Bottle.  Pours a clear copper color with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,800  of  1,907. Loss: 0.2680243253707886.   Elapsed: 0:16:31.
0: gersCLS>42935 1 Bottle.  Pours a golden color with a small head.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,850  of  1,907. Loss: 0.4262903034687042.   Elapsed: 0:16:59.
0:  mutationCLS>42935 1 Pours a clear golden color with a thin white head. 


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,900  of  1,907. Loss: 0.08193888515233994.   Elapsed: 0:17:26.
0:  DriverCLS>46471 1 Pours a clear golden amber with a small white head. 

  Average training loss: 0.34
  Training epoch took: 0:17:30

Running Validation...
  Validation Loss: 0.36
  Validation took: 0:01:18

Training...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch    50  of  1,907. Loss: 0.1962110996246338.   Elapsed: 0:00:27.
0: itiCLS>65888 1 Pours a dark brown with a small head.  Aroma


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   100  of  1,907. Loss: 0.35034167766571045.   Elapsed: 0:00:55.
0: fffCLS>29028 1 Bottle.  Pours a hazy amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   150  of  1,907. Loss: 0.20034468173980713.   Elapsed: 0:01:23.
0: riminationCLS>42935 1 Bottle.  Pours a golden color with a small off-


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   200  of  1,907. Loss: 0.10998307168483734.   Elapsed: 0:01:50.
0: aqueCLS>46471 1 Bottle.  Pours a golden color with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   250  of  1,907. Loss: 0.49267348647117615.   Elapsed: 0:02:18.
0: leeCLS>46471 1 Bottle. Pours a clear golden color with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   300  of  1,907. Loss: 0.28901901841163635.   Elapsed: 0:02:45.
0:  AlzheimerCLS>42935 1 Bottle.  Pours a clear golden color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   350  of  1,907. Loss: 0.18756377696990967.   Elapsed: 0:03:13.
0:  checkingCLS>46471 1 UPDATED: SEP 11, 2011 Bottle.  Pours


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   400  of  1,907. Loss: 0.11528453975915909.   Elapsed: 0:03:41.
0: nesiumCLS>42935 1 Bottle.  Pours a clear golden color with a thin white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   450  of  1,907. Loss: 0.3192884027957916.   Elapsed: 0:04:08.
0:  1957CLS>46471 1 500 ml bottle.  Pours a golden orange with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   500  of  1,907. Loss: 0.22872142493724823.   Elapsed: 0:04:36.
0: manshipCLS>29028 1 Bottle.  Poured a hazy amber with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   550  of  1,907. Loss: 0.3748632073402405.   Elapsed: 0:05:03.
0:  understandCLS>43176 1 Bottle.  Pours a deep amber color with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   600  of  1,907. Loss: 0.2835221588611603.   Elapsed: 0:05:31.
0:  passionCLS>29028 1 Bottle.  Pours a hazy amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   650  of  1,907. Loss: 0.20052267611026764.   Elapsed: 0:05:59.
0:  curvedCLS>29028 1 Bottle.  Pours a hazy amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   700  of  1,907. Loss: 0.2593579590320587.   Elapsed: 0:06:26.
0:  AnCLS>42935 1 12oz bottle.  Pours a clear golden color with a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   750  of  1,907. Loss: 0.2034137099981308.   Elapsed: 0:06:54.
0: ottiCLS>42935 1 Bottle. Pours a clear golden with a small white head.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   800  of  1,907. Loss: 0.2595638930797577.   Elapsed: 0:07:21.
0:  intakeCLS>42935 1 Bottle. Pours a clear golden amber with a small off-


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   850  of  1,907. Loss: 0.5348326563835144.   Elapsed: 0:07:49.
0:  TsCLS>42935 1 Bottle.  Pours a clear golden color with a medium sized


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   900  of  1,907. Loss: 0.20581254363059998.   Elapsed: 0:08:17.
0:  MainCLS>29028 1 2005 bottle.  Pours a clear amber with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   950  of  1,907. Loss: 0.16942903399467468.   Elapsed: 0:08:44.
0:  temCLS>42935 1 Bottle.  Pours a clear golden with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,000  of  1,907. Loss: 0.8714894652366638.   Elapsed: 0:09:12.
0:  ClickCLS>29028 1 Bottle.  Pours a clear amber color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,050  of  1,907. Loss: 0.19690576195716858.   Elapsed: 0:09:40.
0:  XVCLS>42935 1 Bottle. Pours a clear golden color with a thin white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,100  of  1,907. Loss: 0.2924382984638214.   Elapsed: 0:10:07.
0:  dormCLS>42935 1 Bottle.  Pours a clear golden color with a medium sized


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,150  of  1,907. Loss: 0.12471206486225128.   Elapsed: 0:10:35.
0:  upstreamCLS>2519 1 Bottle.  Pours a dark brown with a thin head.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,200  of  1,907. Loss: 0.10107682645320892.   Elapsed: 0:11:02.
0: الCLS>65888 1 Bottle.  Pours a dark brown with a small head.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,250  of  1,907. Loss: 0.3371446430683136.   Elapsed: 0:11:30.
0: ariaCLS>2519 1 Bottle.  Pours a clear amber color with a small,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,300  of  1,907. Loss: 0.33357977867126465.   Elapsed: 0:11:58.
0: PGCLS>2519 1 Pours a dark amber color with a medium off white head.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,350  of  1,907. Loss: 0.15873660147190094.   Elapsed: 0:12:25.
0: IllCLS>117319 1 Bottle.  Pours a dark brown with a medium sized


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,400  of  1,907. Loss: 0.38324305415153503.   Elapsed: 0:12:53.
0:  automatedCLS>42935 1 Pours a deep golden color with a small white head. 


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,450  of  1,907. Loss: 0.15153636038303375.   Elapsed: 0:13:20.
0:  hiringCLS>46471 1 500 ml bottle.  Pours a clear golden with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,500  of  1,907. Loss: 0.11748252063989639.   Elapsed: 0:13:48.
0:  tweetingCLS>42935 1 Bottle.  Pours a clear amber color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,550  of  1,907. Loss: 0.12340964376926422.   Elapsed: 0:14:16.
0: abitCLS>42935 1 Bottle.  Pours a clear golden amber with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,600  of  1,907. Loss: 0.14326415956020355.   Elapsed: 0:14:43.
0:  iiCLS>42935 1 Bottle.  Pours a clear golden amber with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,650  of  1,907. Loss: 0.28718385100364685.   Elapsed: 0:15:11.
0: clipCLS>117319 1 Bottle.  Pours a deep dark brown color with a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,700  of  1,907. Loss: 0.21150924265384674.   Elapsed: 0:15:38.
0:  FrancCLS>42935 1 Bottle from Trader Joes. Pours a clear golden amber color


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,750  of  1,907. Loss: 0.1004801094532013.   Elapsed: 0:16:06.
0:  listeningCLS>46471 1 Bottle. Pours a clear golden color with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,800  of  1,907. Loss: 0.22806894779205322.   Elapsed: 0:16:34.
0:  EUCLS>42935 1 Bottle.  Pours a clear golden with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,850  of  1,907. Loss: 0.3169475197792053.   Elapsed: 0:17:01.
0:  incredibleCLS>42935 1 Bottle.  Pours a clear golden with a thin white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,900  of  1,907. Loss: 0.08072289079427719.   Elapsed: 0:17:29.
0:  TYCLS>46471 1 Bottle. Pours a clear golden with a medium white head.

  Average training loss: 0.24
  Training epoch took: 0:17:33

Running Validation...
  Validation Loss: 0.38
  Validation took: 0:01:18

Training...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch    50  of  1,907. Loss: 0.18412181735038757.   Elapsed: 0:00:27.
0:  230CLS>65888 1 Pours a clear brown color with a medium tan head. 


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   100  of  1,907. Loss: 0.3097003102302551.   Elapsed: 0:00:55.
0:  TigersCLS>29028 1 Bottle.  Pours a hazy amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   150  of  1,907. Loss: 0.17839258909225464.   Elapsed: 0:01:23.
0: CDCLS>42935 1 Bottle. Pours a golden color with a small white head.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   200  of  1,907. Loss: 0.10700725018978119.   Elapsed: 0:01:50.
0:  unbelievableCLS>29028 1 Bottle.  Pours a hazy amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   250  of  1,907. Loss: 0.43990814685821533.   Elapsed: 0:02:18.
0: RobertCLS>42935 1 Bottle. Pours a clear golden color with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   300  of  1,907. Loss: 0.2237934172153473.   Elapsed: 0:02:46.
0:  ContactCLS>42935 1 Bottle. Pours clear golden with a small white head. A


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   350  of  1,907. Loss: 0.17605900764465332.   Elapsed: 0:03:13.
0:  fountainCLS>46471 1 Bottle.  Pours a clear golden with a small white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   400  of  1,907. Loss: 0.08265814185142517.   Elapsed: 0:03:41.
0:  clashCLS>42935 1 Bottle.  Pours a clear golden amber with a thin white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   450  of  1,907. Loss: 0.20255248248577118.   Elapsed: 0:04:08.
0:  commodCLS>42935 1 Bottle.  Pours a clear golden amber with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   500  of  1,907. Loss: 0.20235826075077057.   Elapsed: 0:04:36.
0:  algorithmCLS>29028 1 Bottle.  Poured a hazy amber with a small off


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   550  of  1,907. Loss: 0.3162059485912323.   Elapsed: 0:05:04.
0:  accurateCLS>29028 1 Bottle.  Pours a hazy amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   600  of  1,907. Loss: 0.23076437413692474.   Elapsed: 0:05:31.
0: adoCLS>29028 1 Bottle.  Pours a hazy amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   650  of  1,907. Loss: 0.192273810505867.   Elapsed: 0:05:59.
0:  regulateCLS>29028 1 Bottle.  Pours a hazy amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   700  of  1,907. Loss: 0.22442875802516937.   Elapsed: 0:06:26.
0: ewayCLS>29028 1 Bottle.  Pours a hazy amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   750  of  1,907. Loss: 0.1615660935640335.   Elapsed: 0:06:54.
0: itteeCLS>42935 1 Pours a clear golden with a small white head.  A


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   800  of  1,907. Loss: 0.22879692912101746.   Elapsed: 0:07:22.
0:  JackieCLS>46471 1 Bottle. Golden color with a small white head. Aroma of


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   850  of  1,907. Loss: 0.39991453289985657.   Elapsed: 0:07:49.
0: HeCLS>42935 1 Bottle.  Pours a clear golden color with a medium sized


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   900  of  1,907. Loss: 0.17474110424518585.   Elapsed: 0:08:17.
0:  wiredCLS>29028 1 2005 bottle.  Pours a dark amber color with a small


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch   950  of  1,907. Loss: 0.15489700436592102.   Elapsed: 0:08:44.
0:  frequCLS>42935 1 Bottle.  Pours a clear golden with a medium white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,000  of  1,907. Loss: 0.6737091541290283.   Elapsed: 0:09:12.
0:  viewsCLS>42935 1 Bottle.  Pours a clear golden color with a medium white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,050  of  1,907. Loss: 0.17113052308559418.   Elapsed: 0:09:40.
0:  StrategyCLS>42935 1 Bottle. Pours a clear golden color with a medium white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,100  of  1,907. Loss: 0.22294841706752777.   Elapsed: 0:10:07.
0:  radioCLS>42935 1 Bottle. Pours a clear golden color with a medium white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,150  of  1,907. Loss: 0.10802827775478363.   Elapsed: 0:10:35.
0: oultCLS>42935 0 Bottle.  Pours a clear golden color with a thin white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,200  of  1,907. Loss: 0.09155299514532089.   Elapsed: 0:11:03.
0:  ReinCLS>42935 1 Bottle.  Pours a clear golden color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,250  of  1,907. Loss: 0.2810986340045929.   Elapsed: 0:11:30.
0:  procCLS>46471 1 Bottle.  Pours a clear golden color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,300  of  1,907. Loss: 0.2895773649215698.   Elapsed: 0:11:58.
0:  abusesCLS>46471 1 Bottle.  Pours a clear golden orange with a white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,350  of  1,907. Loss: 0.13035225868225098.   Elapsed: 0:12:25.
0:  waCLS>46471 1 Bottle. Golden colour, small white head. Aroma is ho


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,400  of  1,907. Loss: 0.29609814286231995.   Elapsed: 0:12:53.
0: lectedCLS>46471 1 Bottle. Golden colour with a small white head. Aroma is


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,450  of  1,907. Loss: 0.1185527816414833.   Elapsed: 0:13:21.
0:  templeCLS>46471 1 Bottle.  Pours a clear golden orange with a white head


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,500  of  1,907. Loss: 0.10077902674674988.   Elapsed: 0:13:48.
0:  lodCLS>42935 1 Bottle.  Pours a clear golden color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,550  of  1,907. Loss: 0.09208613634109497.   Elapsed: 0:14:16.
0:  RioCLS>42935 1 Bottle.  Pours a clear golden amber with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,600  of  1,907. Loss: 0.1038927510380745.   Elapsed: 0:14:43.
0:  PenceCLS>42935 1 Bottle.  Pours a clear golden amber with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,650  of  1,907. Loss: 0.22477704286575317.   Elapsed: 0:15:11.
0:  StevenCLS>42935 1 Bottle.  Pours a clear golden color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,700  of  1,907. Loss: 0.16425752639770508.   Elapsed: 0:15:39.
0:  discourseCLS>42935 1 Bottle.  Pours a clear golden color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,750  of  1,907. Loss: 0.06811099499464035.   Elapsed: 0:16:06.
0:  accessoryCLS>42935 1 Bottle.  Pours a clear golden color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,800  of  1,907. Loss: 0.1861092448234558.   Elapsed: 0:16:34.
0: ulCLS>42935 1 Bottle.  Pours a clear golden color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,850  of  1,907. Loss: 0.20013566315174103.   Elapsed: 0:17:01.
0:  slidingCLS>42935 1 Bottle.  Pours a clear golden color with a small white


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  Batch 1,900  of  1,907. Loss: 0.06851405650377274.   Elapsed: 0:17:29.
0:  DellCLS>46471 1 Bottle. Golden with a small white head. Aroma is ho

  Average training loss: 0.20
  Training epoch took: 0:17:33

Running Validation...
  Validation Loss: 0.41
  Validation took: 0:01:18

Training complete!
Total training took 1:34:11 (h:mm:ss)


In [15]:
### Prompty Tokens
'''
beerID  name                     
42935	   Trader Joes Hofbrau Bock	
43176	  Trader Joes Vienna Style Lager	
46471	  Ridgeway IPA	
65888	  Saranac Brown Ale	
117319	Harpoon 100 Barrel Series #30 - Island Creek Oyster Stout	
2519	  Shipyard Chamberlain Pale Ale	
29028	  De Hemel Nieuw Ligt Grand Cru	
3202	  Penn Dark Lager Beer	
4302	  Gearys London Porter	
56242	  East End Gratitude	

Then 1 for good or 0 for bad
'''

beer = input('Enter a beer ID: ')
rating = input('Review Good or Bad [1/0] ? ')



Enter a beer ID: 42935
Review Good or Bad [1/0] ? 1


In [21]:
model.eval()

prompt_good = beer + " 1" + rating
prompt_bad = beer + " 1" + rating

generated = torch.tensor(tokenizer.encode(prompt_good)).unsqueeze(0)
generated = generated.to(device)

#print(generated)

sample_outputs = model.generate(
                                generated, 
                                #bos_token_id=random.randint(1,30000),
                                do_sample=True,   
                                top_k=50, 
                                max_length = 300,
                                top_p=0.95, 
                                num_return_sequences=3
                                )
print('\nGood reviews for the beer are:\n')
for i, sample_output in enumerate(sample_outputs):
  print("{}: {}\n\n".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))

generated = torch.tensor(tokenizer.encode(prompt_bad)).unsqueeze(0)
generated = generated.to(device)

#print(generated)

sample_outputs = model.generate(
                                generated, 
                                #bos_token_id=random.randint(1,30000),
                                do_sample=True,   
                                top_k=50, 
                                max_length = 300,
                                top_p=0.95, 
                                num_return_sequences=3
                                )
print('Bad reviews for the beer are:\n')
for i, sample_output in enumerate(sample_outputs):
  print("{}: {}\n\n".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Good reviews for the beer are:

0: 42935 11.2 oz bottle. Aroma is sweet. Taste is sweet. A little sticky in the end. Well made stuff.<SEP>,.<SEPours3.42902904345610S 0290S>11S>43SS>43S>1129010S>3 (4S3. 34. Wow290290 (1SEPours290S2901143SEPours2903290 -311S>56S>3290S>56S-4S>10S>56S> 4PoursS>11290 (SEPoursS>25343S	Sells	6511


1: 42935 11.5 fl oz bottle.  Aroma is sweet, and slightly alcoholic.  Flavor is sweet, slightly alcoholic, a little bit chalky, and slightly bitter.<SEP>8a.	S=S -3431143 -44293SEP	4 a.290811:464 rottenSEPoursS11S.4645S>3SEP	. SE	464S>4SEPoursS<SEPours29046411. 10311290113290 (4430SEPoursSEP	SEP	3023	SEPou3SEP>3.11


2: 42935 11.2 fl oz bottle, courtesy of my friend Olga. Pours a rich, lightly hazy, dark-brown color with a white head. The aroma is sweet and floral. The taste is sweet and floral. Medium mouthfeel and finish. Pretty standard mouthfeel. Just ok.<SEP>6571 1  04329065290 rotten rotten29010S>464S>464 (S44242 (71.10SEPours2903<SEP	290 (46429011SEP>11290S>4

In [17]:
# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()

output_dir = './model_save/'

# Create output directory if needed
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

print("Saving model to %s" % output_dir)

# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
model_to_save.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)



Saving model to ./model_save/


('./model_save/tokenizer_config.json',
 './model_save/special_tokens_map.json',
 './model_save/vocab.json',
 './model_save/merges.txt',
 './model_save/added_tokens.json')

In [19]:
!zip -r /content/model_save.zip /content/model_save

  adding: content/model_save/ (stored 0%)
  adding: content/model_save/tokenizer_config.json (deflated 65%)
  adding: content/model_save/pytorch_model.bin (deflated 9%)
  adding: content/model_save/merges.txt (deflated 53%)
  adding: content/model_save/config.json (deflated 51%)
  adding: content/model_save/vocab.json (deflated 63%)
  adding: content/model_save/special_tokens_map.json (deflated 69%)
  adding: content/model_save/added_tokens.json (stored 0%)


In [20]:
from google.colab import files
files.download("/content/model_save.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>