<a href="https://colab.research.google.com/github/bobohope/bdc-summer-2024/blob/main/deep_learning_notebooks/gpt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🚀 GPT

In this notebook, we'll walk through the steps required to train your own GPT model on the wine review dataset.

The code is adapted from the excellent [GPT tutorial](https://keras.io/examples/generative/text_generation_with_miniature_gpt/) created by Apoorv Nandan available on the Keras website.

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import json
import re
import string
from IPython.display import display, HTML

import tensorflow as tf
from tensorflow.keras import layers, models, losses, callbacks

## 0. Parameters <a name="parameters"></a>

In [2]:
# Hyperparameters and run options used by the cells below.
VOCAB_SIZE = 10000  # max_tokens of the TextVectorization layer; also the embedding table and softmax size
MAX_LEN = 80  # model sequence length; reviews are vectorized to MAX_LEN + 1 tokens and then shifted by one
EMBEDDING_DIM = 256  # width of the token/position embeddings and of the Transformer block output
KEY_DIM = 256  # key_dim passed to MultiHeadAttention
N_HEADS = 2  # number of attention heads
FEED_FORWARD_DIM = 256  # units in the first feed-forward Dense layer of the Transformer block
VALIDATION_SPLIT = 0.2  # NOTE(review): no validation split is performed in the cells shown here
SEED = 42  # random seed constant
LOAD_MODEL = False  # when True, a previously saved model is loaded from ./models/gpt
BATCH_SIZE = 32  # number of reviews per batch in the tf.data pipeline
EPOCHS = 5  # number of epochs passed to gpt.fit

## 1. Load the data <a name="load"></a>

In [3]:
# Mount Google Drive so the dataset stored there can be read (this cell only works on Colab).
from google.colab import drive
drive.mount('/content/gdrive',force_remount=True)
# NOTE: despite the "_DIR" suffix, this is the full path of the JSON dataset file, not a directory.
GOOGLE_DRIVE_DIR = "/content/gdrive/My Drive/bdc_2024_summer/data/winemag-data-130k-v2.json"

Mounted at /content/gdrive


In [16]:
# Load the full dataset
# The reviews contain non-ASCII characters (e.g. "Cuvée"), so decode the file
# explicitly as UTF-8 instead of relying on the platform's default encoding.
with open(GOOGLE_DRIVE_DIR, encoding="utf-8") as json_data:
    wine_data = json.load(json_data)

In [17]:
wine_data[10]

{'points': '87',
 'title': 'Kirkland Signature 2011 Mountain Cuvée Cabernet Sauvignon (Napa Valley)',
 'description': 'Soft, supple plum envelopes an oaky structure in this Cabernet, supported by 15% Merlot. Coffee and chocolate complete the picture, finishing strong at the end, resulting in a value-priced wine of attractive flavor and immediate accessibility.',
 'taster_name': 'Virginie Boone',
 'taster_twitter_handle': '@vboone',
 'price': 19,
 'designation': 'Mountain Cuvée',
 'variety': 'Cabernet Sauvignon',
 'region_1': 'Napa Valley',
 'region_2': 'Napa',
 'province': 'California',
 'country': 'US',
 'winery': 'Kirkland Signature'}

In [18]:
# Filter the dataset
# Keep only the reviews whose country, province, variety and description are all
# present, and flatten each one into a single " : "-separated training string.
_REVIEW_FIELDS = ("country", "province", "variety", "description")

filtered_data = []
for review in wine_data:
    # Fields are checked in order and the check stops at the first missing value.
    if any(review[field] is None for field in _REVIEW_FIELDS):
        continue
    filtered_data.append(
        " : ".join(["wine review", *(review[field] for field in _REVIEW_FIELDS)])
    )

In [19]:
# Count the reviews
n_wines = len(filtered_data)
print(f"{n_wines} reviews loaded")

129907 reviews loaded


In [20]:
example = filtered_data[25]
print(example)

wine review : US : California : Pinot Noir : Oak and earth intermingle around robust aromas of wet forest floor in this vineyard-designated Pinot that hails from a high-elevation site. Small in production, it offers intense, full-bodied raspberry and blackberry steeped in smoky spice and smooth texture.


## 2. Tokenize the data <a name="tokenize"></a>

In [9]:
# Pad the punctuation, to treat them as separate 'words'
def pad_punctuation(s):
    """Surround every punctuation character and newline in ``s`` with spaces.

    Runs of spaces are then collapsed to a single space, so each punctuation
    mark ends up as its own whitespace-delimited token.

    Args:
        s: The text to pad.

    Returns:
        The padded text. A trailing punctuation mark leaves a trailing space.
    """
    # ``string.punctuation`` must be escaped before being placed inside a
    # character class: unescaped, its ``\]`` sequence is read as an escaped
    # ``]`` and the backslash itself is never matched.
    s = re.sub(f"([{re.escape(string.punctuation)}\n])", r" \1 ", s)
    s = re.sub(" +", " ", s)
    return s


text_data = [pad_punctuation(x) for x in filtered_data]

In [10]:
# Display an example of wine review
example_data = text_data[25]
example_data

'wine review : US : California : Pinot Noir : Oak and earth intermingle around robust aromas of wet forest floor in this vineyard - designated Pinot that hails from a high - elevation site . Small in production , it offers intense , full - bodied raspberry and blackberry steeped in smoky spice and smooth texture . '

In [11]:
# Convert to a Tensorflow Dataset
# Shuffle individual reviews *before* batching: shuffling after `.batch()` only
# reorders whole batches, so every epoch would see the same reviews grouped
# together. The buffer holds as many reviews as the previous 1000-batch buffer
# did, and the seed makes the shuffle order reproducible between runs.
text_ds = (
    tf.data.Dataset.from_tensor_slices(text_data)
    .shuffle(1000 * BATCH_SIZE, seed=SEED)
    .batch(BATCH_SIZE)
)

In [12]:
# Create a vectorisation layer
vectorize_layer = layers.TextVectorization(
    max_tokens=VOCAB_SIZE,  # cap on the vocabulary size
    standardize="lower",  # only lower-case the text
    output_mode="int",  # emit integer token ids
    # One extra token so that inputs and targets of length MAX_LEN can be
    # obtained by shifting the sequence by one position.
    output_sequence_length=MAX_LEN + 1,
)

In [13]:
# Adapt the layer to the training set
vectorize_layer.adapt(text_ds)
vocab = vectorize_layer.get_vocabulary()

In [14]:
# Display some token:word mappings
for i, word in enumerate(vocab[:10]):
    print(f"{i}: {word}")

0: 
1: [UNK]
2: :
3: ,
4: .
5: and
6: the
7: wine
8: a
9: of


In [15]:
# Display the same example converted to ints
example_tokenised = vectorize_layer(example_data)
print(example_tokenised.numpy())

[   7   10    2   20    2   29    2   43   62    2   55    5  243 4145
  453  634   26    9  497  499  667   17   12  142   14 2214   43   25
 2484   32    8  223   14 2213  948    4  594   17  987    3   15   75
  237    3   64   14   82   97    5   74 2633   17  198   49    5  125
   77    4    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0]


## 3. Create the Training Set <a name="create"></a>

In [21]:
# Create the training set of reviews and the same text shifted by one word
def prepare_inputs(text):
    """Turn a batch of review strings into (input, target) token-id pairs.

    The strings are vectorized with ``vectorize_layer``. The input drops the
    last token of each sequence and the target drops the first, so the target
    at each position is the token that follows the corresponding input token.
    """
    token_ids = vectorize_layer(tf.expand_dims(text, -1))
    return token_ids[:, :-1], token_ids[:, 1:]


train_ds = text_ds.map(prepare_inputs)

In [22]:
example_input_output = train_ds.take(1).get_single_element()

In [23]:
# Example Input
example_input_output[0][0]

<tf.Tensor: shape=(80,), dtype=int64, numpy=
array([   7,   10,    2,   20,    2,  103,    2,   92,    2, 1036,   19,
       1289, 1195,    3,   12,   13,    8,  360,   92,   32,    8, 1808,
        948,    3, 2833,    6,  682,   23,   89,  138,    5,  180,   22,
         16,    9,  221,  383,    5,  326,  196,    4,   15,   18,   21,
         71,  109,    5,    6,   34,   39, 2105,   38,    3,   11,   99,
        623,    4,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0])>

In [24]:
# Example Output (shifted by one token)
example_input_output[1][0]

<tf.Tensor: shape=(80,), dtype=int64, numpy=
array([  10,    2,   20,    2,  103,    2,   92,    2, 1036,   19, 1289,
       1195,    3,   12,   13,    8,  360,   92,   32,    8, 1808,  948,
          3, 2833,    6,  682,   23,   89,  138,    5,  180,   22,   16,
          9,  221,  383,    5,  326,  196,    4,   15,   18,   21,   71,
        109,    5,    6,   34,   39, 2105,   38,    3,   11,   99,  623,
          4,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0])>

## 5. Create the causal attention mask function <a name="causal"></a>

1 means unmasked and 0 means masked. We need to apply the mask to prevent information from future words leaking through.

In [25]:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """Build a batch of causal attention masks.

    Entry ``[b, i, j]`` is set when ``i >= j - n_src + n_dest``; for
    ``n_dest == n_src`` this is ``i >= j``, i.e. position ``i`` may only
    look at positions up to and including itself.

    Args:
        batch_size: Number of copies of the mask to produce.
        n_dest: Number of rows of the mask.
        n_src: Number of columns of the mask.
        dtype: dtype the mask is cast to.

    Returns:
        A tensor of shape ``(batch_size, n_dest, n_src)``.
    """
    dest_positions = tf.range(n_dest)[:, None]
    src_positions = tf.range(n_src)
    is_visible = dest_positions >= src_positions - n_src + n_dest
    single_mask = tf.reshape(tf.cast(is_visible, dtype), [1, n_dest, n_src])
    # Repeat the single mask along the batch axis only.
    tile_multiples = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
    )
    return tf.tile(single_mask, tile_multiples)


np.transpose(causal_attention_mask(1, 10, 10, dtype=tf.int32)[0])

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=int32)

## 6. Create a Transformer Block layer <a name="transformer"></a>

In [26]:
class TransformerBlock(layers.Layer):
    """Transformer block with causal multi-head self-attention.

    The block applies masked self-attention followed by a two-layer
    feed-forward network. Each of the two sub-blocks is followed by dropout,
    a residual connection and layer normalization.

    Args:
        num_heads: Number of attention heads.
        key_dim: ``key_dim`` passed to ``MultiHeadAttention``.
        embed_dim: Width of the block's input and output.
        ff_dim: Units in the first (ReLU) feed-forward layer.
        dropout_rate: Dropout rate used after attention and after the
            feed-forward network.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
            They must be accepted because ``get_config`` includes them, so
            ``from_config(get_config())`` would otherwise raise ``TypeError``.
    """

    def __init__(
        self, num_heads, key_dim, embed_dim, ff_dim, dropout_rate=0.1, **kwargs
    ):
        super().__init__(**kwargs) #<1>
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate
        # multihead head attention layer
        self.attn = layers.MultiHeadAttention(
            num_heads, key_dim, output_shape=embed_dim
        )
        self.dropout_1 = layers.Dropout(self.dropout_rate)
        self.ln_1 = layers.LayerNormalization(epsilon=1e-6)
        self.ffn_1 = layers.Dense(self.ff_dim, activation="relu")
        self.ffn_2 = layers.Dense(self.embed_dim)
        self.dropout_2 = layers.Dropout(self.dropout_rate)
        self.ln_2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Run the block and return ``(block_output, attention_scores)``."""
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(
            batch_size, seq_len, seq_len, tf.bool
        ) #<2>
        # Self-attention: the same tensor is used as query and value.
        attention_output, attention_scores = self.attn(
            inputs,
            inputs,
            attention_mask=causal_mask,
            return_attention_scores=True,
        ) #<3>
        attention_output = self.dropout_1(attention_output)
        out1 = self.ln_1(inputs + attention_output) #<4>
        ffn_1 = self.ffn_1(out1)#<5>
        ffn_2 = self.ffn_2(ffn_1)
        ffn_output = self.dropout_2(ffn_2)
        return (self.ln_2(out1 + ffn_output), attention_scores)#<6>

    def get_config(self):
        """Return the constructor arguments needed to re-create the layer."""
        config = super().get_config()
        config.update(
            {
                "key_dim": self.key_dim,
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config

1. The sublayers that make up the TransformerBlock layer are defined within the initialization function
2. A causal mask is created to hide future keys from the query
3. The multihead attention layer is applied, with the causal attention mask specified
4. The first add and normalization layer
5. The feed-forward layers
6. The second add and normalization layer

## 7. Create the Token and Position Embedding <a name="embedder"></a>

The token embeddings are added to the positional embeddings to give the token position encoding

In [27]:
class TokenAndPositionEmbedding(layers.Layer):
    """Embed token ids and their positions, and return the sum of the two.

    Args:
        max_len: Number of rows in the position embedding table.
        vocab_size: Number of rows in the token embedding table.
        embed_dim: Width of both embeddings.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
            They must be accepted because ``get_config`` includes them, so
            ``from_config(get_config())`` would otherwise raise ``TypeError``.
    """

    def __init__(self, max_len, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )#<1>
        self.pos_emb = layers.Embedding(input_dim=max_len, output_dim=embed_dim)#<2>

    def call(self, x):
        """Return token embeddings of ``x`` plus the matching position embeddings."""
        # Positions are 0 .. (length of the last axis of x) - 1.
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions #<3>

    def get_config(self):
        """Return the constructor arguments needed to re-create the layer."""
        config = super().get_config()
        config.update(
            {
                "max_len": self.max_len,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

1. The tokens are embedded using an `Embedding` layer.
2. The positions of the tokens are also embedded using an `Embedding` layer.
3. The output from the layer is the sum of the token and position embeddings.

## 8. Build the Transformer model <a name="transformer_decoder"></a>

To put everything together, we need to pass our input text through the token and position embedding layer, then through the Transformer block. The final output of the network is a simple `Dense` layer with softmax activation over the number of words in the vocabulary.

In [28]:
# Variable-length sequences of integer token ids.
inputs = layers.Input(shape=(None,), dtype=tf.int32) #<1>

embedding_layer = TokenAndPositionEmbedding(MAX_LEN, VOCAB_SIZE, EMBEDDING_DIM)
x = embedding_layer(inputs) #<2>

transformer_block = TransformerBlock(
    N_HEADS, KEY_DIM, EMBEDDING_DIM, FEED_FORWARD_DIM
)
x, attention_scores = transformer_block(x) #<3>

outputs = layers.Dense(VOCAB_SIZE, activation="softmax")(x) #<4>

# The attention scores are exposed as a second output; no loss is attached to them.
gpt = models.Model(inputs=inputs, outputs=[outputs, attention_scores]) #<5>
gpt.compile(
    optimizer="adam", loss=[losses.SparseCategoricalCrossentropy(), None]
) #<6>

1. The input is padded with zeros
2. The text is encoded using a `TokenAndPositionEmbedding` layer
3. The encoding is passed through a `TransformerBlock`
4. The transformed output is passed through a `Dense` layer with softmax activation function to predict a distribution over the subsequent word
5. The `Model` takes a sequence of word tokens as input and outputs the predicted subsequent word distribution. The attention scores from the `TransformerBlock` are also returned so that we can inspect how the model is directing its attention.
6. The model is compiled with `SparseCategoricalCrossentropy` loss over the predicted word distribution

In [29]:
gpt.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 token_and_position_embeddi  (None, None, 256)         2580480   
 ng (TokenAndPositionEmbedd                                      
 ing)                                                            
                                                                 
 transformer_block (Transfo  ((None, None, 256),       658688    
 rmerBlock)                   (None, 2, None, None))             
                                                                 
 dense_2 (Dense)             (None, None, 10000)       2570000   
                                                                 
Total params: 5809168 (22.16 MB)
Trainable params: 5809168 (22.16 MB)
Non-trainable params: 0 (0.00 Byte)
_____________________

In [30]:
if LOAD_MODEL:
    # Replace the freshly built model with the full model previously saved
    # under ./models/gpt (the path used by gpt.save further down).
    gpt = models.load_model("./models/gpt", compile=True)

## 9. Train the Transformer <a name="train"></a>

In [31]:
# Create a TextGenerator checkpoint
class TextGenerator(callbacks.Callback):
    """Callback that generates a sample wine review at the end of every epoch.

    Args:
        index_to_word: Vocabulary list; position ``i`` holds the word for
            token id ``i``.
        top_k: Accepted for backward compatibility; not used by the sampling
            code in this class.
    """

    def __init__(self, index_to_word, top_k=10):
        # Initialise the base Callback so its own attributes are set up.
        super().__init__()
        self.index_to_word = index_to_word
        self.word_to_index = {
            word: index for index, word in enumerate(index_to_word)
        }

    def sample_from(self, probs, temperature):
        """Sample a token id from ``probs`` after temperature scaling.

        Args:
            probs: 1-D array of probabilities over the vocabulary.
            temperature: Positive scaling factor; each probability is raised
                to ``1 / temperature`` and the result is renormalised.

        Returns:
            A tuple ``(sampled_index, rescaled_probs)``.

        Raises:
            ValueError: If ``temperature`` is not strictly positive.
        """
        if temperature <= 0:
            raise ValueError("temperature must be strictly positive")
        # Work in float64 so that the renormalised probabilities sum to 1
        # within the tolerance checked by np.random.choice.
        probs = np.asarray(probs, dtype=np.float64) ** (1 / temperature)
        probs = probs / np.sum(probs)
        return np.random.choice(len(probs), p=probs), probs

    def generate(self, start_prompt, max_tokens, temperature):
        """Extend ``start_prompt`` one token at a time and print the result.

        Generation stops once the sequence holds ``max_tokens`` tokens or
        token id 0 has been sampled.

        Args:
            start_prompt: Whitespace-separated prompt; words missing from the
                vocabulary are mapped to token id 1.
            max_tokens: Maximum total number of tokens, prompt included.
            temperature: Sampling temperature passed to ``sample_from``.

        Returns:
            A list with one dict per generated token, holding the prompt so
            far (``"prompt"``), the rescaled next-word probabilities
            (``"word_probs"``) and the attention scores of the last position
            for each head (``"atts"``).
        """
        start_tokens = [
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ]
        sample_token = None
        info = []
        while len(start_tokens) < max_tokens and sample_token != 0:
            x = np.array([start_tokens])
            y, att = self.model.predict(x, verbose=0)
            # Only the distribution predicted at the last position is sampled.
            sample_token, probs = self.sample_from(y[0][-1], temperature)
            info.append(
                {
                    "prompt": start_prompt,
                    "word_probs": probs,
                    "atts": att[0, :, -1, :],
                }
            )
            start_tokens.append(sample_token)
            start_prompt = start_prompt + " " + self.index_to_word[sample_token]
        print(f"\ngenerated text:\n{start_prompt}\n")
        return info

    def on_epoch_end(self, epoch, logs=None):
        """Print a freshly generated review after each training epoch."""
        self.generate("wine review", max_tokens=80, temperature=1.0)

In [32]:
# Create a model save checkpoint
# Only the weights are written, once at the end of every epoch.
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath="./checkpoint/checkpoint.ckpt",
    save_freq="epoch",
    save_weights_only=True,
    verbose=0,
)

# Write TensorBoard logs to ./logs
tensorboard_callback = callbacks.TensorBoard(log_dir="./logs")

# Text generation callback built from the fitted vocabulary
text_generator = TextGenerator(vocab)

In [33]:
gpt.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[model_checkpoint_callback, tensorboard_callback, text_generator],
)

Epoch 1/5
generated text:
wine review : france : alsace : gewürztraminer : the orange peel is rich by a toned dry version of gewurztraminer with great leaves that the rich final acidity . there is a touch of bitter chocolate . the fruit flavors come through as an apparent hint of honey are both present with aging in 3–5 years to come . 

Epoch 2/5
generated text:
wine review : us : california : pinot noir : the cool area of cambria shows the warmth of chaparral that give the nose of this wine substantial time in the glass , which causes a savory , purple fruit and dry spicy thyme on the palate . black plum and strawberry flavors recall cooked black cherry , a lively touch of cracked pepper . 

Epoch 3/5
generated text:
wine review : chile : leyda valley : pinot noir : smells smoky and oaky . that feels fresh and burnt , with perfectly ripened berry flavors , while the fruit fades quickly on the finish . 

Epoch 4/5
generated text:
wine review : argentina : mendoza province : malbec : o

<keras.src.callbacks.History at 0x7ea861dcba60>

In [34]:
# Save the final model
gpt.save("./models/gpt")

## 10. Generate text using the Transformer

We can generate new text by applying the following process:
1. Feed the network with an existing sequence of words and ask it to predict the following word.
2. Append this word to the existing sequence and repeat.

The network will output a set of probabilities for each word that we can sample from, so we can make the text generation stochastic rather than deterministic.

In [35]:
def print_probs(info, vocab, top_k=5):
    """Display attention highlights and the most likely next words per step.

    For every entry of ``info`` the prompt is rendered as HTML, with each
    word shaded by its attention score averaged over heads and scaled by the
    largest average. The ``top_k`` most probable next words are then printed.

    Args:
        info: List of dicts with keys ``"prompt"``, ``"atts"`` and
            ``"word_probs"``, as returned by ``TextGenerator.generate``.
        vocab: Vocabulary list mapping token ids to words.
        top_k: Number of candidate next words to print per step.
    """
    import html  # local import: only needed to escape tokens for display

    for step in info:
        # Average over the head axis once per step, not once per word.
        mean_atts = np.mean(step["atts"], axis=0)
        max_att = max(mean_atts)
        highlighted_words = [
            '<span style="background-color:rgba(135,206,250,'
            + str(att_score / max_att)
            + ');">'
            # Tokens such as "<", ">" or "&" must be escaped to render as text.
            + html.escape(word)
            + "</span>"
            for word, att_score in zip(step["prompt"].split(), mean_atts)
        ]
        display(HTML(" ".join(highlighted_words)))

        word_probs = step["word_probs"]
        # One argsort gives both the top indices and their probabilities.
        top_indices = np.argsort(word_probs)[::-1][:top_k]
        for token_id in top_indices:
            p = word_probs[token_id]
            print(f"{vocab[token_id]}:   \t{np.round(100*p,2)}%")
        print("--------\n")

We will use two different temperatures.

Both are stylistically similar to a wine review
Both open with the region and type of wine, and the wine type stays consistent throughout the passage.

- temp=1 more adventurous and therefore less accurate, more variety
- temp=0.5 less variety and more accurate

In [36]:
info = text_generator.generate(
    "wine review : us", max_tokens=80, temperature=1.0
)


generated text:
wine review : us : california : chardonnay : perhaps the wine seems now all didn ' t fully ripen many chards region . the vineyard is far more expensive . here is failla , with apricots , too popular oak sticks out . hold until the chilly vintage . feels massive and a rich wine , at the core of pineapples , mangoes and limes . 



In [37]:
info = text_generator.generate(
    "wine review : italy", max_tokens=80, temperature=0.5
)


generated text:
wine review : italy : tuscany : sangiovese : this is a simple , fresh sangiovese from the abruzzo region . it has a very firm mouthfeel , with a bright , fruity mouthfeel and a touch of tannin . 



We can also ask the model to tell us how much attention is being placed on each word when predicting the next word. The `TransformerBlock` outputs the attention weights for each head, which are a softmax distribution over the preceding words in the sentence.

We show the top five tokens with the highest probabilities, as well as the average attention across all heads for each preceding word. The preceding words are colored according to their attention score, averaged across all attention heads. Darker blue indicates more attention.

In [38]:
info = text_generator.generate(
    "wine review : germany", max_tokens=80, temperature=0.5
)
print_probs(info, vocab)


generated text:
wine review : germany : mosel : riesling : a touch of spice lends complexity to this intensely fruity riesling . it ' s a spry wine , dancing in the mouth and spry . it ' s a [UNK] of juicy , forward , fruity and refreshing , with a hint of tangerine and lime acidity . 



::   	100.0%
zealand:   	0.0%
-:   	0.0%
grosso:   	0.0%
grasparossa:   	0.0%
--------



mosel:   	89.66%
rheinhessen:   	6.18%
rheingau:   	3.86%
pfalz:   	0.25%
nahe:   	0.02%
--------



::   	100.0%
-:   	0.0%
grosso:   	0.0%
,:   	0.0%
valley:   	0.0%
--------



riesling:   	99.99%
weissburgunder:   	0.01%
pinot:   	0.0%
grüner:   	0.0%
red:   	0.0%
--------



::   	100.0%
-:   	0.0%
grosso:   	0.0%
,:   	0.0%
blanc:   	0.0%
--------



a:   	26.74%
while:   	10.64%
fresh:   	9.04%
whiffs:   	8.94%
this:   	7.34%
--------



crush:   	21.83%
whiff:   	21.55%
hint:   	15.7%
touch:   	9.21%
revitalizing:   	4.58%
--------



of:   	99.98%
on:   	0.01%
off:   	0.0%
sweet:   	0.0%
more:   	0.0%
--------



smoke:   	64.65%
minerality:   	8.5%
spice:   	4.97%
petrol:   	4.75%
honey:   	2.81%
--------



adds:   	44.16%
lends:   	33.64%
and:   	15.5%
,:   	1.55%
from:   	1.05%
--------



a:   	62.39%
complexity:   	23.46%
an:   	3.78%
depth:   	2.49%
freshness:   	1.7%
--------



to:   	99.46%
and:   	0.46%
in:   	0.05%
on:   	0.02%
,:   	0.01%
--------



this:   	97.49%
the:   	0.94%
pristine:   	0.72%
a:   	0.37%
fresh:   	0.11%
--------



intensely:   	37.55%
riesling:   	5.5%
fresh:   	5.5%
off:   	5.5%
zesty:   	3.96%
--------



fruity:   	62.12%
aromatic:   	18.61%
concentrated:   	7.82%
ripe:   	4.78%
juicy:   	2.3%
--------



riesling:   	47.59%
,:   	46.01%
wine:   	2.35%
auslese:   	1.55%
kabinett:   	1.0%
--------



.:   	99.08%
,:   	0.84%
full:   	0.04%
that:   	0.03%
and:   	0.0%
--------



it:   	88.7%
the:   	3.83%
off:   	1.84%
on:   	1.35%
fresh:   	0.6%
--------



':   	100.0%
has:   	0.0%
is:   	0.0%
juxtaposes:   	0.0%
offers:   	0.0%
--------



s:   	100.0%
ll:   	0.0%
[UNK]:   	0.0%
11:   	0.0%
d:   	0.0%
--------



off:   	29.5%
a:   	22.18%
dry:   	10.83%
intensely:   	10.42%
fresh:   	2.87%
--------



refreshingly:   	17.3%
delightfully:   	14.48%
crush:   	8.27%
delicately:   	5.57%
forward:   	5.17%
--------



,:   	80.18%
wine:   	18.66%
and:   	0.84%
white:   	0.1%
but:   	0.06%
--------



,:   	49.13%
that:   	33.89%
with:   	13.44%
to:   	2.34%
marked:   	0.41%
--------



but:   	66.52%
with:   	15.04%
yet:   	3.35%
cutting:   	2.54%
though:   	1.37%
--------



in:   	70.39%
and:   	19.1%
on:   	7.52%
with:   	1.55%
,:   	1.08%
--------



acidity:   	56.54%
the:   	12.87%
texture:   	10.52%
feel:   	7.41%
its:   	5.1%
--------



mouth:   	98.61%
palate:   	1.01%
glass:   	0.32%
midpalate:   	0.03%
background:   	0.0%
--------



,:   	76.73%
and:   	8.87%
with:   	8.26%
.:   	5.2%
of:   	0.59%
--------



offers:   	11.94%
penetrates:   	10.75%
dancing:   	8.16%
has:   	6.68%
fresh:   	6.54%
--------



,:   	65.04%
.:   	30.48%
with:   	3.63%
on:   	0.46%
in:   	0.2%
--------



it:   	77.6%
drink:   	12.14%
a:   	1.48%
enjoy:   	1.42%
:   	1.03%
--------



':   	99.91%
finishes:   	0.08%
has:   	0.01%
is:   	0.0%
offers:   	0.0%
--------



s:   	100.0%
ll:   	0.0%
[UNK]:   	0.0%
,:   	0.0%
d:   	0.0%
--------



a:   	76.96%
dry:   	6.21%
off:   	4.02%
an:   	2.45%
dancing:   	2.1%
--------



refreshingly:   	20.14%
shade:   	15.78%
bit:   	9.83%
thirst:   	8.62%
crush:   	6.71%
--------



wine:   	43.52%
of:   	38.43%
,:   	7.87%
[UNK]:   	3.33%
crush:   	1.52%
--------



lime:   	35.16%
lemon:   	24.8%
[UNK]:   	7.98%
tangerine:   	7.01%
fresh:   	4.07%
--------



,:   	71.59%
tangerine:   	19.44%
acidity:   	1.88%
white:   	1.73%
orange:   	0.87%
--------



fruity:   	39.59%
fresh:   	22.63%
ripe:   	14.9%
forward:   	4.62%
sweet:   	4.12%
--------



,:   	59.4%
and:   	38.63%
with:   	0.9%
fruit:   	0.46%
tangerine:   	0.11%
--------



fruity:   	88.95%
ripe:   	3.91%
juicy:   	2.09%
fresh:   	1.1%
quaffable:   	0.97%
--------



and:   	98.09%
,:   	0.84%
wine:   	0.35%
riesling:   	0.18%
with:   	0.11%
--------



forward:   	40.22%
sweet:   	20.64%
juicy:   	8.42%
fresh:   	7.95%
fruity:   	4.43%
--------



.:   	61.32%
,:   	31.93%
on:   	2.77%
with:   	2.58%
wine:   	0.59%
--------



with:   	81.73%
but:   	10.97%
yet:   	3.87%
it:   	1.78%
finishing:   	0.44%
--------



a:   	95.68%
just:   	2.33%
an:   	0.72%
flavors:   	0.37%
loads:   	0.25%
--------



long:   	23.27%
hint:   	18.0%
touch:   	17.41%
lingering:   	14.0%
kiss:   	4.21%
--------



of:   	100.0%
at:   	0.0%
and:   	0.0%
,:   	0.0%
that:   	0.0%
--------



lime:   	37.64%
honey:   	12.26%
minerality:   	7.5%
orange:   	6.74%
lemon:   	5.59%
--------



and:   	86.09%
acidity:   	7.99%
,:   	2.47%
flavor:   	1.57%
on:   	0.5%
--------



lime:   	82.24%
lemon:   	9.21%
tangerine:   	3.4%
grapefruit:   	0.87%
honey:   	0.76%
--------



flavors:   	37.66%
zest:   	27.44%
acidity:   	20.98%
pith:   	4.25%
-:   	4.06%
--------



.:   	94.66%
,:   	2.78%
that:   	2.31%
on:   	0.13%
to:   	0.08%
--------



drink:   	39.91%
:   	38.37%
it:   	15.98%
finishes:   	3.79%
enjoy:   	0.78%
--------

