In [1]:
import tensorflow as tf
import numpy as np

2023-11-10 10:55:50.603243: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
import keras_nlp

Using TensorFlow backend


In [8]:
embeddings_index = {}

with open('glove.6B.50d.txt') as f:
  for line in f:
    word, coefs = line.split(maxsplit=1)
    coefs = np.fromstring(coefs, "f", sep = ' ')
    embeddings_index[word] = coefs

In [9]:
embeddings_index ['movie']

array([ 0.30824 ,  0.17223 , -0.23339 ,  0.023105,  0.28522 ,  0.23076 ,
       -0.41048 , -1.0035  , -0.2072  ,  1.4327  , -0.80684 ,  0.68954 ,
       -0.43648 ,  1.1069  ,  1.6107  , -0.31966 ,  0.47744 ,  0.79395 ,
       -0.84374 ,  0.064509,  0.90251 ,  0.78609 ,  0.29699 ,  0.76057 ,
        0.433   , -1.5032  , -1.6423  ,  0.30256 ,  0.30771 , -0.87057 ,
        2.4782  , -0.025852,  0.5013  , -0.38593 , -0.15633 ,  0.45522 ,
        0.04901 , -0.42599 , -0.86402 , -1.3076  , -0.29576 ,  1.209   ,
       -0.3127  , -0.72462 , -0.80801 ,  0.082667,  0.26738 , -0.98177 ,
       -0.32147 ,  0.99823 ], dtype=float32)

# Get the movie data

In [10]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=10000, maxlen=250)

In [11]:
x_all = np.append(x_train, x_test)

In [13]:
x_all_padded = tf.keras.utils.pad_sequences(x_all, padding = "post")

In [14]:
word_lookup = tf.keras.datasets.imdb.get_word_index()

In [15]:
word_lookup['movie']

17

In [16]:
inverted_word_lookup = dict((index +3, word) for (word, index) in word_lookup.items())

inverted_word_lookup[0] = "[PAD]"
inverted_word_lookup[1] = "[START]"
inverted_word_lookup[2] = "[OOV]"
inverted_word_lookup[3] = "[NA]"

In [17]:
def decode(x):
    return "".join(inverted_word_lookup[i] for i in x)

In [18]:
decode(x_all[1000])

"[START]theviewerleaveswonderingwhyhebotheredtowatchthisoneorwhyforthatmatteranyonebotheredtomakeitthereisnoplotjustrandomscenesofridiculousaction[OOV][OOV]showersceneappealstothemale[OOV]butthat'snotmuchreasontomakeamovie"

## Build embedding model

In [30]:
embedding_matrix = np.zeros((10004, 50))

hits = 0
misses = 0

for i, word in inverted_word_lookup.items():
    if (i >= 10004):
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
        hits += 1
    else:
        misses += 1
        print(word)

print("%d hits, %d misses" % (hits, misses))

else's
miyazaki's
victoria's
paul's
chan's
show's
wife's
character's
hadn't
isn't
haven't
wouldn't
its'
she'd
she's
paperhouse
they'll
it's
it'd
daughter's
ted's
ben's
america's
men's
he'll
john's
audience's
30's
mom's
hero's
hasn't
should've
imho
keaton's
they'd
zelah
you'll
smith's
girls'
craven's
feinstone
moore's
eastwood's
kids'
tv's
town's
anyone's
writer's
1960's
kubrick's
husband's
allen's
80's
stewart's
t'aime
boy's
man'
scott's
it´s
bakshi's


person's
you've
verhoeven's
spielberg's
it'll
carpenter's
life's
sister's
family's
who've
director's
where's
city's
author's
man's
friend's
we'd
would've
day's
freddy's
woman's
1930's
can't
ain't
actors'
90's
ossessione
ford's
couldn't
1990's
won't
that'll
other's
aren't
doctor's
everybody's
jackson's
we're
hollywood's
kelly's
david's
murphy's
dvd's
shakespeare's
characters'
mother's
he's
he'd
hitler's
everyone's
don't
could've
child's
miike's
simon's
children's
let's
didn't
you're
bug's
40's
someone's
today's
gypo
lynch's
1950's
palm

### Create a decoder-only transformer

In [38]:
# WORD + POSITION EMBEDDING
input_layer = tf.keras.layers.Input(shape=(None,))
word_embedding = tf.keras.layers.Embedding(10004, 50, embeddings_initializer = tf.keras.initializers.constant(embedding_matrix), trainable=False, mask_zero=True)(input_layer)
position_embedding = keras_nlp.layers.SinePositionEncoding()(word_embedding)
word_and_position_embedding = word_embedding + position_embedding # taking output of these two layers and element wise adds them
# ATTENTION
attention = tf.keras.layers.MultiHeadAttention(10, 50)(word_and_position_embedding, word_and_position_embedding, use_causal_mask=True) # one for query, the other for key (keys and value the same so no need third), causal mask: since we give the model the entire sentence at a time, we want it to not have access to words in the future at a given position in a sentence. So we use this mask to mask out words ahead so the self attention cannot look ahead
residual = tf.keras.layers.Add()([word_and_position_embedding, attention])
normalize = tf.keras.layers.LayerNormalization()(residual)
# DENSE
dense = tf.keras.layers.Dense(50, activation="relu")(normalize)
residual_dense = tf.keras.layers.Add()([normalize, dense])
normalize_dense = tf.keras.layers.LayerNormalization()(residual_dense)

# OUTPUT
linear = tf.keras.layers.Dense(50, activation=None)(normalize_dense)
output_layer = tf.keras.layers.Dense(10004, activation="softmax")(linear)

model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, None)]               0         []                            
                                                                                                  
 embedding_11 (Embedding)    (None, None, 50)             500200    ['input_12[0][0]']            
                                                                                                  
 sine_position_encoding_10   (None, None, 50)             0         ['embedding_11[0][0]']        
 (SinePositionEncoding)                                                                           
                                                                                                  
 tf.__operators__.add_8 (TF  (None, None, 50)             0         ['embedding_11[0][0]',  

In [39]:
y=np.roll(x_all_padded, -1, 1)

In [41]:
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy"
)