In [2]:
from tensorflow import keras 
import tensorflow as tf

vocab_size = 20000  # Only consider the top 20k words
maxlen = 200  # Only consider the first 200 words of each movie review
path = '/Users/arian/Downloads/ML/EX/NLP/aclImdb/imdb-2.npz'
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(path=path,num_words=vocab_size)
print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")
x_train = keras.utils.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.utils.pad_sequences(x_val, maxlen=maxlen)

25000 Training sequences
25000 Validation sequences


### Text classification


In [3]:
from keras import layers

In [4]:
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, dense_dim):
        super().__init__()
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential([layers.Dense(dense_dim,activation='relu'), layers.Dense(embed_dim),])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        

    def call(self, inputs, mask=None):
        if mask is not None :
            mask = mask[:,tf.newaxis, :]
        attn_output = self.att(inputs,inputs,attention_mask=mask)
        out1 = self.layernorm1(inputs+attn_output)
        ffn_output = self.ffn(out1)
        return self.layernorm2(out1 + ffn_output)
        
    def get_config(self):
        config = super().get_config()
        config.update({
            'embed_dim':self.embed_dim,
            'num_heads' :self.num_heads,
            'dense_dim':self.dense_dim ,      
            })
        return config

In [10]:
vocab_size = 200000
embed_dim = 128
num_heads = 2
dense_dim = 32
inputs = tf.keras.Input(shape=(None,),dtype='int64')
x = tf.keras.layers.Embedding(vocab_size,embed_dim)(inputs)
x = TransformerBlock(embed_dim, num_heads, dense_dim)(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer='rmsprop', loss='binary_crossentropy',metrics=['accuracy'])
model.summary()


Metal device set to: Apple M2 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2023-08-05 12:52:01.398417: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-08-05 12:52:01.399190: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 128)         25600000  
                                                                 
 transformer_block (Transfor  (None, None, 128)        140832    
 merBlock)                                                       
                                                                 
 global_max_pooling1d (Globa  (None, 128)              0         
 lMaxPooling1D)                                                  
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_2 (Dense)             (None, 1)                 129   

In [15]:
model.fit(x_train, y_train, batch_size=32, epochs=2, validation_data=(x_val, y_val))

Epoch 1/2


2023-08-03 10:19:29.348738: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-08-03 10:19:30.294969: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-08-03 10:21:07.610867: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2


<keras.callbacks.History at 0x2e0db0970>

### Implementing positional embedding

In [8]:
class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size,output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen,output_dim=embed_dim)
    def compute_mask(self, inputs, mask=None):
        return tf.math.not_equal(inputs, 0)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [11]:
inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
x = TransformerBlock(embed_dim, num_heads, dense_dim) (x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer='rmsprop', loss='binary_crossentropy',metrics=['accuracy'])
model.summary()



Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 200)]             0         
                                                                 
 token_and_position_embeddin  (None, 200, 128)         25625600  
 g (TokenAndPositionEmbeddin                                     
 g)                                                              
                                                                 
 transformer_block_1 (Transf  (None, 200, 128)         140832    
 ormerBlock)                                                     
                                                                 
 global_max_pooling1d_1 (Glo  (None, 128)              0         
 balMaxPooling1D)                                                
                                                                 
 dropout_1 (Dropout)         (None, 128)               0   

In [12]:
model.fit(x_train, y_train, batch_size=32, epochs=2, validation_data=(x_val, y_val))

Epoch 1/2


2023-08-05 12:52:16.143224: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-08-05 12:52:17.117889: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-08-05 12:53:53.554736: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2


<keras.callbacks.History at 0x2db13be50>