In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [2]:
# Load the preprocessed DataFrame; later cells read its `bioguide_id` and
# `speech` columns.
# NOTE(review): unpickling can execute arbitrary code — only load a
# `processed_df.pkl` that was produced by a trusted pipeline.
processed_df = pd.read_pickle('processed_df.pkl')

In [3]:
# Keep only the rows whose `bioguide_id` equals the chosen identifier.
temp_bioguide_id = 'H001055'
temp_df = processed_df.loc[processed_df['bioguide_id'] == temp_bioguide_id]

In [4]:
# Character-level tokens of the first selected speech (lower-cased), framed by
# explicit beginning-of-sequence and end-of-sequence marker tokens.
data = ['[BOS]', *temp_df['speech'].iloc[0].lower(), '[EOS]']

In [5]:
# Token -> one-hot encoder. No out-of-vocabulary bucket is reserved
# (num_oov_indices=0), so the vocabulary is exactly the set of tokens seen
# during `adapt`.
lookup_layer = tf.keras.layers.StringLookup(
    num_oov_indices=0,
    output_mode='one_hot',
)
lookup_layer.adapt(data)

# One-hot encode the whole token sequence.
tensor = lookup_layer(data)

2022-04-20 22:39:21.046080: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
# Build the batched training tensor straight from `data` instead of expanding
# the existing `tensor` in place. The previous form (`tensor = expand_dims(tensor)`)
# was not idempotent: re-running the cell stacked one more leading axis each
# time, after which the three-way shape unpack below raised a ValueError.
tensor = tf.expand_dims(lookup_layer(data), axis=0)

# A single sequence is used, so the leading (batch) axis has length 1.
batch_size, seq_len, voc_size = tensor.shape

In [7]:
class RNN_LM(tf.keras.Model):
    """Recurrent language model trained by next-token prediction.

    Architecture: two stacked LSTM layers (512 then 256 units, both returning
    the full sequence), a 256-unit ReLU dense layer, and a final dense layer
    without activation that emits one score (logit) per vocabulary entry at
    every time step.

    Args:
        vocab_size: Width of the output layer. When omitted, the notebook-level
            global ``voc_size`` is used, so ``RNN_LM()`` behaves as before.
    """

    def __init__(self, vocab_size=None):
        super().__init__()
        if vocab_size is None:
            # Backward-compatible fallback: read the notebook global at
            # construction time, exactly as the original class did.
            vocab_size = voc_size
        self.lstm1 = tf.keras.layers.LSTM(units=512, return_sequences=True)
        self.lstm2 = tf.keras.layers.LSTM(units=256, return_sequences=True)
        self.dense1 = tf.keras.layers.Dense(units=256, activation='relu')
        self.dense2 = tf.keras.layers.Dense(units=vocab_size)

    def call(self, x, training=None):
        """Return per-time-step vocabulary logits for the input sequence(s).

        Args:
            x: Input tensor fed to the first LSTM; the training code passes a
                rank-3 one-hot tensor (batch, time, vocabulary).
            training: Optional Keras training flag, forwarded to the LSTM
                layers.

        Returns:
            Tensor of unnormalised scores with the vocabulary on the last axis.
        """
        hiddens = self.lstm2(
            self.lstm1(x, training=training),
            training=training,
        )
        outputs = self.dense2(self.dense1(hiddens))

        return outputs

    def train_step(self, x):
        """Run one optimisation step using teacher forcing.

        The input doubles as the target: the prediction made at position ``t``
        is scored against the token at position ``t + 1``.

        Args:
            x: Rank-3 tensor indexed as (batch, time, vocabulary).

        Returns:
            Dict mapping each tracked metric name to its current value.
        """
        with tf.GradientTape() as tape:
            # Predictions for positions 0 .. T-2 (the last step has no target).
            y_pred = self(x, training=True)[:, :-1, :]
            # Targets are the same sequence shifted left by one: 1 .. T-1.
            y = x[:, 1:, :]

            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

In [13]:
# Training setup: Adam, cross-entropy computed on raw logits (the model's last
# layer has no softmax), and a scheduler that multiplies the learning rate by
# 0.2 once the loss has stopped improving for 5 epochs.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=5,
    verbose=1,
)

model = RNN_LM()
model.compile(optimizer=optimizer, loss=loss)

# Targets are derived from `x` inside `RNN_LM.train_step`, so no `y` is given.
model.fit(
    x=tensor,
    epochs=500,
    verbose=2,
    callbacks=[lr_scheduler],
    shuffle=True,
)



Epoch 1/500
1/1 - 3s - loss: 3.6623 - lr: 0.0010 - 3s/epoch - 3s/step
Epoch 2/500
1/1 - 0s - loss: 3.6317 - lr: 0.0010 - 412ms/epoch - 412ms/step
Epoch 3/500
1/1 - 0s - loss: 3.5622 - lr: 0.0010 - 312ms/epoch - 312ms/step
Epoch 4/500
1/1 - 0s - loss: 3.3042 - lr: 0.0010 - 277ms/epoch - 277ms/step
Epoch 5/500
1/1 - 0s - loss: 3.2297 - lr: 0.0010 - 304ms/epoch - 304ms/step
Epoch 6/500
1/1 - 0s - loss: 3.0002 - lr: 0.0010 - 313ms/epoch - 313ms/step
Epoch 7/500
1/1 - 0s - loss: 2.9399 - lr: 0.0010 - 294ms/epoch - 294ms/step
Epoch 8/500
1/1 - 0s - loss: 2.8999 - lr: 0.0010 - 267ms/epoch - 267ms/step
Epoch 9/500
1/1 - 0s - loss: 2.8740 - lr: 0.0010 - 300ms/epoch - 300ms/step
Epoch 10/500
1/1 - 0s - loss: 2.8486 - lr: 0.0010 - 324ms/epoch - 324ms/step
Epoch 11/500
1/1 - 0s - loss: 2.8193 - lr: 0.0010 - 315ms/epoch - 315ms/step
Epoch 12/500
1/1 - 0s - loss: 2.7966 - lr: 0.0010 - 326ms/epoch - 326ms/step
Epoch 13/500
1/1 - 0s - loss: 2.7815 - lr: 0.0010 - 350ms/epoch - 350ms/step
Epoch 14/500
1

<keras.callbacks.History at 0x7f7fe29bed90>

In [85]:
# prediction
#
# Greedy decoding: start from the one-hot '[BOS]' token, repeatedly feed the
# whole prefix to the model, take the arg-max token of the last time step and
# append it to the prefix.
#
# Changes from the earlier version of this cell:
#   * the vocabulary width is taken from the encoder output rather than the
#     hard-coded 39, so the cell keeps working when the vocabulary changes;
#   * decoding stops when '[EOS]' is predicted instead of appending the literal
#     text "[EOS]" to the result and continuing to generate;
#   * the vocabulary list is fetched once, outside the loop.
vocabulary = lookup_layer.get_vocabulary()
max_chars = 100

bos_one_hot = tf.cast(lookup_layer('[BOS]'), dtype=tf.float32)
preds_tensor = tf.reshape(bos_one_hot, (1, 1, -1))  # (batch=1, time=1, vocab)
decoded_str = ''

while len(decoded_str) < max_chars:
    # Scores for the next token, i.e. the model output at the last time step.
    last_of_preds = model(preds_tensor)[:, -1:, :]
    decoded_ch = vocabulary[np.argmax(last_of_preds)]
    if decoded_ch == '[EOS]':
        break
    decoded_str += decoded_ch

    # Append the chosen token to the prefix for the next iteration.
    next_one_hot = tf.cast(lookup_layer(decoded_ch), dtype=tf.float32)
    preds_tensor = tf.concat(
        [preds_tensor, tf.reshape(next_one_hot, (1, 1, -1))],
        axis=1,
    )

print(decoded_str)