In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [3]:
# Load the pre-processed table from disk.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
processed_df_path = '../data/processed_df.pkl'
processed_df = pd.read_pickle(processed_df_path)

In [4]:
# Keep only the rows whose bioguide_id matches the selected id.
temp_bioguide_id = 'H001055'
member_mask = processed_df['bioguide_id'] == temp_bioguide_id
temp_df = processed_df[member_mask]

In [5]:
# Character-level tokens of the first matching speech (lower-cased),
# wrapped in explicit begin/end-of-sequence markers.
first_speech = temp_df['speech'].iloc[0]
data = ['[BOS]', *first_speech.lower(), '[EOS]']

In [6]:
# Token -> one-hot vector lookup. No out-of-vocabulary slots are reserved,
# so the vocabulary consists only of what `adapt` sees in `data`.
lookup_layer = tf.keras.layers.StringLookup(
    output_mode='one_hot',
    num_oov_indices=0,
)
lookup_layer.adapt(data)

# One-hot encode the whole token sequence.
tensor = lookup_layer(data)

2022-04-21 00:09:59.016035: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
# Add a leading batch axis of size 1.
# The rank check keeps this cell safe to re-run: without it, a second
# execution would stack another leading axis and the three-way unpack
# below would fail.
if len(tensor.shape) == 2:
    tensor = tf.expand_dims(tensor, axis=0)

# Dimensions of the batched one-hot tensor; `voc_size` is read by RNN_LM.
batch_size, seq_len, voc_size = tensor.shape

In [8]:
class RNN_LM(tf.keras.Model):
    """Next-token language model: two stacked LSTMs followed by a two-layer dense head.

    The last dense layer has no activation, so the model outputs raw scores
    (logits) for every time step; pair it with a loss that accepts logits.
    """

    def __init__(self, vocab_size=None):
        """Create the layers.

        Args:
            vocab_size: Number of units in the output layer. When omitted,
                the notebook-level global ``voc_size`` is used, so the
                original ``RNN_LM()`` call keeps working.
        """
        super().__init__()
        if vocab_size is None:
            # Backward-compatible fallback to the notebook global; pass
            # ``vocab_size`` explicitly to avoid depending on cell run order.
            vocab_size = voc_size
        self.lstm1 = tf.keras.layers.LSTM(units=512, return_sequences=True)
        self.lstm2 = tf.keras.layers.LSTM(units=256, return_sequences=True)
        self.dense1 = tf.keras.layers.Dense(units=256, activation='relu')
        self.dense2 = tf.keras.layers.Dense(units=vocab_size)

    def call(self, x):
        """Return per-time-step output scores for the input sequences ``x``."""
        hiddens = self.lstm2(self.lstm1(x))
        outputs = self.dense2(self.dense1(hiddens))

        return outputs

    def train_step(self, x):
        """Run one optimisation step of next-token prediction.

        The targets are the inputs shifted by one position along axis 1, so
        no separate labels are needed; ``fit`` is expected to be called with
        ``x`` only.

        Args:
            x: Batch of sequences, indexed here as (batch, time, vocab).

        Returns:
            Dict mapping each metric name to its current value.
        """
        # Targets: positions 1 .. last (i.e. "the next token" for each input position).
        y = x[:, 1:, :]

        with tf.GradientTape() as tape:
            # Drop the prediction made after the final position: it has no target.
            y_pred = self(x, training=True)[:, :-1, :]
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

In [14]:
# Train on a short prefix only: keep the first `train_len` positions along axis 1.
# NOTE: `seq_len` computed earlier from `tensor.shape` is not updated here.
train_len = 200
tensor = tensor[:, :train_len, :]

In [16]:
# The loss consumes raw logits (from_logits=True).
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Multiply the learning rate by 0.2 once the training loss has not
# improved for 5 epochs.
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=5,
    verbose=1,
)

model = RNN_LM()
model.compile(optimizer=optimizer, loss=loss)

# Only `x` is passed: the model's custom train_step derives targets from the inputs.
model.fit(
    x=tensor,
    epochs=500,
    verbose=2,
    callbacks=[lr_scheduler],
    shuffle=True,
)

Epoch 1/500
1/1 - 3s - loss: 3.6617 - lr: 0.0010 - 3s/epoch - 3s/step
Epoch 2/500
1/1 - 0s - loss: 3.6296 - lr: 0.0010 - 402ms/epoch - 402ms/step
Epoch 3/500
1/1 - 0s - loss: 3.5455 - lr: 0.0010 - 460ms/epoch - 460ms/step
Epoch 4/500
1/1 - 0s - loss: 3.1795 - lr: 0.0010 - 462ms/epoch - 462ms/step
Epoch 5/500
1/1 - 0s - loss: 3.1332 - lr: 0.0010 - 478ms/epoch - 478ms/step
Epoch 6/500
1/1 - 0s - loss: 3.0558 - lr: 0.0010 - 469ms/epoch - 469ms/step
Epoch 7/500
1/1 - 0s - loss: 3.0002 - lr: 0.0010 - 472ms/epoch - 472ms/step
Epoch 8/500
1/1 - 0s - loss: 2.9853 - lr: 0.0010 - 469ms/epoch - 469ms/step
Epoch 9/500
1/1 - 0s - loss: 2.9716 - lr: 0.0010 - 411ms/epoch - 411ms/step
Epoch 10/500
1/1 - 0s - loss: 2.9523 - lr: 0.0010 - 478ms/epoch - 478ms/step
Epoch 11/500
1/1 - 0s - loss: 2.9393 - lr: 0.0010 - 466ms/epoch - 466ms/step
Epoch 12/500
1/1 - 0s - loss: 2.9365 - lr: 0.0010 - 470ms/epoch - 470ms/step
Epoch 13/500
1/1 - 0s - loss: 2.9349 - lr: 0.0010 - 420ms/epoch - 420ms/step
Epoch 14/500
1

<keras.callbacks.History at 0x7f9d519eb670>

In [18]:
# prediction
# Greedy decoding: start from [BOS], repeatedly feed the sequence generated
# so far to the model and append the highest-scoring next token.

max_decoded_chars = 200

# The index -> token table does not change during decoding, so fetch it once.
vocabulary = lookup_layer.get_vocabulary()


def encode_token(token):
    """One-hot encode a single token as a float32 tensor of shape (1, 1, vocab).

    The width comes from the lookup layer's own output, so no vocabulary
    size needs to be hard-coded.
    """
    return tf.cast(tf.reshape(lookup_layer(token), (1, 1, -1)), dtype=tf.float32)


preds_tensor = encode_token('[BOS]')
decoded_str = ''

while len(decoded_str) < max_decoded_chars:
    # Scores for the position following the last generated token.
    last_of_preds = model(preds_tensor)[:, -1:, :]
    decoded_ch = vocabulary[np.argmax(last_of_preds)]
    if decoded_ch == '[EOS]':
        # End of sequence: stop instead of appending the marker to the text.
        break
    decoded_str += decoded_ch
    preds_tensor = tf.concat([preds_tensor, encode_token(decoded_ch)], axis=1)

print(decoded_str)

. mr. speaker, washington seems to have forgotten that government money isn't free, and it is the american taxpayers who support its spending habit. simply put, the federal government doesn't respect 
