In [1]:
import numpy as np
import sys
from tensorflow import one_hot, GradientTape 
from tensorflow.keras.layers import Dense, LSTM, Input, Embedding
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, LambdaCallback
from tensorflow.keras.models import Model

2023-01-26 23:52:33.830434: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-26 23:52:43.509570: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-01-26 23:52:43.510121: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


---
## Creating the Dataset

In [2]:
# Loads the abstracts array
data = np.load('datasets/patent/patent_abstracts.npy')

# Joins all abstracts (one per line, lowercased) into a single text that is
# used to collect the character vocabulary. A single join replaces growing the
# string with `+=` inside a loop, which can cost quadratic time.
all_texts = ''.join(text + '\n' for text in data).lower()

# Gets all unique chars, sorted so the char <-> index mapping is deterministic
chars = sorted(set(all_texts))
num_chars = len(chars)

# Creates a mapping from character to integer and from integer back to character
char_indices = {char: index for index, char in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [3]:
training_length = 80
max_training = 3000

# Every training example is a window of <training_length> + 1 consecutive
# chars: the first <training_length> are the features, the last is the label.
window_size = training_length + 1

feature_chunks = []
label_chunks = []

# Iterate through the abstracts used for training
for seq in data[:max_training]:
    abstract = seq.lower()

    # An abstract shorter than one full window yields no example (and
    # sliding_window_view rejects a window longer than the array)
    if len(abstract) < window_size:
        continue

    # Transforms each char into its numeric value, once per abstract instead
    # of once per window it appears in
    encoded = np.array([char_indices[char] for char in abstract])

    # Row j is a read-only view of encoded[j : j + window_size], so the
    # examples come out in the same order as sliding one char at a time
    windows = np.lib.stride_tricks.sliding_window_view(encoded, window_size)

    # Split every window into the input sequence and the next char after it
    feature_chunks.append(windows[:, :-1])
    label_chunks.append(windows[:, -1])

if feature_chunks:
    # Concatenation copies the views into two contiguous arrays
    features = np.concatenate(feature_chunks)
    labels = np.concatenate(label_chunks)
else:
    # No abstract was long enough: keep well-formed (empty) arrays
    features = np.empty((0, training_length), dtype=int)
    labels = np.empty((0,), dtype=int)

print('Features shape: ', features.shape)
print('Labels shape: ', labels.shape)

Features shape:  (1976038, 80)
Labels shape:  (1976038,)


---
## Creating the Network

In [4]:
class CustomModel(Model):
    """Keras model whose train/test steps one-hot encode the targets.

    Both steps receive `(x, y)` batches, expand `y` with `one_hot` to a depth
    of `num_chars` (a module-level value), and pass the encoded targets to the
    compiled loss and metrics.
    """

    def train_step(self, data):
        """Run one optimization step on a batch.

        Args:
            data: An `(x, y)` pair; its structure depends on the model and on
                what is passed to `fit()`.

        Returns:
            Dict mapping each metric name in `self.metrics` to its current
            value.
        """
        inputs, targets = data
        encoded_targets = one_hot(targets, num_chars)

        # The forward pass and the loss are recorded so they can be differentiated
        with GradientTape() as tape:
            predictions = self(inputs, training=True)
            loss = self.compiled_loss(
                encoded_targets,
                predictions,
                regularization_losses=self.losses,
            )

        # Differentiate the loss w.r.t. the trainable weights and apply the update
        variables = self.trainable_variables
        grads = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(grads, variables))

        # Metrics (including the one tracking the loss) see the same encoded targets
        self.compiled_metrics.update_state(encoded_targets, predictions)

        results = {}
        for metric in self.metrics:
            results[metric.name] = metric.result()
        return results

    def test_step(self, data):
        """Evaluate one batch without updating the weights.

        Args:
            data: An `(x, y)` pair.

        Returns:
            Dict mapping each metric name in `self.metrics` to its current
            value; the loss is included because it is tracked in
            `self.metrics`.
        """
        inputs, targets = data
        encoded_targets = one_hot(targets, num_chars)

        # Inference-mode forward pass
        predictions = self(inputs, training=False)

        # Called for its side effect: updates the metric tracking the loss
        self.compiled_loss(
            encoded_targets,
            predictions,
            regularization_losses=self.losses,
        )
        self.compiled_metrics.update_state(encoded_targets, predictions)

        results = {}
        for metric in self.metrics:
            results[metric.name] = metric.result()
        return results

In [5]:
# Input: one integer char index per time step. `shape` is given as a tuple
# (the documented form) rather than a bare int, which only works through
# implicit coercion and is rejected by newer Keras releases.
inp = Input(shape=(training_length,))

# Maps every char index to a dense 80-dimensional vector
out = Embedding(input_dim=num_chars, output_dim=80, input_length=training_length)(inp)

# Two stacked LSTMs: the first returns the full sequence so the second can
# consume it; the second returns only its final output
out = LSTM(128, return_sequences=True, dropout=0.1, recurrent_dropout=0.1)(out)
out = LSTM(64, return_sequences=False, dropout=0.1, recurrent_dropout=0.1)(out)

# Probability distribution over the vocabulary for the next char
out = Dense(num_chars, activation='softmax')(out)

model = CustomModel(inp, out)
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.006))

2023-01-26 23:54:01.132293: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-26 23:54:03.685172: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory
2023-01-26 23:54:03.685624: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1934] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2023-01-26 23:54:03.740499: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neur

---
## Helper functions to run after each epoch

In [6]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    The probabilities are moved to log space, divided by `temperature`,
    re-normalized with a softmax and used for a single multinomial draw.
    Temperatures below 1 sharpen the distribution, above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Positive scaling factor applied to the log-probabilities.

    Returns:
        The sampled index (a NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')

    # A zero probability maps to a -inf logit, which exp() turns back into a
    # zero probability, so the divide-by-zero warning from log is just noise.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shifting by the largest logit leaves the softmax unchanged but keeps at
    # least one exponent at 0, so very small temperatures cannot underflow
    # every term to 0 and produce NaN probabilities.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)

    # One draw from the re-scaled distribution; argmax recovers the drawn index
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

def on_epoch_end(epoch, logs=None):
    """Print text generated by the model; invoked at the end of each epoch.

    A seed of `training_length` chars is taken from the start of a randomly
    chosen abstract, and 400 chars are generated from it for each diversity
    (sampling temperature) value.

    Args:
        epoch: Index of the epoch that just finished, as given by Keras.
        logs: Metrics dict given by Keras; unused.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    # Prefer abstracts that were not used to build the training set. Only
    # abstracts with at least <training_length> chars can fill the model input.
    seed_pool = [text for text in data[max_training:] if len(text) >= training_length]
    if not seed_pool:
        # No usable held-out abstract (e.g. len(data) <= max_training)
        seed_pool = [text for text in data if len(text) >= training_length]
    if not seed_pool:
        print('----- No abstract with at least %d chars to use as seed' % training_length)
        return

    text = seed_pool[np.random.randint(len(seed_pool))].lower()
    sentence = text[:training_length]

    # The seed is the same for every diversity, so it is encoded only once
    seed_indices = [char_indices[char] for char in sentence]

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(sentence)

        x_pred = list(seed_indices)
        for _ in range(400):
            preds = model.predict(np.array([x_pred]), verbose=0)[0]
            next_index = sample(preds, diversity)

            # Slide the window: drop the oldest char, add the generated one
            x_pred = x_pred[1:] + [next_index]

            sys.stdout.write(indices_char[next_index])
            sys.stdout.flush()
        print()

In [7]:
# Prints generated samples at the end of every epoch
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Saves the model only when the monitored training loss reaches a new minimum
checkpoint = ModelCheckpoint(
    'models/1/model.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

# Multiplies the learning rate by 0.2 when the training loss stops improving
# (patience of one epoch), never going below 0.001
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=1,
    min_lr=0.001,
)

---
## Model training

In [8]:
# Trains on the integer-encoded windows; the callbacks print samples, save
# checkpoints and adjust the learning rate
model.fit(
    features,
    labels,
    batch_size=64,
    epochs=5,
    callbacks=[print_callback, checkpoint, reduce_lr],
)

2023-01-26 23:54:05.914024: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 1264664320 exceeds 10% of free system memory.


Epoch 1/5
----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "the present disclosure concerns an electro-mechanical flushing system for a resi"
the present disclosure concerns an electro-mechanical flushing system for a resind in the toilet bowl of the comprised to a control container and the toilet bowl in the toilet bowl in the container in the toilet bowl is container and a toilet bowl in the toilet bowl to the toilet bowl in the container to the toilet bowl to the container and the bowl in the toilet bowl of the toilet bowl is seat and the toilet bowl is a container in the toilet bowl in the toilet bowl and a com
----- diversity: 0.5
----- Generating with seed: "the present disclosure concerns an electro-mechanical flushing system for a resi"
the present disclosure concerns an electro-mechanical flushing system for a resind to the expending the comprise of a seat in the toilet bowl inders seat contage of the toilet bowl to the toilet to the toilet

  preds = np.log(preds) / temperature


f the first to the fixe to the toilet bowl to the first from the toilet to the second the toilet bowl to the seat to the first to the flush to the toilet to the first from the toilet bowl and the toilet bowl to the toilet from the flushing the toilet to the toilet to the toilet to the toilet to the seat of the toilet bowl and a fixe the seat to the first to the toilet to the fixe to the fixe
----- diversity: 0.5
----- Generating with seed: "a canister flush valve is disclosed with a valve seat modified to accommodate a "
a canister flush valve is disclosed with a valve seat modified to accommodate a toilet from the first to the flush section of the pivotation the convention for in the for for the toilet to a fixe shape the for a toilet body to the flush for flush to the paper to the flushing toilet to the fad to the toilet such the persor to seat comprises as the first for the flush control the toilet bowl to the toilet for the fluid interior to the toilet bowl of the toilet from the f

<keras.callbacks.History at 0x7f561ef57880>

In [9]:
def generate_text(length, diversity):
    """Generate text with the trained model from a random seed.

    A random abstract is picked (preferring those not used to build the
    training set) and a random span of `training_length` chars from it seeds
    the generation. The seed is fed as integer char indices, which is the
    input format of the model's Embedding layer.

    Args:
        length: Number of chars to generate after the seed.
        diversity: Sampling temperature passed to `sample`.

    Returns:
        The seed followed by the `length` generated chars.

    Raises:
        ValueError: If no abstract is longer than `training_length`.
    """
    # Get random starting text
    candidates = [text for text in data[max_training:] if len(text) > training_length]
    if not candidates:
        candidates = [text for text in data if len(text) > training_length]
    if not candidates:
        raise ValueError('no abstract longer than %d chars to seed the generation' % training_length)

    text = candidates[np.random.randint(len(candidates))].lower()
    start_index = np.random.randint(0, len(text) - training_length)
    sentence = text[start_index: start_index + training_length]
    generated = sentence

    # Integer-encode the seed
    x_pred = [char_indices[char] for char in sentence]

    for _ in range(length):
        preds = model.predict(np.array([x_pred]), verbose=0)[0]
        next_index = sample(preds, diversity)

        generated += indices_char[next_index]
        # Slide the window: drop the oldest char, add the generated one
        x_pred = x_pred[1:] + [next_index]
    return generated