In [None]:
import numpy as np
import tensorflow as tf
import keras
import time
import math
import random
import pickle
import moses

In [None]:
#load the 'train' split of the MOSES dataset; the cells below tokenize its
#entries as SMILES strings
train_data = moses.get_dataset('train')

In [None]:
#class for splitting up SMILES strings into tokens and one hot encoding them
class SMILES_Tokenizer(object):
  """Tokenize SMILES strings and map the tokens to one-hot vectors.

  The vocabulary (``self.characters``) is, in order: element symbols with
  two-letter symbols first, then bond/ring/bracket/aromatic symbols, then the
  three control tokens 'G' (start), 'A' (padding) and 'E' (end). The position
  of a token in ``self.characters`` is the index of the 1 in its one-hot
  vector.

  Attributes:
    characters: ordered list of every token in the vocabulary.
    dict_len1: the tokens that are one character long.
    dict_len2: the tokens that are two characters long.
    one_hot_dict: token -> float32 one-hot vector; the extra key ``-1`` maps
      to a vector filled with -1.
  """

  def __init__(self):
    #element symbols that may appear in a SMILES string
    atoms = [
        'Li', 'Na', 'Al', 'Si', 'Cl', 'Sc', 'Zn', 'As', 'Se', 'Br', 'Sn',
        'Te', 'Cn', 'H', 'B', 'C', 'N', 'O', 'F', 'P', 'S', 'K', 'V', 'I'
    ]

    #brackets, bonds, ring-closure digits, charges and aromatic atoms
    special = [
        '(', ')', '[', ']', '=', '#', '%', '.', '0', '1', '2', '3', '4', '5',
        '6', '7', '8', '9', '+', '-', 'se', 'te', 'c', 'n', 'o', 's'
    ]

    #tokens used for starting, padding and ending a sequence
    padding = ['G', 'A', 'E']

    #sorted() is stable, so two-letter atoms come first and each group keeps
    #its listed order; the resulting order fixes every token's one-hot index
    self.characters = sorted(atoms, key=len, reverse=True) + special + padding
    dict_len = len(self.characters)

    self.dict_len1 = [x for x in self.characters if len(x) == 1]
    self.dict_len2 = [x for x in self.characters if len(x) == 2]

    #one one-hot vector per token
    self.one_hot_dict = {}
    for i, char in enumerate(self.characters):
      vec = np.zeros(dict_len, dtype=np.float32)
      vec[i] = 1
      self.one_hot_dict[char] = vec
    self.one_hot_dict[-1] = np.full(dict_len, dtype=np.float32, fill_value=-1)

  def tokenize(self, smiles):
    """Split a SMILES string into vocabulary tokens.

    Two-character tokens are matched before one-character tokens, so 'Cl' is
    a single token rather than 'C' followed by an unknown 'l'.

    Args:
      smiles: the SMILES string to split.

    Returns:
      List of token strings in the order they occur in ``smiles``.

    Raises:
      ValueError: if a character is not part of the vocabulary. Previously
        this case never advanced the cursor and looped forever.
    """
    tokens = []
    char_count = len(smiles)
    i = 0
    while i < char_count:
      #greedy match: try the two-character token first
      pair = smiles[i:i+2]
      if pair in self.dict_len2:
        tokens.append(pair)
        i += 2
        continue

      single = smiles[i]
      if single in self.dict_len1:
        tokens.append(single)
        i += 1
        continue

      raise ValueError(
          'Unknown SMILES character {!r} at position {} in {!r}'.format(
              single, i, smiles))
    return tokens

  def one_hot_encode(self, tokenized_smiles):
    """One-hot encode a sequence of tokens.

    Args:
      tokenized_smiles: iterable of keys of ``self.one_hot_dict``.

    Returns:
      float32 NumPy array of shape (number of tokens, vocabulary size); an
      empty input gives an array with zero rows.

    Raises:
      KeyError: if a token is not a key of ``self.one_hot_dict``.
    """
    vectors = [self.one_hot_dict[symbol] for symbol in tokenized_smiles]
    #reshape(-1, vocab) keeps the result 2-D even when there are no tokens
    return np.array(vectors, np.float32).reshape(-1, len(self.characters))

In [None]:
class Data_Processing(object):
  """Tokenize, pad, one-hot encode and decode SMILES strings, singly or in batches.

  Attributes:
    tokenizer: the SMILES_Tokenizer doing the per-string work.
    one_hot_dict: the tokenizer's token -> one-hot vector mapping.
    dictionary: the tokenizer's ordered token list.
    characters: same list as ``dictionary``; used by the decoding helpers.
    max_len: number of tokens a sequence is padded up to (before the start
      and end tokens are counted).
  """

  def __init__(self, max_len):
    self.tokenizer = SMILES_Tokenizer()
    self.one_hot_dict = self.tokenizer.one_hot_dict
    self.dictionary = self.tokenizer.characters
    #the decoding helpers read self.characters, which was never assigned and
    #made every one of them raise AttributeError; self.dictionary is kept for
    #existing callers
    self.characters = self.dictionary

    #setting max length for SMILES strings
    self.max_len = max_len

  #split SMILES string into tokens
  def tokenize(self, smi):
    """Return the token list for one SMILES string."""
    return self.tokenizer.tokenize(smi)

  #tokenize for batches
  def tokenize_data(self, data):
    """Return one token list per SMILES string in ``data``."""
    return [self.tokenizer.tokenize(smi) for smi in data]

  #one hot encode tokenized SMILES string
  def one_hot_encode(self, pad_smi):
    """Return the one-hot matrix for one token sequence."""
    return self.tokenizer.one_hot_encode(pad_smi)

  #one_hot_encode for batches
  def one_hot_encoding(self, data):
    """Return a list with one one-hot matrix per token sequence in ``data``."""
    return [self.tokenizer.one_hot_encode(pad_smi) for pad_smi in data]

  #adding padding and adding Start and End tokens to SMILES strings
  def pad(self, tokenized_smi):
    """Wrap a token list in 'G' ... 'E' and right-pad it with 'A'.

    The result has ``max_len + 2`` tokens when the input has at most
    ``max_len`` tokens. A longer input receives no padding and is NOT
    truncated, so its result is longer than ``max_len + 2``.
    """
    pad_count = self.max_len - len(tokenized_smi)
    #a negative count multiplies to an empty list, i.e. no padding
    return ['G'] + tokenized_smi + ['E'] + ['A'] * pad_count

  #padding for batches
  def padding(self, data):
    """Apply ``pad`` to every token list in ``data``."""
    return [self.pad(t_smi) for t_smi in data]

  def get_dictionary(self):
    """Return the token -> one-hot vector mapping."""
    return self.one_hot_dict

  #mapping one-hot encoded array to characters
  def basic_inverse_dictionary(self, encoded_str):
    """Decode a one-hot encoded sequence back into its tokens.

    Args:
      encoded_str: either one sequence of shape (tokens, vocab), or a batch of
        such sequences, in which case only the FIRST sequence is decoded
        (the behaviour the original batch-only code had).

    Returns:
      List of tokens, one per row that contains a 1; empty input gives [].
    """
    if len(encoded_str) == 0:
      return []
    first = np.asarray(encoded_str[0])
    #a 1-D first element means encoded_str itself is the single sequence
    matrix = np.asarray(encoded_str) if first.ndim == 1 else first
    hot_columns = np.where(matrix == 1)[1]
    return [self.characters[index] for index in hot_columns]

  #mapping index to character
  def inverse_dictionary(self, index):
    """Return the token stored at vocabulary position ``index``."""
    return self.characters[index]

  #inverse_dictionary for batches
  def inverse_dictionary_(self, index):
    """Return the tokens for an iterable of vocabulary positions."""
    return [self.characters[i] for i in index]



In [None]:
#creating generator function to process data in batches
def load_data(data, batch_size, max_len=60):
  """Endlessly yield one-hot encoded batches of SMILES strings.

  Each batch is tokenized, wrapped in start/end tokens, padded and one-hot
  encoded by Data_Processing. After the last complete batch the generator
  starts over from the beginning of ``data``; a trailing partial batch is
  never served.

  Args:
    data: sliceable sequence of SMILES strings supporting ``len``.
    batch_size: number of strings per batch.
    max_len: padding length handed to Data_Processing (default 60, the value
      that used to be hard-coded).

  Yields:
    NumPy array built from the per-string one-hot matrices of one batch.
    If ``data`` holds fewer than ``batch_size`` strings, the same short batch
    is yielded every time (previously every batch after the first was empty).
  """
  processor = Data_Processing(max_len)

  #number of complete batches in the data
  steps = len(data) // batch_size

  i = 0
  while True:
    #applying all processing to the current slice
    batch = data[i*batch_size:(i+1)*batch_size]
    tokens = processor.tokenize_data(batch)
    padded_tokens = processor.padding(tokens)
    encoded_tokens = np.array(processor.one_hot_encoding(padded_tokens))

    #wrap around after the last complete batch; ">=" also covers steps == 0,
    #where "==" could never fire and the index ran off the end of the data
    i += 1
    if i >= steps:
      i = 0

    yield encoded_tokens

In [None]:
#setting hyperparameters for model
#number of SMILES strings per training batch
BATCH_SIZE = 1024
#width of each GRU layer (read as a global by the GRU model class)
enc_units = 1024
#53 matches the tokenizer vocabulary (24 atoms + 26 special symbols + 3 control
#tokens); these two names are not referenced by the other cells shown here
vocab_inp_size = 53
vocab_tar_size = 53
#step size handed to the Adam optimizer
learning_rate = 0.0001
#complete batches per epoch; a trailing partial batch is dropped by the //
batch_steps = len(train_data) // BATCH_SIZE

In [None]:
#creating loss calculator and optimizer objects
#from_logits=True because the model's dense output layer applies no softmax;
#reduction='none' returns one loss value per example so the training step can
#average them itself with tf.nn.compute_average_loss
loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True, reduction='none')
#NOTE(review): the optimizer is created here, outside strategy.scope() —
#confirm this is acceptable for the TensorFlow version in use
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [None]:
#network architecture definition
class GRU(tf.keras.Model):
  """Three stacked GRU layers followed by a dense layer that emits logits.

  Every GRU layer has ``enc_units`` units (a notebook-level global), returns
  both its full output sequence and its final state, and is configured with
  dropout=0.3 on its inputs.

  NOTE(review): the dropout only has an effect when the layers are called in
  training mode. The training loop in this notebook calls the model without a
  training flag, so presumably dropout stays inactive there — pass
  ``training=True`` to ``call`` to enable it, and confirm against the
  TensorFlow version in use.

  Args:
    time_steps: stored on the instance; not used by the model itself.
    vocab_size: number of output logits per time step.
    batch_size: batch dimension of the zero state built by
      ``initialize_hidden_state``.
  """

  def __init__(self, time_steps, vocab_size, batch_size):
    super(GRU, self).__init__()
    #the three hidden layers are configured identically; they are created in
    #the same order and under the same attribute names as before
    self.gru1 = self._make_gru_layer()
    self.gru2 = self._make_gru_layer()
    self.gru3 = self._make_gru_layer()
    self.vocab_size = vocab_size
    self.time_steps = time_steps
    self.batch_size = batch_size

    #dense layer doesn't have softmax, softmax is only used for inference as
    #it's supposed to make model training more stable
    self.dense = tf.keras.layers.Dense(vocab_size,
                                       kernel_initializer='glorot_uniform')

  @staticmethod
  def _make_gru_layer():
    """Build one hidden GRU layer with the shared configuration."""
    return tf.keras.layers.GRU(
        enc_units,
        return_sequences=True,
        return_state=True,
        recurrent_initializer='glorot_uniform',
        dropout=0.3)

  #forward step for calculating prediction for next time step and hidden states
  def call(self, x, hidden_state1, hidden_state2, hidden_state3, training=None):
    """Run the input through the three GRU layers and the dense layer.

    Args:
      x: input sequence tensor; assumed (batch, time, features) — the reshape
        below reads ``output.shape[2]``.
      hidden_state1: initial state for the first GRU layer.
      hidden_state2: initial state for the second GRU layer.
      hidden_state3: initial state for the third GRU layer.
      training: forwarded to the GRU layers so their dropout can be switched
        on; the default ``None`` leaves the decision to Keras, exactly as
        before this parameter existed.

    Returns:
      Tuple ``(logits, state1, state2, state3)``; ``logits`` has the batch and
      time axes flattened into one, i.e. shape (batch * time, vocab_size).
    """
    #calculating hidden states and passing outputs along to next hidden layer
    output, hidden_state1 = self.gru1(
        x, initial_state=hidden_state1, training=training)
    output, hidden_state2 = self.gru2(
        output, initial_state=hidden_state2, training=training)
    output, hidden_state3 = self.gru3(
        output, initial_state=hidden_state3, training=training)

    #flattening batch and time axes for the dense output layer
    output = tf.reshape(output, (-1, output.shape[2]))
    output = self.dense(output)

    return output, hidden_state1, hidden_state2, hidden_state3

  #function for setting initial hidden states
  def initialize_hidden_state(self):
    """Return an all-zero state of shape (batch_size, enc_units)."""
    return tf.zeros((self.batch_size, enc_units))


In [None]:
import time
import os
#reset Keras global state before building the model
tf.keras.backend.clear_session()

#initializing TPU
#the TPU address is read from the COLAB_TPU_ADDR environment variable, so this
#cell raises KeyError when that variable is not set
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)

tf.tpu.experimental.initialize_tpu_system(resolver)
print("All devices: ", tf.config.list_logical_devices('TPU'))

#distribution strategy whose scope the model and training loop are built in
strategy = tf.distribute.experimental.TPUStrategy(resolver)

with strategy.scope():
  
  #initializing neural network: GRU(time_steps, vocab_size, batch_size)
  #62 = the 60 padded positions used by load_data plus the start and end
  #tokens added by Data_Processing.pad; 53 = tokenizer vocabulary size
  gru = GRU(62, 53, BATCH_SIZE)

  #choosing checkpoint location to save weights to
  checkpoint_dir = '/content/drive/MyDrive/GRU_CHEM_V15'
  checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
  #presumably routes checkpoint I/O through the local host so the Drive path
  #is reachable while training on the TPU worker — TODO confirm
  options = tf.train.CheckpointOptions(experimental_io_device="/job:localhost")
  checkpoint = tf.train.Checkpoint(optimizer=optimizer, GRU=gru)
  
  #Weights can be restored like this:
  #checkpoint.restore('/content/drive/MyDrive/GRU_CHEM_V13/ckpt-24', options=options)
  
  
  #One optimisation step on one batch.
  #  inp: accepted but never read inside this function
  #  targ: one-hot batch; indexed as targ[:, t], so assumed (batch, time, vocab)
  #  GRU_hidden1..3: initial states for the three GRU layers
  #  teacher_forcing: probability of feeding the ground-truth token as input
  #Returns the summed per-step loss divided by targ.shape[1].
  @tf.function
  def train_step(inp, targ, GRU_hidden1, GRU_hidden2, GRU_hidden3, teacher_forcing):
    loss = 0
    with tf.GradientTape() as tape:
      #NOTE(review): t starts at 1, so targ[:, 0] (the start token 'G' added by
      #Data_Processing.pad) is never fed to the model — confirm this matches
      #how sequences are seeded at inference time
      for t in range(1, targ.shape[1] - 1):
        
        #if teacher forcing is true or this is first input to neural net,
        #pass the target from the current time step to the model
        #NOTE(review): random.random() is plain Python inside a tf.function, so
        #this choice is presumably made once per time step when the function
        #is traced rather than on every call — verify before using
        #teacher_forcing < 1.0
        if t == 1 or random.random() < teacher_forcing:
          GRU_input = tf.expand_dims(targ[:,t], 1)
        
        #if teacher forcing is false, pass previous prediction as next input
        #to neural net (argmax of the logits, re-encoded as one-hot)
        else:
          GRU_input = tf.expand_dims(prediction, 1)
          GRU_input = tf.one_hot(tf.argmax(GRU_input, 2), 53)
        
        #calculating next hidden states and predictions of model
        prediction, GRU_hidden1, GRU_hidden2, GRU_hidden3 = gru(GRU_input, GRU_hidden1, GRU_hidden2, GRU_hidden3)
        
        #calculating loss against the token at the next position; padding
        #positions are included, no mask is applied
        loss += tf.nn.compute_average_loss(loss_object(targ[:,t+1], prediction), global_batch_size=BATCH_SIZE)
      
      
    #calculating loss for batch
    #NOTE(review): the divisor is targ.shape[1], while the loop above sums
    #targ.shape[1] - 2 steps; this value is only reported, the gradients below
    #use the undivided loss
    batch_loss = (loss / int(targ.shape[1]))

    variables = gru.trainable_variables

    #calculating gradients
    gradients = tape.gradient(loss, variables)

    #using gradients to update network weights
    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss
  
  #wrapping train step so it can run on TPU
  @tf.function
  def distributed_train_step(inp, targ, GRU_hidden1, GRU_hidden2, GRU_hidden3, teacher_forcing):
    #calculating losses for each TPU core
    per_replica_losses = strategy.run(train_step, args=(inp, targ, GRU_hidden1, GRU_hidden2, GRU_hidden3, teacher_forcing))
    #summing the per-replica losses into a single value
    #NOTE(review): the batch is a plain NumPy array, not a distributed dataset,
    #so presumably every replica receives the full batch and this SUM reports
    #num_replicas times the per-batch loss — the logged initial loss of ~30.8
    #is close to 8 * 60 * ln(53) / 62; confirm and shard the input if so
    return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses,
                          axis=None)

  EPOCHS = 25
  #1.0 means the ground-truth token is always fed (random.random() < 1.0)
  teacher_forcing = 1.0

  #creating generator object to load data (training data, despite the name)
  dataset_test = load_data(train_data, BATCH_SIZE)
  
  #start time of the whole run; not read again in this cell
  initial = time.time()
  for epoch in range(EPOCHS):
    start = time.time()
    total_loss = 0

    #initializing hidden states; the same zero tensor seeds all three layers
    #for every batch, since train_step does not return updated states
    gru_hidden = gru.initialize_hidden_state()

    for batch in range(batch_steps):
      #loading next batch of data
      inp = next(dataset_test)
      
      #training network on batch; the batch serves as both input and target
      batch_loss = distributed_train_step(inp, inp, gru_hidden, gru_hidden, gru_hidden, teacher_forcing)
      total_loss += batch_loss
      if batch % 100 == 0:
        print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                    batch,
                                                    batch_loss.numpy()))
        
    #saving weights every epoch
    checkpoint.save(file_prefix=checkpoint_prefix, options=options)

    print('Epoch {} Loss {:.4f} '.format(epoch + 1, total_loss / batch_steps))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))


INFO:tensorflow:Initializing the TPU system: grpc://10.17.149.250:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.17.149.250:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


All devices:  [LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:7', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:6', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:5', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:4', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:3', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:0', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:1', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:2', device_type='TPU')]
INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


Epoch 1 Batch 0 Loss 30.8058
Epoch 1 Batch 100 Loss 11.0516
Epoch 1 Batch 200 Loss 8.0252
Epoch 1 Batch 300 Loss 6.9017
Epoch 1 Batch 400 Loss 5.2825
Epoch 1 Batch 500 Loss 6.2903
Epoch 1 Batch 600 Loss 5.1282
Epoch 1 Batch 700 Loss 4.4622
Epoch 1 Batch 800 Loss 5.1664
Epoch 1 Batch 900 Loss 4.9961
Epoch 1 Batch 1000 Loss 4.6610
Epoch 1 Batch 1100 Loss 4.1100
Epoch 1 Batch 1200 Loss 3.7758
Epoch 1 Batch 1300 Loss 4.3745
Epoch 1 Batch 1400 Loss 4.4203
Epoch 1 Batch 1500 Loss 4.7138
Epoch 1 Loss 5.8173 
Time taken for 1 epoch 1831.1882202625275 sec

Epoch 2 Batch 0 Loss 3.4837
Epoch 2 Batch 100 Loss 3.9955
Epoch 2 Batch 200 Loss 3.4166
Epoch 2 Batch 300 Loss 3.8829
Epoch 2 Batch 400 Loss 3.2336
Epoch 2 Batch 500 Loss 4.8681
Epoch 2 Batch 600 Loss 3.6180
Epoch 2 Batch 700 Loss 3.4817
Epoch 2 Batch 800 Loss 4.0911
Epoch 2 Batch 900 Loss 3.8671
Epoch 2 Batch 1000 Loss 3.7343
Epoch 2 Batch 1100 Loss 3.4536
Epoch 2 Batch 1200 Loss 3.1441
Epoch 2 Batch 1300 Loss 3.6923
Epoch 2 Batch 1400 Loss 

KeyboardInterrupt: ignored