In [None]:
# Initialize drive: mount Google Drive into the Colab runtime at /content/drive.
# The mount is interactive (it asks for an authorization code), so this cell
# cannot run unattended.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
# Move to Google Drive 
%cd drive
%cd 'My Drive'
%cd 'MSc Stats Dissertation'


/content/drive
/content/drive/My Drive
/content/drive/My Drive/MSc Stats Dissertation


In [None]:
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras as keras
import tensorflow.keras.layers  as layers
import pandas as pd
import math
import time
import numpy as np
import matplotlib.pyplot as plt

In [None]:
## Convolution-related hyper-parameters.
## NOTE(review): none of the cells shown here read BATCH_NORM or the CONV_*
## values -- presumably they are consumed by a model definition elsewhere;
## confirm before relying on them.
BATCH_NORM = True
CONV_ACTIVATION = 'tanh'
CONV_DEPTH = 4
CONV_DIM_DEPTH = 32
CONV_DIM_WIDTH = 16
CONV_D_GF = 1.15875438383
CONV_W_GF = 1.1758149644
# NOTE: HIDDEN_DIM is assigned again (to 256) in the constants cell further
# down, so this value is overwritten as soon as that cell runs.
HIDDEN_DIM = 100

In [None]:
## Load the encoded training sequences and the two vocabulary lookup tables.
## NOTE: allow_pickle=True lets np.load unpickle arbitrary objects, which can
## execute code -- only load .npy files that come from a trusted source.
train_smiles_X = np.load('./vocab/train_selfies_X.npy', allow_pickle=True)

# Each vocabulary file is read as an object array; its first element is
# converted to a plain dict (presumably a pickled dict -- confirm).
vocab = dict(np.load('./vocab/selfies_vocab.npy', allow_pickle=True).ravel()[0])
vocab_index = dict(np.load('./vocab/selfies_vocab_index.npy', allow_pickle=True).ravel()[0])

In [None]:
## Necessary CONSTANTS
# Number of sequences per mini-batch; also used to slice train/test batches.
BATCH_SIZE = 256
# Number of entries in the loaded vocabulary dict.
VOCAB_SIZE = len(vocab)
# Number of passes over the training batches in train_smile_vae.
EPOCHS = 40
# Adam learning rate for the encoder/decoder (end-to-end) update.
LEARNING_RATE = 1e-4
# Adam learning rate shared by the two auxiliary-network optimizers.
Z_X_LEARNING_RATE= 1e-5
# NOTE(review): DROP_OUT and DROPOUT (below) hold the same value and neither is
# read by the cells shown here -- the model cell passes literal 0.2 instead.
DROP_OUT= 0.2
EMBEDDING_DIM = 192  ## Embedding dim of the characters
# Length every sequence is padded/truncated to by pad_sequences.
# NOTE(review): the original comment said "Maximum size of a SMILE
# (100 + BOS, EOS)", which does not match the value 250 -- confirm.
PAD_LEN = 250
# One less than PAD_LEN: training feeds X[:, :-1] as input and X[:, 1:] as target.
MAX_LEN = PAD_LEN -1
DROPOUT = 0.2
LATENT_DIM = 64
# Overrides the HIDDEN_DIM = 100 assigned in the earlier constants cell.
HIDDEN_DIM = 256

In [None]:
# Positions holding token id 1; every sequence chunk produced below starts at one of them.
index = np.where(train_smiles_X == 1)

# Cut the flat token stream at those positions and discard the chunk that
# precedes the first marker.
t = np.split(train_smiles_X, index[0].tolist())[1:]

# Right-pad (with 0) to a fixed length so the data forms one rectangular array.
t = tf.keras.preprocessing.sequence.pad_sequences(t, maxlen=PAD_LEN, padding='post')

# Number of complete batches available.
NUM_BATCHES = len(t) // BATCH_SIZE

In [None]:
# 99% / 1% train/test split, counted in whole batches.
# The test share is the remainder instead of a second floor(): flooring both
# shares could silently drop a batch, and floor(NUM_BATCHES * 0.01) is 0 for
# fewer than 100 batches, which left test_X empty.
NUM_TRAIN_BATCH = math.floor(NUM_BATCHES*0.99)
NUM_TEST_BATCH = NUM_BATCHES - NUM_TRAIN_BATCH

In [None]:
# The first NUM_TRAIN_BATCH full batches are used for training ...
train_X = t[:NUM_TRAIN_BATCH * BATCH_SIZE]
# ... and the NUM_TEST_BATCH full batches that follow them are held out.
test_X = t[NUM_TRAIN_BATCH * BATCH_SIZE:(NUM_TRAIN_BATCH + NUM_TEST_BATCH) * BATCH_SIZE]

In [None]:
import math
def softmax_logits_loss_with_pad(labels,logits):
  """Summed sparse softmax cross-entropy that ignores padded positions.

  Args:
    labels: integer class ids; id 0 is treated as padding.
    logits: unnormalised class scores, with the classes on the last axis.

  Returns:
    Scalar tensor holding the sum (not the mean) of the per-position
    cross-entropy over every position whose label is non-zero.
  """
  # 1.0 where the label is a real token, 0.0 where it is padding (id 0)
  pad_mask = tf.cast(tf.not_equal(labels, 0), tf.float32)
  per_token_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
  return tf.reduce_sum(per_token_loss * pad_mask)


In [None]:
## Cyclical Annealing ###
def frange_cycle_linear(n_iter, start=0.0, stop=1.0,  n_cycle=4, ratio=0.5):
  """Build a cyclical linear annealing schedule.

  The n_iter steps are divided into n_cycle equal periods. Inside each
  period the value ramps linearly from `start` towards `stop` over the first
  `ratio` fraction of the period and is `stop` for the rest.

  Args:
    n_iter: total number of steps (length of the returned array).
    start: value at the beginning of every cycle.
    stop: value reached at the end of each ramp and held afterwards.
    n_cycle: number of cycles.
    ratio: fraction of each period spent ramping.

  Returns:
    1-D float numpy array of length n_iter. An empty array is returned for
    n_iter == 0 (this previously raised ZeroDivisionError).
  """
  schedule = np.ones(n_iter) * stop
  if n_iter == 0:
    # No steps to schedule; the period would be 0 and the step computation
    # below would divide by zero.
    return schedule
  period = n_iter/n_cycle
  step = (stop-start)/(period*ratio) # linear schedule
  for c in range(n_cycle):
    offset = c*period  # (fractional) position where this cycle begins
    v, i = start, 0
    while v <= stop and (int(i+offset) < n_iter):
      schedule[int(i+offset)] = v
      v += step
      i += 1
  return schedule

In [None]:
def train_smile_vae(smile_ivae,train_X, test_X,model_type, betas, num_updates):
  """Train the implicit VAE with alternating auxiliary / end-to-end updates.

  For every mini-batch the auxiliary networks `nu_xz` and `nu_z` are updated
  `num_updates` times, then the encoder and decoder are updated once on the
  padded softmax loss plus a beta-weighted regulariser. Every `display_step`
  batches (and at batch 1) a random test batch is evaluated and a progress
  line is printed. Weights are saved to 'selfies_ivae_weights' whenever the
  batch index is a multiple of 3000 (which includes batch 0 of every epoch).

  Relies on the notebook-level constants BATCH_SIZE, EPOCHS, LATENT_DIM,
  LEARNING_RATE and Z_X_LEARNING_RATE and on softmax_logits_loss_with_pad.

  Args:
    smile_ivae: model exposing `encoder`, `decoder`, `nu_xz`, `nu_z`,
      `kl_xz_loss`, `kl_z_loss` and `save_weights`; calling it on a batch
      returns `(logits, enc, z_x)`.
    train_X: 2-D integer array of padded token ids (0 = padding). Only the
      first `train_X.shape[0] // BATCH_SIZE` full batches are used.
    test_X: 2-D integer array in the same format; must hold at least one
      full batch, otherwise picking a random test batch raises ValueError.
    model_type: 'mle' regularises with `nu_xz(z_x, enc)`; any other value
      regularises with `nu_z(z_x)`.
    betas: sequence of regulariser weights indexed by the global step; needs
      at least `(train_X.shape[0] // BATCH_SIZE) * EPOCHS` entries.
    num_updates: number of auxiliary-network updates per mini-batch.

  Returns:
    None. The model is updated in place.
  """
  clip = -1  # global-norm clipping threshold; -1 disables clipping
  display_step = 100
  STEPS_PER_EPOCH = train_X.shape[0]//BATCH_SIZE
  TEST_STEPS = test_X.shape[0]//BATCH_SIZE

  ## Define optimizers
  optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
  optimizer_xz = tf.keras.optimizers.Adam(learning_rate=Z_X_LEARNING_RATE)
  optimizer_z = tf.keras.optimizers.Adam(learning_rate=Z_X_LEARNING_RATE)

  def regulariser(z_x, enc):
    # Output of the auxiliary network selected by model_type
    if model_type == 'mle':
      return smile_ivae.nu_xz(z_x =z_x, enc = enc)
    return smile_ivae.nu_z(z_x = z_x)

  def apply_update(opt, grads, variables):
    # Optionally clip by global norm, then take one optimizer step
    if clip != -1:
      grads, _ = tf.clip_by_global_norm(grads, clip)
    opt.apply_gradients(zip(grads, variables))

  ## Cyclical Annealing index ##
  beta_ind = 0

  for epoch in range(EPOCHS):
    ## Visit the batches in a fresh random order every epoch
    indxs = np.arange(STEPS_PER_EPOCH)
    np.random.shuffle(indxs)
    for batch in range(STEPS_PER_EPOCH):
      ## Get relevant batch data
      X_batch = train_X[indxs[batch]*BATCH_SIZE: indxs[batch]*BATCH_SIZE + BATCH_SIZE]
      beta = betas[beta_ind]

      ## Update the auxiliary networks
      for _ in range(num_updates):
        eps = tf.convert_to_tensor(np.random.normal(size=(X_batch.shape[0], LATENT_DIM)),dtype = tf.float32)
        # Two tapes because each auxiliary loss is differentiated separately
        with tf.GradientTape() as kl_xz_tape, tf.GradientTape() as kl_z_tape:

          ## Get encoding of training data
          enc, z_x = smile_ivae.encoder(X_batch[:,:-1],eps)
          # Variables are collected after the forward pass, as in the
          # original code (presumably so lazily-built layers exist -- confirm)
          kl_xz_vars = smile_ivae.nu_xz.trainable_variables
          kl_z_vars = smile_ivae.nu_z.trainable_variables

          ## Auxiliary losses
          kl_xz = smile_ivae.kl_xz_loss(z_x =z_x, enc = enc)
          kl_z  = smile_ivae.kl_z_loss(z_x = z_x)

        ## KL updates
        apply_update(optimizer_xz, kl_xz_tape.gradient(kl_xz, kl_xz_vars), kl_xz_vars)
        apply_update(optimizer_z, kl_z_tape.gradient(kl_z, kl_z_vars), kl_z_vars)

      with tf.GradientTape() as tape:
        ## End-to-end update for the implicit model
        x_decoded, enc, z_x = smile_ivae(X_batch[:,:-1])
        # Build a fresh list: this avoids shadowing the builtin `vars` and
        # avoids extending the list object handed back by the decoder.
        model_vars = (list(smile_ivae.decoder.trainable_variables)
                      + list(smile_ivae.encoder.trainable_variables))
        loss_op = softmax_logits_loss_with_pad(labels=X_batch[:,1:],logits =x_decoded)

        ## Reconstruction loss plus beta-weighted auxiliary regulariser
        loss_op = loss_op + (beta*tf.reduce_sum(regulariser(z_x, enc)))
      apply_update(optimizer, tape.gradient(loss_op, model_vars), model_vars)

      ## Display training information
      if batch % display_step == 0 or batch == 1:
        ### Get logits for a randomly chosen test batch
        rand_int = np.random.randint(low=0,high = TEST_STEPS)
        test_batch = test_X[rand_int*BATCH_SIZE: rand_int*BATCH_SIZE + BATCH_SIZE ]
        test_labels = test_batch[:,1:]
        x_decoded, enc, z_x = smile_ivae(test_batch[:,:-1])
        test_loss = softmax_logits_loss_with_pad(labels=test_labels,logits =x_decoded)

        ## Get testing loss
        # NOTE(review): the test loss adds the MEAN of the regulariser while
        # the training loss adds its SUM, so the two printed losses are not
        # on the same scale. Kept as-is; confirm which one is intended.
        nu_test = regulariser(z_x, enc)
        test_loss = test_loss + (beta*tf.reduce_mean(nu_test))

        ## Token accuracy over non-padded positions only.
        # Correct predictions are multiplied by the padding mask: previously
        # padded positions predicted as 0 were counted as correct although
        # the denominator only counts non-padded tokens.
        # argmax is taken on the logits directly; softmax does not change it.
        weights = tf.cast(tf.not_equal(test_labels, 0), tf.float32)
        nonpad_seq = tf.math.count_nonzero(weights, dtype=tf.dtypes.float32, )
        correct_pred_test = tf.equal(tf.argmax(x_decoded,-1),test_labels)
        accuracy_test = tf.reduce_sum(tf.cast(correct_pred_test, tf.float32)*weights)/nonpad_seq

        ## Get regularizing loss for model
        kl_loss = beta*tf.reduce_sum(nu_test)

        ### Print out test accuracy on model
        print("Step " + str(batch) + ", Training Loss = " + \
                "{:.3f}".format(loss_op) + ", Test Loss = " + \
                "{:.3f}".format(test_loss) + ", Test Accuracy = " + \
                "{:.3f}".format(accuracy_test) +", Implict KL value = " + \
                "{:.3f}".format(kl_loss))
      beta_ind+=1
      ## Save every so often
      if  (batch) % 3000 == 0:
        smile_ivae.save_weights('selfies_ivae_weights')

In [None]:
# One annealing weight per optimisation step of the whole run
# (full batches per epoch times number of epochs).
betas = frange_cycle_linear((train_X.shape[0] // BATCH_SIZE) * EPOCHS)

# NOTE(review): SMILE_IMPLICIT_VAE is not defined in the cells shown here; it
# has to be defined or imported before this cell is run.
smile_ivae = SMILE_IMPLICIT_VAE(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    max_len=MAX_LEN,
    latent_dim=LATENT_DIM,
    hidden_dim=HIDDEN_DIM,
    recurrent_dropout=0.2,
    dropout_rate=0.2,
    epsilon_std=1.0,
)

In [None]:
## Train with 3 updates of the auxiliary network for every end-to-end
## update.
## NOTE: train_smile_vae only special-cases model_type == 'mle'; the value
## 'mle_li' therefore takes the other branch, which regularises with
## nu_z(z_x) rather than nu_xz(z_x, enc).
train_smile_vae(smile_ivae,train_X,test_X,'mle_li',betas,3)

Step 0, Training Loss = 62735.016, Test Loss = 61558.016, Test Accuracy = 0.216, Implict KL value = -0.000
Step 1, Training Loss = 66397.398, Test Loss = 60413.602, Test Accuracy = 0.242, Implict KL value = -0.000
Step 100, Training Loss = 30763.246, Test Loss = 29791.525, Test Accuracy = 0.270, Implict KL value = 58.649
Step 200, Training Loss = 27964.934, Test Loss = 28035.053, Test Accuracy = 0.253, Implict KL value = 166.675
Step 300, Training Loss = 29332.871, Test Loss = 28312.623, Test Accuracy = 0.301, Implict KL value = 222.529
Step 400, Training Loss = 31472.949, Test Loss = 28512.705, Test Accuracy = 0.296, Implict KL value = 120.018
Step 500, Training Loss = 29511.568, Test Loss = 28056.635, Test Accuracy = 0.304, Implict KL value = 67.250
Step 600, Training Loss = 29225.125, Test Loss = 26896.844, Test Accuracy = 0.325, Implict KL value = 44.876
Step 700, Training Loss = 28374.314, Test Loss = 26805.777, Test Accuracy = 0.336, Implict KL value = 51.246
Step 800, Training L

In [None]:
# Show the model summary (presumably Keras' Model.summary, listing layers and
# parameter counts -- confirm against the SMILE_IMPLICIT_VAE definition).
smile_ivae.summary()