VAE example from http://nbviewer.jupyter.org/github/tfolkman/deep-learning-experiments/blob/master/VAE.ipynb

In [None]:
%matplotlib inline

import torch

from torch.autograd import Variable
from torch.nn.init import xavier_normal

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import numpy as np
import time
import math

In [None]:
BATCH_SIZE = 128

# Applied to every image as it is read from the dataset.
transform = transforms.ToTensor()

# MNIST training split: stored under /tmp (downloaded if needed), served in shuffled mini-batches.
trainset = torchvision.datasets.MNIST(
    root='/tmp',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

# MNIST test split: same storage and transform, iterated without shuffling.
testset = torchvision.datasets.MNIST(
    root='/tmp',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

In [None]:
import torch.utils.data as utils

# Toy data: two 2x2 feature arrays and one single-element target array per sample.
my_x = [
    np.array([[1.0, 2], [3, 4]]),
    np.array([[5., 6], [7, 8]]),
]
my_y = [np.array([4.]), np.array([2.])]

# Turn every array into a tensor and stack them along a new leading (sample) axis.
tensor_x = torch.stack(list(map(torch.Tensor, my_x)))
tensor_y = torch.stack(list(map(torch.Tensor, my_y)))

# Pair features with targets in a dataset.
my_dataset = utils.TensorDataset(tensor_x, tensor_y)
# NOTE: this rebinds `trainloader`, replacing the MNIST loader assigned earlier in the notebook.
trainloader = utils.DataLoader(my_dataset)

In [None]:
# Show the tensors held by the dataset (it was built from tensor_x and tensor_y).
my_dataset.tensors

# Define VAE

In [None]:
class VAE(nn.Module):
    """Variational autoencoder made of fully connected layers.

    The encoder maps an input to two vectors of length ``coding_size`` (a mean
    and a log-variance, the latter named ``gamma`` in this class), a latent
    code is drawn from them, and the decoder maps the code back to a vector of
    length ``input_size``.
    """

    def __init__(self, input_size, hidden_size=500, coding_size=20):
        """Create the layers.

        Args:
            input_size: Length of each (flattened) input vector.
            hidden_size: Width of the hidden layer in encoder and decoder.
            coding_size: Length of the latent code.
        """
        super().__init__()
        # Encoder: one shared hidden layer feeding two heads.
        self.hidden1 = nn.Linear(input_size, hidden_size)
        self.hidden2_mean = nn.Linear(hidden_size, coding_size)
        self.hidden2_gamma = nn.Linear(hidden_size, coding_size)
        # Decoder: hidden layer followed by the output layer.
        self.hidden3 = nn.Linear(coding_size, hidden_size)
        self.output = nn.Linear(hidden_size, input_size)
        # Activations.
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()

    def encoder(self, x):
        """Return ``(mean, gamma)`` computed from ``x``."""
        features = self.hidden1(x)
        features = self.relu(features)
        mean = self.hidden2_mean(features)
        gamma = self.hidden2_gamma(features)
        return mean, gamma

    def reparameterize(self, mu, logvar):
        """Return a latent code for the given mean and log-variance.

        In evaluation mode the mean is returned unchanged. In training mode the
        result is ``mu + exp(0.5 * logvar) * noise`` with standard-normal noise.
        """
        if not self.training:
            return mu
        std = torch.exp(0.5 * logvar)
        # Noise tensor with the same size and type as logvar, filled from N(0, 1).
        eps = Variable(logvar.data.new(logvar.size()).normal_())
        return mu + std * eps

    def decoder(self, x):
        """Return ``(logits, sigmoid(logits))`` decoded from the latent code ``x``."""
        logits = self.output(self.relu(self.hidden3(x)))
        return logits, self.sigmoid(logits)

    def forward(self, x):
        """Encode, sample and decode; return ``(logits, sigmoid(logits), mean, gamma)``."""
        mean, gamma = self.encoder(x)
        code = self.reparameterize(mean, gamma)
        logits, probabilities = self.decoder(code)
        return logits, probabilities, mean, gamma

# Initialise VAE

In [None]:
# vae = VAE(input_size=28*28)

In [None]:
# input_size=2*2 matches the flattened 2x2 samples of the toy dataset (my_x);
# the commented-out variant with 28*28 is the size used for MNIST images.
vae = VAE(input_size=2*2)

# Define loss and optimizer

In [None]:
def custom_loss(recon_x, x, mu, logvar):
    """Return the VAE loss summed over all elements of the batch.

    Args:
        recon_x: Reconstruction as raw logits (before the sigmoid).
        x: Target tensor with the same shape as ``recon_x``.
        mu: Latent means.
        logvar: Latent log-variances.

    Returns:
        Scalar tensor: the summed binary cross-entropy between ``recon_x`` and
        ``x`` plus ``0.5 * sum(exp(logvar) + mu**2 - 1 - logvar)``, i.e. the KL
        divergence between N(mu, exp(logvar)) and a standard normal.
    """
    # reduction='sum' is the supported spelling of the deprecated size_average=False.
    ce_loss = F.binary_cross_entropy_with_logits(recon_x, x, reduction='sum')
    latent_loss = 0.5 * torch.sum(torch.exp(logvar) + mu.pow(2) - 1 - logvar)
    return ce_loss + latent_loss

# Adam over every parameter of the VAE, learning rate 1e-3.
optimizer = optim.Adam(vae.parameters(), lr=1e-3)

In [None]:
def timeSince(since):
    """Format the wall-clock time elapsed since ``since`` as ``'<minutes>m <seconds>s'``.

    Args:
        since: Start timestamp in seconds, as returned by ``time.time()``.

    Returns:
        String with whole minutes and the remaining seconds truncated to an integer.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

# Define training

In [None]:
N_EPOCHS = 100  # number of passes over trainloader
total_loss = 0  # running sum of batch losses since the last point added to all_losses
print_every = 70000 // BATCH_SIZE  # iterations between progress printouts
plot_every = 7000 // BATCH_SIZE  # iterations between points appended to all_losses
all_losses = []  # averaged losses collected for the loss plot
iter = 1  # global iteration counter; NOTE: this name shadows the builtin iter()

start = time.time()  # reference timestamp for timeSince()
vae.train()  # training mode: reparameterize() adds noise to the latent mean

In [None]:
import pixiedust

In [None]:
%%pixie_debugger
# Flattened input length the model expects, read from its first layer. The loop used
# to hard-code 28*28, which cannot be applied to the 2x2 toy samples and does not
# match a VAE built with input_size=2*2.
input_size = vae.hidden1.in_features

for e in range(N_EPOCHS):
    for i_batch, batch in enumerate(trainloader):
        image = batch[0]
        label = batch[1]
        # Flatten every sample to a vector of length input_size.
        image = Variable(image.view(-1, 1, input_size))

        # One optimisation step: reset gradients, forward pass, loss, backward pass, update.
        vae.zero_grad()
        output, output_sigmoid, mean, gamma = vae(image)
        loss = custom_loss(output, image, mean, gamma)
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float so all_losses holds numbers, not tensors.
        batch_loss = loss.item()
        total_loss += batch_loss

        if iter % print_every == 0:
            print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / (len(trainloader) * N_EPOCHS) * 100, batch_loss))

        if iter % plot_every == 0:
            # Average over the iterations since the last point, scaled by the nominal batch size.
            all_losses.append(total_loss / (plot_every*BATCH_SIZE))
            total_loss = 0

        iter = iter + 1

# Evaluation

## Plot losses over time

In [None]:
# One point per plot_every training iterations: the loss summed over that window
# divided by plot_every * BATCH_SIZE (see the training loop).
plt.plot(all_losses)

## Visualize features

In [None]:
# Weights of the first hidden layer, copied to the CPU as a NumPy array.
hidden1_weights = vae.hidden1.weight.data.cpu().numpy()
# Length of the first axis of the weight matrix.
len(hidden1_weights)

### Plot example of hidden layer

In [None]:
neuron = 0
# Show the incoming weights of one hidden unit as a square image. The side length is
# inferred from the number of weights instead of assuming 28x28, so the cell also works
# when the VAE was built with a different (square) input_size such as 2*2.
unit_weights = hidden1_weights[neuron]
side = int(round(math.sqrt(unit_weights.size)))
t = plt.imshow(unit_weights.reshape([side, side]),
           cmap='Greys')

## Decode some images

In [None]:
# Set module to evaluation mode; reparameterize() then returns the mean without adding noise
vae.eval()

### Get a random latent picture

In [None]:
# 64 latent vectors of length 20 (the VAE's default coding_size), drawn from a standard normal.
random = Variable(torch.randn(64, 20))
random

### Decode latent picture

In [None]:
# decoder() returns (logits, sigmoid(logits)); only the sigmoid output is kept.
_, sample = vae.decoder(random)
sample

In [None]:
# Show one decoded sample as a square image. The side length is inferred from the
# decoded vector instead of assuming 28x28, which fails for a VAE built with
# input_size=2*2.
decoded = sample[58].data.numpy()
side = int(round(math.sqrt(decoded.size)))
t = plt.imshow(decoded.reshape(side, side), cmap="Greys")

ModuleNotFoundError: No module named 'rdkit'

In [2]:
import numpy as np
import torch
import argparse
from torch.autograd import Variable
from rdkit import Chem
from molencoder.models import MolEncoder, MolDecoder
from molencoder.models import MolEncoder, MolDecoder
from molencoder.utils import( load_dataset, initialize_weights,ReduceLROnPlateau, save_checkpoint, validate_model)
from molencoder.featurizers import Featurizer, OneHotFeaturizer
 
 
# SMILES strings used as defaults for the interpolation endpoints (see get_arguments()).
SOURCE = 'c1ccccn1'
DEST =  'c1ccccc1'
# Default number of interpolation steps.
STEPS = 200
# Fixed length that interpolate() left-justifies (space-pads) each SMILES string to.
WIDTH=120
 
# charset from chembl: a decoded index i maps to charset[i] (see decode_smiles_from_index()).
# The first entry is the space that serves as padding and is stripped after decoding.
charset = [' ', '#', '%', '(', ')', '+', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '=', '@', 'A', 'B', 'C', 'F', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P', 'S', 'T', 'V', 'X', 'Z', '[', '\\', ']', 'a', 'c', 'e', 'g', 'i', 'l', 'n', 'o', 'p', 'r', 's', 't']
 
def decode_smiles_from_index(vec, charset):
    """Translate a sequence of character indices into a string.

    Args:
        vec: Iterable of integer indices into ``charset``.
        charset: Sequence of characters.

    Returns:
        The looked-up characters joined together, with leading and trailing
        whitespace (such as space padding) removed.
    """
    characters = [charset[index] for index in vec]
    return "".join(characters).strip()
 
 
def get_arguments(argv=None):
    """Parse the options of the interpolation script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]`` exactly as before; pass ``[]`` to
            get the defaults, e.g. in a notebook where ``sys.argv`` may hold
            the kernel's own flags.

    Returns:
        argparse.Namespace with the attributes ``source``, ``dest`` and ``steps``.
    """
    parser = argparse.ArgumentParser(description="Interpolate from source to dest in steps")
    # Each option falls back to its own constant; the defaults used to be crossed
    # (--source defaulted to DEST and --dest to SOURCE).
    parser.add_argument("--source", type=str, default=SOURCE)
    parser.add_argument("--dest", type=str, default=DEST)
    parser.add_argument("--steps", type=int, default=STEPS)
    return parser.parse_args(argv)
 
def interpolate(source, dest, steps, charset, encoder, decoder):
    """Decode strings along a straight line between two encoded inputs.

    Both input strings are space-padded to ``WIDTH``, one-hot featurized with
    ``charset``, moved to the GPU and passed through ``encoder``. The decoder
    is then evaluated at evenly spaced points between the two encodings.

    Args:
        source: String for the start point.
        dest: String for the end point.
        steps: Number of points to decode.
        charset: Characters used for featurizing and for decoding indices.
        encoder: Callable applied to the featurized input.
        decoder: Callable applied to each interpolated point.

    Returns:
        List of ``(i, point, decoded_string)`` tuples for ``i`` in
        ``range(steps)``, where ``point = encoded_source + i * (encoded_dest -
        encoded_source) / steps``. The list therefore starts at the source and
        stops one step short of the destination.
    """
    padded_source = source.ljust(WIDTH)
    padded_dest = dest.ljust(WIDTH)

    featurizer = OneHotFeaturizer(charset=charset)
    source_onehot = featurizer.featurize(smiles=[padded_source])
    dest_onehot = featurizer.featurize(smiles=[padded_dest])

    # Inputs are converted to float and placed on the GPU unconditionally.
    source_input = Variable(torch.from_numpy(source_onehot).float()).cuda()
    dest_input = Variable(torch.from_numpy(dest_onehot).float()).cuda()

    source_latent = encoder(source_input)
    dest_latent = encoder(dest_input)
    delta = (dest_latent - source_latent) / float(steps)

    results = []
    for i in range(steps):
        point = source_latent + (delta * i)
        # Reduce the decoder output to one charset index per position (argmax over axis 2).
        indices = np.argmax(decoder(point).cpu().data.numpy(), axis=2)
        smiles = decode_smiles_from_index(indices[0], charset)
        results.append((i, point, smiles))
    return results
 
def main():
    """Load the best checkpoint and print the decoded string for every interpolation step."""
    args = get_arguments()

    print( torch.cuda.is_available() )

    # Build each network once, directly on the GPU. A CPU copy of both models used
    # to be created and initialised first and was then immediately discarded.
    encoder = MolEncoder( c = len(charset)).cuda()
    encoder.apply(initialize_weights)

    decoder = MolDecoder( c = len(charset)).cuda()
    decoder.apply(initialize_weights)

    # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
    # Alternative checkpoint kept from the original: "tempcheckpoint.pth.tar".
    bestmodel = torch.load("model_best.pth.tar")
    encoder.load_state_dict(bestmodel["encoder"])
    decoder.load_state_dict(bestmodel["decoder"])

    results = interpolate( args.source, args.dest, args.steps, charset, encoder, decoder )
    for result in results:
        # Each result is (step index, latent point, decoded string); print index and string.
        print(result[0], result[2])
 
#if __name__=="__main__":
#    main()

ModuleNotFoundError: No module named 'molencoder'