In [15]:
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader
from torch.utils.data import DataLoader
import torch
from typing import Counter
import torch.nn as nn
import torch.optim as optim

In [7]:
# Use this if you are using any Cuda enabled system
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [8]:
class TransMolecule(object):

    def __init__(self, molecule_num=5):
        self.molecule_num = molecule_num

    def __call__(self, sample):
        one_hot = sample.x[:, 0:self.molecule_num]
        molecule_counts = torch.sum(one_hot, dim=0)

        return molecule_counts

In [20]:
data = QM9(root='./practice_data', transform=TransMolecule(molecule_num=5))

"""
each batch is considered a hugh graph with many nodes and edges,
in EGNN, they introduce the concept of l2 distance between nodes, 
yet I am not including this (probably not) for now. 


"""
dataloader = DataLoader(data, batch_size=128, shuffle=False) 
for i , x in enumerate(dataloader):
    print(x)
    if i > 20: 
        break

tensor([[ 4.,  1.,  0.,  0.,  0.],
        [ 3.,  0.,  1.,  0.,  0.],
        [ 2.,  0.,  0.,  1.,  0.],
        [ 2.,  2.,  0.,  0.,  0.],
        [ 1.,  1.,  1.,  0.,  0.],
        [ 2.,  1.,  0.,  1.,  0.],
        [ 6.,  2.,  0.,  0.,  0.],
        [ 4.,  1.,  0.,  1.,  0.],
        [ 4.,  3.,  0.,  0.,  0.],
        [ 3.,  2.,  1.,  0.,  0.],
        [ 4.,  2.,  0.,  1.,  0.],
        [ 3.,  1.,  1.,  1.,  0.],
        [ 8.,  3.,  0.,  0.,  0.],
        [ 6.,  2.,  0.,  1.,  0.],
        [ 6.,  2.,  0.,  1.,  0.],
        [ 6.,  3.,  0.,  0.,  0.],
        [ 4.,  2.,  0.,  1.,  0.],
        [ 6.,  3.,  0.,  1.,  0.],
        [ 5.,  2.,  1.,  1.,  0.],
        [ 4.,  1.,  2.,  1.,  0.],
        [10.,  4.,  0.,  0.,  0.],
        [ 8.,  3.,  0.,  1.,  0.],
        [ 2.,  4.,  0.,  0.,  0.],
        [ 1.,  3.,  1.,  0.,  0.],
        [ 0.,  2.,  2.,  0.,  0.],
        [ 2.,  3.,  0.,  1.,  0.],
        [ 1.,  2.,  1.,  1.,  0.],
        [ 2.,  2.,  0.,  2.,  0.],
        [ 6.,  4.,  

In [13]:
class Autoencoder(nn.Module):
    def __init__(self, latent_dim,hidden_dim,input_dim):
        super(Autoencoder, self).__init__()
        output_dim = input_dim
        #TODO
        self.encoder = Encoder(latent_dim,hidden_dim,input_dim)
        self.decoder = Decoder(latent_dim,hidden_dim,output_dim)

    def forward(self, x):
        #TODO
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
    
class Encoder(nn.Module):
    def __init__(self, latent_dim,hidden_dim,input_dim):
        super(Encoder, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, latent_dim)
        )
    
    def forward(self, x):
        x = x.view(x.size(0), -1)
        return self.model(x)
    

class Decoder(nn.Module):
    def __init__(self, latent_dim,hidden_dim,output_dim):
        super(Decoder, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        x = self.model(x)
        return x.view(x.size(0),5) 

In [25]:
hidden_dim = 15
latent_dim = 2
n_epochs = 15 
from torchsummary import summary
autoencoder = Autoencoder(latent_dim, hidden_dim,5).to(device)
summary(autoencoder,(1,5))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 15]              90
         LeakyReLU-2                   [-1, 15]               0
            Linear-3                   [-1, 15]             240
         LeakyReLU-4                   [-1, 15]               0
            Linear-5                    [-1, 2]              32
           Encoder-6                    [-1, 2]               0
            Linear-7                   [-1, 15]              45
         LeakyReLU-8                   [-1, 15]               0
            Linear-9                   [-1, 15]             240
        LeakyReLU-10                   [-1, 15]               0
           Linear-11                    [-1, 5]              80
          Sigmoid-12                    [-1, 5]               0
          Decoder-13                    [-1, 5]               0
Total params: 727
Trainable params: 727

In [26]:
criterion = nn.MSELoss() # TODO: specify loss function
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001) # TODO: specify optimizer

for epoch in range(n_epochs):
    train_loss = 0
    for x in dataloader:
        #TODO
        # clear the gradients of all optimized variables
        # forward pass:
        # calculate the loss using the criterion defined above
        # backward pass: compute gradient of the loss with respect to model parameters
        # perform a single optimization step (parameter update)
        # update running training loss
        x = x.to(device)
        optimizer.zero_grad()
        output = autoencoder(x)
        loss = criterion(output, x)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss = train_loss/len(dataloader)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))

Epoch: 0 	Training Loss: 21.741423
Epoch: 1 	Training Loss: 21.630748
Epoch: 2 	Training Loss: 21.631905
Epoch: 3 	Training Loss: 21.630729
Epoch: 4 	Training Loss: 21.630933
Epoch: 5 	Training Loss: 21.630083
Epoch: 6 	Training Loss: 21.629822
Epoch: 7 	Training Loss: 21.629775
Epoch: 8 	Training Loss: 21.628553
Epoch: 9 	Training Loss: 21.605560
Epoch: 10 	Training Loss: 21.599110
Epoch: 11 	Training Loss: 21.598661
Epoch: 12 	Training Loss: 21.597918
Epoch: 13 	Training Loss: 21.597797
Epoch: 14 	Training Loss: 21.597634
