In [3]:
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader
import torch
from typing import Counter
import torch.nn as nn
import torch.optim as optim

In [4]:
# Pick the compute device: the GPU when PyTorch can see a CUDA runtime,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cpu


In [5]:
class TransMolecule(object):
    """Transform that turns a graph sample into a fixed-size adjacency matrix.

    The sample is expected to expose ``x`` (one row per atom) and
    ``edge_index`` (a ``2 x num_edges`` index tensor). The result is a
    symmetric 0/1 matrix, zero-padded to ``max_atoms x max_atoms`` so that
    samples of different sizes can be stacked into one batch.

    Args:
        molecule_num: Stored on the instance; not used by the transform itself.
        max_atoms: Side length of the returned matrix. Defaults to 29, the
            size this notebook's model is built for.
    """

    def __init__(self, molecule_num=5, max_atoms=29):
        self.molecule_num = molecule_num
        self.max_atoms = max_atoms

    def __call__(self, sample):
        """Return the padded ``max_atoms x max_atoms`` adjacency matrix of ``sample``.

        Raises:
            ValueError: If the sample has more atoms than ``max_atoms``; such a
                matrix could not be stacked with the others in a batch.
        """
        num_atoms = sample.x.size(0)
        if num_atoms > self.max_atoms:
            raise ValueError(
                f"sample has {num_atoms} atoms but max_atoms is {self.max_atoms}"
            )

        # Allocate the padded matrix up front instead of building a smaller
        # one and concatenating zero blocks afterwards.
        adj_matrix = torch.zeros(self.max_atoms, self.max_atoms)

        # Fill both directions of every edge in one indexed assignment, so the
        # matrix is symmetric even if edge_index lists only one direction.
        edges = sample.edge_index
        start, end = edges[0].long(), edges[1].long()
        adj_matrix[start, end] = 1
        adj_matrix[end, start] = 1

        return adj_matrix

In [6]:
# Load QM9 from ./qm9_data; each item is passed through TransMolecule,
# so indexing the dataset yields whatever that transform returns.
adjacency_transform = TransMolecule()
data = QM9(root='./qm9_data', transform=adjacency_transform)

"""
Each batch is considered a huge graph with many nodes and edges.
EGNN introduces the concept of L2 distance between nodes,
but I am not including it for now (and probably will not).


"""
# Batches of 64 samples, served in dataset order (no shuffling).
dataloader = DataLoader(data, batch_size=64, shuffle=False)

# Sanity check: print the shape of the first 22 batches, then stop.
for batch_idx, batch in enumerate(dataloader):
    print(batch.shape)
    if batch_idx > 20:
        break

torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])
torch.Size([64, 29, 29])


In [7]:
# class Autoencoder(nn.Module):
#     def __init__(self, latent_dim,hidden_dim,input_dim):
#         super(Autoencoder, self).__init__()
#         output_dim = input_dim
#         #TODO
#         self.encoder = Encoder(latent_dim,hidden_dim,input_dim)
#         self.decoder = Decoder(latent_dim,hidden_dim,output_dim)

#     def forward(self, x):
#         #TODO
#         encoded = self.encoder(x)
#         decoded = self.decoder(encoded)
#         return decoded
    
# class Encoder(nn.Module):
#     def __init__(self, latent_dim,hidden_dim,input_dim):
#         super(Encoder, self).__init__()
#         self.model = nn.Sequential(
#             nn.Linear(input_dim, hidden_dim),
#             nn.LeakyReLU(),
#             nn.Linear(hidden_dim, hidden_dim),
#             nn.LeakyReLU(),
#             nn.Linear(hidden_dim, latent_dim)
#         )
    
#     def forward(self, x):
#         x = x.view(x.size(0), -1)
#         return self.model(x)
    

# class Decoder(nn.Module):
#     def __init__(self, latent_dim,hidden_dim,output_dim):
#         super(Decoder, self).__init__()
#         self.model = nn.Sequential(
#             nn.Linear(latent_dim, hidden_dim),
#             nn.LeakyReLU(),
#             nn.Linear(hidden_dim, hidden_dim),
#             nn.LeakyReLU(),
#             nn.Linear(hidden_dim, output_dim),
#             nn.Sigmoid()
#         )
    
#     def forward(self, x):
#         x = self.model(x)
#         return x.view(x.size(0),5) 

In [27]:
# hidden_dim = 15
# latent_dim = 2
# n_epochs = 15 
# from torchsummary import summary
# autoencoder = Autoencoder(latent_dim, hidden_dim,5).to(device)
# summary(autoencoder,(1,5))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 15]              90
         LeakyReLU-2                   [-1, 15]               0
            Linear-3                   [-1, 15]             240
         LeakyReLU-4                   [-1, 15]               0
            Linear-5                    [-1, 2]              32
           Encoder-6                    [-1, 2]               0
            Linear-7                   [-1, 15]              45
         LeakyReLU-8                   [-1, 15]               0
            Linear-9                   [-1, 15]             240
        LeakyReLU-10                   [-1, 15]               0
           Linear-11                    [-1, 5]              80
          Sigmoid-12                    [-1, 5]               0
          Decoder-13                    [-1, 5]               0
Total params: 727
Trainable params: 727

In [8]:
class Autoencoder(nn.Module):
    """Convolutional autoencoder for single-channel 29x29 inputs.

    The encoder compresses a ``1x29x29`` input down to a ``32x1x1`` code; the
    decoder mirrors it back to ``1x29x29`` and ends with a sigmoid, so every
    output value lies in ``[0, 1]``.
    """

    def __init__(self):
        super().__init__()
        slope = 0.2  # negative slope shared by every LeakyReLU

        encoder_layers = [
            nn.Conv2d(1, 4, 3, stride=2, padding=0),    # 1x29x29 -> 4x14x14
            nn.LeakyReLU(slope),
            nn.Conv2d(4, 8, 3, stride=1, padding=1),    # 4x14x14 -> 8x14x14
            nn.LeakyReLU(slope),
            nn.MaxPool2d(2, stride=2),                  # 8x14x14 -> 8x7x7
            nn.Conv2d(8, 16, 3, stride=2, padding=0),   # 8x7x7   -> 16x3x3
            nn.LeakyReLU(slope),
            nn.Conv2d(16, 32, 3, stride=1, padding=0),  # 16x3x3  -> 32x1x1
            nn.LeakyReLU(slope),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.ConvTranspose2d(32, 16, 3, stride=2, padding=0, output_padding=0),  # 32x1x1  -> 16x3x3
            nn.LeakyReLU(slope),
            nn.ConvTranspose2d(16, 8, 3, stride=2, padding=0, output_padding=0),   # 16x3x3  -> 8x7x7
            nn.LeakyReLU(slope),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),      # 8x7x7   -> 8x14x14
            nn.ConvTranspose2d(8, 4, 3, stride=1, padding=1),                      # 8x14x14 -> 4x14x14
            nn.LeakyReLU(slope),
            nn.ConvTranspose2d(4, 1, 3, stride=2, padding=0, output_padding=0),    # 4x14x14 -> 1x29x29
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction from the decoder."""
        latent = self.encoder(x)
        return self.decoder(latent)

In [11]:
from torchsummary import summary
# Build the model on the selected device; the training loop moves every batch
# to `device`, so the weights have to live there as well.
autoencoder = Autoencoder().to(device)
n_epochs = 25
# torchsummary defaults to device="cuda"; pass the device explicitly so the
# dummy input it creates is placed on the same device as the model.
summary(autoencoder, (1, 29, 29), device=device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 4, 14, 14]              40
         LeakyReLU-2            [-1, 4, 14, 14]               0
            Conv2d-3            [-1, 8, 14, 14]             296
         LeakyReLU-4            [-1, 8, 14, 14]               0
         MaxPool2d-5              [-1, 8, 7, 7]               0
            Conv2d-6             [-1, 16, 3, 3]           1,168
         LeakyReLU-7             [-1, 16, 3, 3]               0
            Conv2d-8             [-1, 32, 1, 1]           4,640
         LeakyReLU-9             [-1, 32, 1, 1]               0
  ConvTranspose2d-10             [-1, 16, 3, 3]           4,624
        LeakyReLU-11             [-1, 16, 3, 3]               0
  ConvTranspose2d-12              [-1, 8, 7, 7]           1,160
        LeakyReLU-13              [-1, 8, 7, 7]               0
         Upsample-14            [-1, 8,

In [14]:
from tqdm import tqdm

criterion = nn.MSELoss()  # Reconstruction loss between output and input

# Batches are moved to `device` below, so the model must be there too.
# Move it before constructing the optimizer over its parameters.
autoencoder.to(device)
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

for epoch in range(n_epochs):
    train_loss = 0
    # tqdm wraps the dataloader to show a per-epoch progress bar
    for x in tqdm(dataloader, desc=f'Epoch {epoch + 1}/{n_epochs}', leave=False):
        x = x.to(device)
        x = x.unsqueeze(1)  # add the channel axis expected by Conv2d

        optimizer.zero_grad()
        output = autoencoder(x)
        loss = criterion(output, x)  # the target is the input itself
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Average of the per-batch losses over the epoch
    train_loss /= len(dataloader)
    print(f'Epoch: {epoch + 1} \tTraining Loss: {train_loss:.6f}')

                                                               

Epoch: 1 	Training Loss: 0.007309


                                                               

Epoch: 2 	Training Loss: 0.007169


                                                               

Epoch: 3 	Training Loss: 0.007103


                                                               

Epoch: 4 	Training Loss: 0.007008


                                                             

KeyboardInterrupt: 

In [29]:
def symetic_matrix(matrix, threshold=0.5):
    """Binarise a square score matrix into a symmetric 0/1 matrix.

    Entry ``(i, j)`` of the result (and its mirror ``(j, i)``) is 1 when
    ``matrix[i][j] + matrix[j][i] > threshold`` and 0 otherwise. Note that the
    *sum* of the two directed scores is compared, not their mean. The diagonal
    is always 0.

    Args:
        matrix: Square 2-D tensor of scores.
        threshold: Cut-off applied to the summed pair of scores (default 0.5).

    Returns:
        A new CPU tensor of the default float dtype with the same shape as
        ``matrix``; it is detached from any autograd graph.
    """
    # One vectorised pass replaces the element-by-element Python loop.
    pair_sum = matrix.detach() + matrix.detach().t()
    output = (pair_sum > threshold).to(torch.get_default_dtype())
    output.fill_diagonal_(0)  # self-loops are never reported
    return output.cpu()

In [31]:
# Compare the first 20 input adjacency matrices with their thresholded reconstructions
import matplotlib.pyplot as plt
import numpy as np

# Run the samples on whatever device the model currently lives on, so this
# cell works whether or not the model was moved to the GPU.
model_device = next(autoencoder.parameters()).device

# Inference only: no gradients are needed for this comparison.
with torch.no_grad():
    for i in range(20):
        x = data[i].to(model_device)
        x = x.unsqueeze(0).unsqueeze(0)  # 29x29 -> 1x1x29x29 (batch, channel)
        encoded = autoencoder.encoder(x)
        decoded = autoencoder.decoder(encoded)
        encoded = encoded.view(-1)  # flattened latent code, kept for inspection
        decoded = decoded.view(29, 29)
        decoded = symetic_matrix(decoded)
        # Each undirected edge appears twice in a symmetric matrix, hence // 2.
        print("total edge in input", torch.sum(x) // 2)
        print("total edge in output", torch.sum(decoded) // 2)
        # Count of mismatching edges; .cpu() is required before .numpy() on GPU tensors.
        print(np.sum(np.abs(x.cpu().numpy() - decoded.cpu().numpy())) // 2)


total edge in input tensor(8.)
total edge in output tensor(8.)
4.0
total edge in input tensor(6.)
total edge in output tensor(4.)
1.0
total edge in input tensor(4.)
total edge in output tensor(4.)
0.0
total edge in input tensor(6.)
total edge in output tensor(6.)
4.0
total edge in input tensor(4.)
total edge in output tensor(4.)
0.0
total edge in input tensor(6.)
total edge in output tensor(4.)
1.0
total edge in input tensor(14.)
total edge in output tensor(10.)
12.0
total edge in input tensor(10.)
total edge in output tensor(6.)
4.0
total edge in input tensor(12.)
total edge in output tensor(16.)
8.0
total edge in input tensor(10.)
total edge in output tensor(4.)
5.0
total edge in input tensor(12.)
total edge in output tensor(6.)
7.0
total edge in input tensor(10.)
total edge in output tensor(4.)
5.0
total edge in input tensor(20.)
total edge in output tensor(22.)
9.0
total edge in input tensor(16.)
total edge in output tensor(14.)
11.0
total edge in input tensor(16.)
total edge in ou