In [9]:
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader
import torch
from typing import Counter
import torch.nn as nn
import torch.optim as optim

In [16]:
# Pick the compute device: use the GPU when PyTorch can see a CUDA device,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cpu


In [17]:
class TransMolecule(object):
    """Dataset transform that converts a graph sample into a dense adjacency matrix.

    Args:
        molecule_num: Stored on the instance as ``self.molecule_num``. It is
            not read when building the adjacency matrix; it is kept so that
            existing callers passing it keep working.
    """

    def __init__(self, molecule_num=5):
        self.molecule_num = molecule_num

    def __call__(self, sample):
        """Return the symmetric 0/1 adjacency matrix of ``sample``.

        Args:
            sample: Object exposing ``x`` (the size of its first dimension is
                used as the number of atoms) and ``edge_index`` (an integer
                tensor whose row 0 holds source indices and row 1 holds
                target indices, one column per edge).

        Returns:
            A ``torch.float32`` tensor of shape ``[num_atoms, num_atoms]``
            with ``1`` at ``[i, j]`` and ``[j, i]`` for every edge ``(i, j)``
            and ``0`` elsewhere.
        """
        num_atoms = sample.x.size(0)
        adj_matrix = torch.zeros(num_atoms, num_atoms)

        # Index tensors must be integer typed and on the same device as the
        # tensor they index into.
        edges = sample.edge_index.to(device=adj_matrix.device, dtype=torch.long)
        src, dst = edges[0], edges[1]

        # One vectorized write per direction replaces the per-edge Python
        # loop; writing both directions keeps the matrix symmetric even if
        # edge_index lists an edge only once.
        adj_matrix[src, dst] = 1
        adj_matrix[dst, src] = 1

        return adj_matrix

In [102]:
# Load QM9 from ./practice_data. The transform is applied to each sample on
# access, so indexing `data` yields a dense adjacency matrix per molecule.
data = QM9(
    root='./practice_data',
    transform=TransMolecule(),
)

"""
Each batch is treated as one huge graph with many nodes and edges.
EGNN introduces the concept of L2 distance between nodes,
but I am not including that for now (and probably will not).


"""
# batch_size=1: the adjacency matrices differ in size from molecule to
# molecule (see the shapes printed below), so they cannot be stacked into
# larger batches as they are.
dataloader = DataLoader(data, batch_size=1, shuffle=False)

# Peek at the shapes of the first 22 batches only.
for i, x in zip(range(22), dataloader):
    print(x.shape)

torch.Size([1, 5, 5])
torch.Size([1, 4, 4])
torch.Size([1, 3, 3])
torch.Size([1, 4, 4])
torch.Size([1, 3, 3])
torch.Size([1, 4, 4])
torch.Size([1, 8, 8])
torch.Size([1, 6, 6])
torch.Size([1, 7, 7])
torch.Size([1, 6, 6])
torch.Size([1, 7, 7])
torch.Size([1, 6, 6])
torch.Size([1, 11, 11])
torch.Size([1, 9, 9])
torch.Size([1, 9, 9])
torch.Size([1, 9, 9])
torch.Size([1, 7, 7])
torch.Size([1, 10, 10])
torch.Size([1, 9, 9])
torch.Size([1, 8, 8])
torch.Size([1, 14, 14])
torch.Size([1, 12, 12])


In [26]:
# class Autoencoder(nn.Module):
#     def __init__(self, latent_dim,hidden_dim,input_dim):
#         super(Autoencoder, self).__init__()
#         output_dim = input_dim
#         #TODO
#         self.encoder = Encoder(latent_dim,hidden_dim,input_dim)
#         self.decoder = Decoder(latent_dim,hidden_dim,output_dim)

#     def forward(self, x):
#         #TODO
#         encoded = self.encoder(x)
#         decoded = self.decoder(encoded)
#         return decoded
    
# class Encoder(nn.Module):
#     def __init__(self, latent_dim,hidden_dim,input_dim):
#         super(Encoder, self).__init__()
#         self.model = nn.Sequential(
#             nn.Linear(input_dim, hidden_dim),
#             nn.LeakyReLU(),
#             nn.Linear(hidden_dim, hidden_dim),
#             nn.LeakyReLU(),
#             nn.Linear(hidden_dim, latent_dim)
#         )
    
#     def forward(self, x):
#         x = x.view(x.size(0), -1)
#         return self.model(x)
    

# class Decoder(nn.Module):
#     def __init__(self, latent_dim,hidden_dim,output_dim):
#         super(Decoder, self).__init__()
#         self.model = nn.Sequential(
#             nn.Linear(latent_dim, hidden_dim),
#             nn.LeakyReLU(),
#             nn.Linear(hidden_dim, hidden_dim),
#             nn.LeakyReLU(),
#             nn.Linear(hidden_dim, output_dim),
#             nn.Sigmoid()
#         )
    
#     def forward(self, x):
#         x = self.model(x)
#         return x.view(x.size(0),5) 

In [27]:
# hidden_dim = 15
# latent_dim = 2
# n_epochs = 15 
# from torchsummary import summary
# autoencoder = Autoencoder(latent_dim, hidden_dim,5).to(device)
# summary(autoencoder,(1,5))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 15]              90
         LeakyReLU-2                   [-1, 15]               0
            Linear-3                   [-1, 15]             240
         LeakyReLU-4                   [-1, 15]               0
            Linear-5                    [-1, 2]              32
           Encoder-6                    [-1, 2]               0
            Linear-7                   [-1, 15]              45
         LeakyReLU-8                   [-1, 15]               0
            Linear-9                   [-1, 15]             240
        LeakyReLU-10                   [-1, 15]               0
           Linear-11                    [-1, 5]              80
          Sigmoid-12                    [-1, 5]               0
          Decoder-13                    [-1, 5]               0
Total params: 727
Trainable params: 727

In [94]:
class Autoencoder(nn.Module):
    """Small convolutional autoencoder for single-channel 2-D inputs.

    The encoder applies a 1->8 convolution, a 2x2 max-pool (which halves the
    spatial size, rounding down) and an 8->16 convolution. The decoder
    upsamples by a factor of two with a stride-2 transposed convolution
    (16->8), maps back to one channel and squashes the result with a sigmoid.

    Because of the pool/upsample pair, the output has the same height and
    width as the input only when both are even; an odd size comes back one
    smaller.

    A deeper variant with an additional pooled 16->32 stage (and the matching
    32->16 transposed convolution) was tried earlier and is not part of this
    model.
    """

    def __init__(self):
        super().__init__()

        encoder_layers = [
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.2),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.2),
        ]
        decoder_layers = [
            nn.ConvTranspose2d(
                in_channels=16, out_channels=8, kernel_size=3,
                stride=2, padding=1, output_padding=1,
            ),
            nn.LeakyReLU(negative_slope=0.2),
            nn.ConvTranspose2d(in_channels=8, out_channels=1, kernel_size=3, stride=1, padding=1),
            nn.Sigmoid(),
        ]

        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Args:
            x: Float tensor of shape ``(batch, 1, H, W)``.

        Returns:
            Reconstruction with values in ``[0, 1]`` and shape
            ``(batch, 1, 2 * (H // 2), 2 * (W // 2))``.
        """
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return reconstruction

In [97]:
from torchsummary import summary

# Create the model directly on the selected device so that it matches the
# batches, which the training loop moves to `device`.
autoencoder = Autoencoder().to(device)
n_epochs = 10

# torchsummary's `device` argument defaults to "cuda"; pass ours explicitly so
# the probe input is created on the same device as the model.
# (1, 28, 28) is only a probe shape for the summary table; the network is
# fully convolutional, and the adjacency matrices printed earlier are smaller.
summary(autoencoder, (1, 28, 28), device=device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
         LeakyReLU-2            [-1, 8, 28, 28]               0
         MaxPool2d-3            [-1, 8, 14, 14]               0
            Conv2d-4           [-1, 16, 14, 14]           1,168
         LeakyReLU-5           [-1, 16, 14, 14]               0
   ConvTranspose2d-6            [-1, 8, 28, 28]           1,160
         LeakyReLU-7            [-1, 8, 28, 28]               0
   ConvTranspose2d-8            [-1, 1, 28, 28]              73
           Sigmoid-9            [-1, 1, 28, 28]               0
Total params: 2,481
Trainable params: 2,481
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.26
Params size (MB): 0.01
Estimated Total Size (MB): 0.28
-----------------------------------------------

In [101]:
from tqdm import tqdm

# The batches below are moved to `device`, so the model has to live there as
# well; otherwise the forward pass fails with a device mismatch whenever CUDA
# is available. nn.Module.to() moves the parameters in place, so repeating it
# is harmless.
autoencoder.to(device)

criterion = nn.MSELoss()  # reconstruction loss between output and (padded) input
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

for epoch in range(n_epochs):
    train_loss = 0.0
    # tqdm wraps the loader to show a progress bar for each epoch.
    for x in tqdm(dataloader, desc=f'Epoch {epoch + 1}/{n_epochs}', leave=False):
        # (batch, N, N) adjacency -> (batch, 1, N, N): Conv2d expects a channel axis.
        x = x.to(device).unsqueeze(1)

        # The autoencoder halves and then doubles the spatial size, so an odd
        # height/width would come back one smaller than the input. Zero-pad
        # the bottom/right edge to the next even size; the padded tensor is
        # also the reconstruction target.
        pad_h = x.shape[2] % 2
        pad_w = x.shape[3] % 2
        if pad_h or pad_w:
            # Padding order is (left, right, top, bottom).
            x = nn.functional.pad(x, (0, pad_w, 0, pad_h))

        optimizer.zero_grad()
        output = autoencoder(x)
        loss = criterion(output, x)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Average the per-batch losses over the epoch.
    train_loss /= len(dataloader)
    print(f'Epoch: {epoch + 1} \tTraining Loss: {train_loss:.6f}')


                                                

RuntimeError: stack expects each tensor to be equal size, but got [20, 7, 7] at entry 0 and [2, 3, 3] at entry 1