In [1]:
import os
import torch
import numpy as np
from tqdm import tqdm

from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader

from qm9_dataset import QM9DGLDataset

import dgl

from dgllife.model.gnn.mpnn import MPNNGNN
from dgl.nn.pytorch import Set2Set

import pytorch_lightning as pl
import torchmetrics.functional as tm
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

# Notebook-wide settings, read by the dataset / DataLoader cells below.
config = dict(
    # Where the dataset files live and which target is loaded.
    data_path="./data/",
    task="mu",
    # File names handed to QM9DGLDataset for the two splits.
    train_data="qm9_train_data.pt",
    test_data="qm9_test_data.pt",
    # DataLoader settings shared by the train / validation / test loaders.
    batch_size=256,
    num_workers=16,
)

In [2]:
# Seed Python, NumPy and PyTorch RNGs so the random train/validation split and
# the later weight initialisation are reproducible under Restart & Run All.
# NOTE(review): assumes `train_val_random_split` draws from one of these global
# RNGs -- confirm in qm9_dataset.
pl.seed_everything(config.get("seed", 42), workers=True)


def make_loader(data, collate_fn, shuffle):
    """Wrap ``data`` in a DataLoader using the batch size and worker count from ``config``.

    Args:
        data: Dataset (or subset) to iterate over.
        collate_fn: Callable that merges a list of samples into one batch.
        shuffle: Whether to reshuffle the samples every epoch.

    Returns:
        A ``torch.utils.data.DataLoader`` over ``data``.
    """
    return DataLoader(data,
                      batch_size=config["batch_size"],
                      shuffle=shuffle,
                      collate_fn=collate_fn,
                      num_workers=config["num_workers"])


# Train / validation data
dataset = QM9DGLDataset(config["data_path"],
                        config["task"],
                        file_name=config["train_data"],
                        mode='train')

# 0.8 is the train fraction; the remainder becomes the validation set.
train_dataset, val_dataset = dataset.train_val_random_split(0.8)

train_dataloader = make_loader(train_dataset, dataset.collate_fn, shuffle=True)
valid_dataloader = make_loader(val_dataset, dataset.collate_fn, shuffle=False)

# Test Dataset
test_dataset = QM9DGLDataset(config["data_path"],
                             config["task"],
                             file_name=config["test_data"],
                             mode='test')

test_dataloader = make_loader(test_dataset, test_dataset.collate_fn, shuffle=False)

print(f"Train set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")
print(f"Test set size: {len(test_dataset)}")

Loaded train-set, task: mu, source: ./data/, length: 98123
Loaded test-set, task: mu, source: ./data/, length: 32708
Train set size: 78498
Validation set size: 19625
Test set size: 32708


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class MPNN(nn.Module):
    """Message-passing network producing ``n_tasks`` outputs per graph.

    Pipeline (see ``forward``): integer atom / bond codes are embedded, passed
    through dgllife's ``MPNNGNN``, pooled per graph with DGL's ``Set2Set`` and
    mapped to the outputs by a two-layer MLP.

    Args:
        num_atom_type: Number of rows in the atom-type embedding table.
        num_edge_type: Number of rows in the bond-type embedding table.
        node_in_feats: Width of the atom embedding fed to the GNN.
        node_out_feats: Width of the node representations the GNN produces.
        edge_in_feats: Width of the bond embedding fed to the GNN.
        edge_hidden_feats: Hidden width of the GNN's edge network.
        n_tasks: Number of values predicted per graph.
    """

    def __init__(self, num_atom_type=5, num_edge_type=4,
                 node_in_feats=32, node_out_feats=64,
                 edge_in_feats=32, edge_hidden_feats=64, n_tasks=1):
        super().__init__()

        self.node_embedding = nn.Embedding(num_atom_type, node_in_feats)
        self.edge_embedding = nn.Embedding(num_edge_type, edge_in_feats)

        # Forward the constructor arguments instead of hard-coded literals so
        # the GNN always matches the embedding widths above and the readout
        # width below, whatever sizes the caller picks.
        self.gnn = MPNNGNN(
            node_in_feats=node_in_feats,
            node_out_feats=node_out_feats,
            edge_in_feats=edge_in_feats,
            edge_hidden_feats=edge_hidden_feats
        )

        self.readout = Set2Set(
            input_dim=node_out_feats,
            n_iters=6,
            n_layers=3
        )

        # Set2Set emits 2 * input_dim features per graph, hence the first
        # Linear layer's input width.
        self.predict = nn.Sequential(
            nn.Linear(2*node_out_feats, node_out_feats),
            nn.ReLU(),
            nn.Linear(node_out_feats, n_tasks)
        )

    def forward(self, graph):
        """Predict the target(s) for every graph in a (batched) DGL graph.

        Args:
            graph: DGL graph carrying categorical codes in ``ndata['f']`` and
                ``edata['f']``.
                NOTE(review): presumably one integer code per node / edge
                (1-D tensors) -- confirm against the dataset.

        Returns:
            The output of the prediction MLP for the pooled graph features.
        """
        # Embedding lookups need integer (long) indices.
        node_feats = graph.ndata['f'].long()
        node_embeddings = self.node_embedding(node_feats)

        edge_feats = graph.edata['f'].long()
        edge_embeddings = self.edge_embedding(edge_feats)

        node_feats = self.gnn(graph, node_embeddings, edge_embeddings)
        graph_feats = self.readout(graph, node_feats)
        out = self.predict(graph_feats)

        return out

In [4]:
class Mu_predictor(pl.LightningModule):
    """Lightning wrapper that trains ``model`` with an L1 (mean absolute error) loss.

    Logged metrics (all aggregated per epoch):
        ``train_loss``, ``valid_loss``, ``test_loss``: L1 loss.
        ``valid_acc``, ``test_acc``: despite the name these hold the RMSE of
        each batch (lower is better). The keys are kept because the
        checkpoint file-name template refers to ``valid_acc``.

    Args:
        model: Module called as ``model(graph)`` to produce predictions.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def _log_epoch(self, name, value):
        """Log ``value`` under ``name`` once per epoch and show it in the progress bar."""
        self.log(name, value, on_step=False, on_epoch=True, prog_bar=True)

    def step(self, batch):
        """Run the model on one ``(graph, y)`` batch.

        Returns:
            Tuple ``(pred, loss, rmse)``: the predictions, their L1 loss
            against ``y`` and the root mean squared error of this batch.
        """
        graph, y = batch
        pred = self.model(graph)
        loss = F.l1_loss(pred, y)
        rmse = tm.mean_squared_error(pred, y).sqrt()

        return pred, loss, rmse

    def training_step(self, batch, batch_idx):
        """Return the L1 loss for optimisation and log it as ``train_loss``."""
        _, loss, _ = self.step(batch)

        self._log_epoch('train_loss', loss)

        return loss

    def validation_step(self, batch, batch_idx):
        """Log ``valid_loss`` (L1) and ``valid_acc`` (RMSE) for one batch."""
        _, loss, rmse = self.step(batch)

        self._log_epoch('valid_loss', loss)
        self._log_epoch("valid_acc", rmse)

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` (L1) and ``test_acc`` (RMSE) for one batch."""
        _, loss, rmse = self.step(batch)

        self._log_epoch('test_loss', loss)
        self._log_epoch("test_acc", rmse)

    def predict_step(self, batch, batch_idx):
        """Return the model output for one batch.

        Accepts either a bare graph or a ``(graph, ...)`` tuple / list, so
        the loaders used for training and validation, which yield
        ``(graph, y)``, can be passed to ``trainer.predict`` as well.
        """
        graph = batch[0] if isinstance(batch, (tuple, list)) else batch
        return self.model(graph)

    def configure_optimizers(self):
        """Adam (lr=1e-4) with a cosine-annealing schedule (``T_max=20``)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)

        return {"optimizer": optimizer, "lr_scheduler": scheduler}
    
    
model = MPNN()
predictor = Mu_predictor(model)

# Keep the three checkpoints with the lowest validation L1 loss.
# NOTE(review): the "GAT-" file-name prefix looks like a leftover from another
# notebook (this model is an MPNN); it is kept unchanged so existing
# checkpoint names still match.
callbacks = [
    ModelCheckpoint(
        monitor='valid_loss',
        save_top_k=3,
        dirpath='weights/MPNN',
        filename='GAT-{epoch:03d}-{valid_loss:.4f}-{valid_acc:.4f}',
    ),
]

# `gpus=1` is deprecated (it triggers a rank_zero_deprecation warning);
# `accelerator` + `devices` is the supported way to request a single GPU.
trainer = pl.Trainer(
    max_epochs=500,
    accelerator="gpu",
    devices=1,
    enable_progress_bar=True,
    callbacks=callbacks,
)

  rank_zero_deprecation(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Train on the training loader, validating on the held-out split.
trainer.fit(
    model=predictor,
    train_dataloaders=train_dataloader,
    val_dataloaders=valid_dataloader,
)

Missing logger folder: /home/ubuntu/Workspace/graph_learning_hackathon/lightning_logs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type | Params
-------------------------------
0 | model | MPNN | 420 K 
-------------------------------
420 K     Trainable params
0         Non-trainable params
420 K     Total params
1.681     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [8]:
# File name of a checkpoint inside weights/MPNN. Leave empty to use the best
# checkpoint recorded by the ModelCheckpoint callback during this session.
ckpt_fname = ""

if ckpt_fname:
    ckpt_path = os.path.join("weights/MPNN", ckpt_fname)
else:
    ckpt_path = callbacks[0].best_model_path

# Fail early with a readable message instead of trying to load a directory
# or an empty path.
if not ckpt_path or not os.path.isfile(ckpt_path):
    raise FileNotFoundError(
        f"No checkpoint file at {ckpt_path!r}; set ckpt_fname or run training first."
    )

# load_from_checkpoint is a classmethod: call it on the class, not an instance.
predictor = Mu_predictor.load_from_checkpoint(ckpt_path, model=model)

pred = trainer.predict(predictor, test_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting: 307it [00:00, ?it/s]

In [9]:
# Per-batch prediction arrays are collected here before being stacked.
preds = []


def to_np(x):
    """Return tensor ``x`` as a NumPy array on the CPU, cut from the autograd graph."""
    detached = x.detach()
    return detached.cpu().numpy()

# Convert every predicted batch to NumPy (tqdm shows progress), stack the
# batches along the first axis and write the result to disk.
preds.extend(map(to_np, tqdm(pred)))

preds = np.concatenate(preds, axis=0)
np.savetxt('pred.csv', preds)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 128/128 [00:00<00:00, 130784.63it/s]
