In [1]:
import os.path as osp
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.nn import ModuleList, Embedding
from torch.nn import Sequential, ReLU, Linear
from torch.nn.modules.module import T
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch_geometric.utils import degree
from torch_geometric.datasets import ZINC
from torch_geometric.loader import DataLoader
from torch_geometric.nn import BatchNorm, global_add_pool
from mma_conv import MMAConv
import argparse
import numpy as np
from Alchemy_dataset import TencentAlchemyDataset
from validate import get_valid_dataset, get_valid_targets
import wandb
from sklearn.metrics import mean_squared_error, mean_absolute_error

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class x:
    def __init__(self) -> None:
        pass
args = x()

args.seed=42
args.epochs=200
args.lr = 3e-4
args.weight_decay = 5e-4
args.hidden_dim=16
args.out_dim=16
args.edge_dim=16
args.dropout = 0.5
args.batch_size=16
args.tower=1
args.aggregators = "mean,max,min"
args.scalers = "identity,amplification,attenuation"
args.L=4
args.mask = True
args.property_num = 0

In [3]:
import os
os.environ['WANDB_NOTEBOOK_NAME'] = "run.ipynb"

In [4]:
wandb.finish()

In [5]:
wandb.init(
# Set the project where this run will be logged
project="alchemy", 
# We pass a run name (otherwise it’ll be randomly assigned, like sunshine-lollypop-10)
name=f'MMA_property{args.property_num}_epochs{args.epochs}', 
# Track hyperparameters and run metadata
config={
"architecture": "MMA",
})

[34m[1mwandb[0m: Currently logged in as: [33mgrandma[0m ([33mpgm_alchemy[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
np.random.seed(args.seed)
torch.manual_seed(args.seed)

#Load Alchemy Dataset

print("Loading dataset")
train_dataset = TencentAlchemyDataset(root='data-bin', mode='dev').shuffle()
valid_dataset = TencentAlchemyDataset(root='data-bin', mode='valid')

# train_dataset.data.y = train_dataset.data.y[:,args.property_num]

valid_targets =  get_valid_targets()
valid_dataset = get_valid_dataset(valid_targets, valid_dataset)

# valid_dataset.data.y = valid_dataset.data.y[:,args.property_num]

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(valid_dataset, batch_size=64)
test_loader = DataLoader(valid_dataset, batch_size=64)


print("Dataset loaded, you are sexy!")

Loading dataset
['data-bin/processed/TencentAlchemy_dev.pt']
['data-bin/processed/TencentAlchemy_valid.pt']
Dataset loaded, you are sexy!




In [7]:
deg = torch.zeros(5, dtype=torch.long)
for data in train_dataset:
    d = degree(data.edge_index[1], num_nodes=data.num_nodes, dtype=torch.long)
    deg += torch.bincount(d, minlength=deg.numel())

In [8]:
# conv = MMAConv(in_channels=75, out_channels=75,
#                            aggregators=args.aggregators.split(","), scalers=args.scalers.split(","), deg=deg,
#                            edge_dim=50, towers=5, pre_layers=1, post_layers=1,
#                            mask = args.mask, divide_input=False)

In [9]:
# # Load ZINC dataset
# path = "mma/data"
# train_dataset2 = ZINC(path, subset=True, split='train')
# val_dataset2 = ZINC(path, subset=True, split='val')
# test_dataset2 = ZINC(path, subset=True, split='test')



# # # Create data loaders
# train_loader2 = DataLoader(train_dataset2, batch_size=64, shuffle=True)
# val_loader2 = DataLoader(val_dataset2, batch_size=64)
# test_loader2 = DataLoader(test_dataset2, batch_size=64) 

In [10]:
class Net(torch.nn.Module):
    """
    Neural network class definition.
    """
    def __init__(self, args, aggregator_list, scaler_list):
        """
        Initializes the neural network.

        Args:
        - args: Command line arguments.
        - aggregator_list: List of aggregators.
        - scaler_list: List of scalers.
        """
        super(Net, self).__init__()

        # Define node and edge embeddings
        # self.node_emb = Embedding(1000, 75)
        # self.edge_emb = Embedding(100, 50)
        self.node_layer = nn.Linear(15,75)
        self.edge_layer = nn.Linear(4,50)

        # Define aggregators and scalers
        aggregators = aggregator_list
        scalers = scaler_list

        # self.layer1 = TransformerConv(in_channels=15,out_channels=1,edge_dim=4)
        # GATConv(in_channels=15,out_channels=1)

        # Define convolutional layers and batch normalization layers
        self.convs = ModuleList()
        self.batch_norms = ModuleList()
        
        # Add four convolutional layers with corresponding batch normalization layers
        for _ in range(4): # 4 ------> number of layers
            conv = MMAConv(in_channels=75, out_channels=75,
                           aggregators=aggregators, scalers=scalers, deg=deg,
                           edge_dim=50, towers=5, pre_layers=1, post_layers=1,
                           mask = args.mask, divide_input=False)
            self.convs.append(conv)
            self.batch_norms.append(BatchNorm(75))

        # Define fully connected layers
        self.mlp = Sequential(Linear(75, 50), ReLU(), Linear(50, 25), ReLU(),
                              Linear(25, 12))

    def forward(self, x, edge_index, edge_attr, batch):
        """
        Defines the forward pass of the neural network.

        Args:
        - x: Node features.
        - edge_index: Edge indices.
        - edge_attr: Edge attributes.
        - batch: Batch indices.

        Returns:
        - The output of the neural network.
        """

        # x = self.layer1(data.x,data.edge_index,data.edge_attr)

        # x = self.node_emb(x.squeeze().long())
        # edge_attr = self.edge_emb(edge_attr.long())

        x = self.node_layer(x).unsqueeze(1)
        edge_attr = self.edge_layer(edge_attr)

        # print(x.shape,edge_attr.shape)
        # Perform convolutional and batch normalization operations
        for conv, batch_norm in zip(self.convs, self.batch_norms):
            x = F.relu(batch_norm(conv(x, edge_index, edge_attr)))
        
        # Perform global pooling
        x = global_add_pool(x, batch)
        
        # Perform fully connected layers
        return self.mlp(x)


In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize neural network, move to GPU if available
model = Net(args, aggregator_list=args.aggregators.split(","), scaler_list=args.scalers.split(",")).to(device)

# Initialize optimizer and learning rate scheduler
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=20, min_lr=0.00001)

device(type='cuda')

In [12]:

import tqdm
from tqdm import tqdm

train_stats = []
valid_stats = []


for epoch in range(100):

    total_loss = 0
    model.train()

    total_train_mse = torch.zeros(12)
    total_train_mae = torch.zeros(12)

    for data in tqdm(train_loader):
        
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_attr, data.batch)
        # loss = (out.squeeze() - data.y).abs().mean()
        loss_matrix = ((out.squeeze() - data.y)**2)
        train_mse = torch.mean(loss_matrix, axis=0).cpu()
        loss = train_mse.sum()
        loss.backward()
        optimizer.step()
        
        train_mae = torch.mean(torch.abs(out.squeeze() - data.y), axis=0).cpu()

        total_train_mse += train_mse
        total_train_mae += train_mae
    
        total_loss += loss.item()

    total_train_mse /= len(train_loader)
    total_train_mae /= len(train_loader)
    train_mse_stats = {f"Train MSE property {i}":train_mse[i].item() for i in range(train_mse.shape[0])}
    train_mae_stats = {f"Train MAE property {i}":train_mae[i].item() for i in range(train_mae.shape[0])}
    wandb.log(train_mse_stats)
    wandb.log(train_mae_stats)




    print("train loss", total_loss/len(train_loader))
    
    model.eval()

    total_val_mse = torch.zeros(12)
    total_val_mae = torch.zeros(12)

    for data in tqdm(val_loader):

        data = data.to(device)
        
        out = model(data.x, data.edge_index, data.edge_attr, data.batch)
    

        loss_matrix = ((out.squeeze() - data.y)**2)
        val_mse = torch.mean(loss_matrix, axis=0).cpu()
        loss = val_mse.sum()
        loss.backward()
        
        val_mae = torch.mean(torch.abs(out.squeeze() - data.y), axis=0).cpu()

        total_val_mse += val_mse
        total_val_mae += val_mae

    total_val_mse /= len(val_loader)
    total_val_mae /= len(val_loader)
    val_mse_stats = {f"Val MSE property {i}":val_mse[i].item() for i in range(val_mse.shape[0])}
    val_mae_stats = {f"Val MAE property {i}":val_mae[i].item() for i in range(val_mae.shape[0])}
    wandb.log(val_mse_stats)
    wandb.log(val_mae_stats)

100%|██████████| 1559/1559 [01:10<00:00, 22.03it/s]


train loss 3.014693555425114


100%|██████████| 62/62 [00:03<00:00, 18.34it/s]
100%|██████████| 1559/1559 [01:08<00:00, 22.71it/s]


train loss 2.004225089046877


100%|██████████| 62/62 [00:02<00:00, 20.94it/s]
100%|██████████| 1559/1559 [01:09<00:00, 22.41it/s]


train loss 1.68925467081908


100%|██████████| 62/62 [00:02<00:00, 20.72it/s]
100%|██████████| 1559/1559 [01:09<00:00, 22.33it/s]


train loss 1.4322726870668936


100%|██████████| 62/62 [00:02<00:00, 20.90it/s]
100%|██████████| 1559/1559 [01:08<00:00, 22.73it/s]


train loss 1.2520698232479475


100%|██████████| 62/62 [00:02<00:00, 20.95it/s]
100%|██████████| 1559/1559 [01:08<00:00, 22.72it/s]


train loss 1.1477757092467327


100%|██████████| 62/62 [00:02<00:00, 20.85it/s]
100%|██████████| 1559/1559 [01:08<00:00, 22.71it/s]


train loss 1.0870392169227503


100%|██████████| 62/62 [00:02<00:00, 20.92it/s]
100%|██████████| 1559/1559 [01:08<00:00, 22.71it/s]


train loss 1.0555695959434364


100%|██████████| 62/62 [00:02<00:00, 20.72it/s]
100%|██████████| 1559/1559 [01:08<00:00, 22.67it/s]


train loss 1.0244351741287325


100%|██████████| 62/62 [00:02<00:00, 20.90it/s]
100%|██████████| 1559/1559 [01:08<00:00, 22.70it/s]


train loss 1.0053779989041298


100%|██████████| 62/62 [00:02<00:00, 20.89it/s]
100%|██████████| 1559/1559 [01:08<00:00, 22.69it/s]


train loss 0.9913857810920901


100%|██████████| 62/62 [00:02<00:00, 20.87it/s]
 32%|███▏      | 501/1559 [00:22<00:46, 22.65it/s]

In [24]:
train_mse, loss

(tensor([0.7392, 1.1109, 1.1208, 0.7673, 1.6818, 0.7486, 0.8198, 0.6573, 0.6853,
         0.8563, 1.3191, 0.9090], device='cuda:0', grad_fn=<MeanBackward1>),
 tensor(11.4154, device='cuda:0', grad_fn=<SumBackward0>))

In [12]:
import tqdm
from tqdm import tqdm

train_stats = []
valid_stats = []

for epoch in range(100):

    model.train()

    

    for data in tqdm(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_attr, data.batch)
        # loss = (out.squeeze() - data.y).abs().mean()
        loss = ((out.squeeze() - data.y)**2).mean()
        loss.backward()
        total_loss.append(loss.item())
        optimizer.step()

        train_mse = mean_squared_error(out.squeeze().detach().cpu().numpy(),data.y.cpu().numpy())
        train_mae = mean_absolute_error(out.squeeze().detach().cpu().numpy(),data.y.cpu().numpy())

        train_stats['Train MSE'] += train_mse
        train_stats['Train MAE'] += train_mae

    train_stats['Train MSE'] /= len(train_loader)
    train_stats['Train MAE'] /= len(train_loader)
    wandb.log(train_stats)



    print("train loss",sum(total_loss)/len(total_loss))
    
    model.eval()

    valid_loss = []

    for data in tqdm(val_loader):

        data = data.to(device)
        
        out = model(data.x, data.edge_index, data.edge_attr, data.batch)
        loss = ((out.squeeze() - data.y)**2).mean()
        
        valid_loss.append(loss.item())

        val_mse = mean_squared_error(out.squeeze().detach().cpu().numpy(),data.y.cpu().numpy())
        val_mae = mean_absolute_error(out.squeeze().detach().cpu().numpy(),data.y.cpu().numpy())

        val_stats['Val MSE'] += val_mse
        val_stats['Val MAE'] += val_mae
    
    val_stats['Val MSE'] /= len(val_loader)
    val_stats['Val MAE'] /= len(val_loader)

    wandb.log(val_stats)
        

    print("valid loss",sum(valid_loss)/len(valid_loss))

100%|██████████| 1559/1559 [01:14<00:00, 20.99it/s]


train loss 0.11297978189815078


  0%|          | 0/62 [00:00<?, ?it/s]


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.