In [1]:
# -*- coding: utf-8 -*-
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import numpy as np
import torch
import torch.nn as nn

from torch.utils.data import DataLoader

from dgllife.data import TencentAlchemyDataset
from dgllife.utils import EarlyStopping, Meter

# from utils import set_random_seed, collate_molgraphs, load_model

def regress(args, model, bg):
    """Run ``model`` on a batched graph and return its prediction.

    Parameters
    ----------
    args : dict
        Must provide ``'device'`` (where the graph and features are moved)
        and ``'model'`` (one of ``'MPNN'``, ``'SchNet'``, ``'MGCN'``).
    model : callable
        Called as ``model(graph, node_input, edge_input)``.
    bg : batched graph
        Must support ``.to(device)`` and expose ``ndata`` / ``edata``.
        The node and edge inputs are popped from the moved graph.

    Returns
    -------
    Whatever ``model`` returns.

    Raises
    ------
    ValueError
        If ``args['model']`` is not a supported model name. Previously this
        case silently returned ``None``.
    """
    device = args['device']
    bg = bg.to(device)

    # Each model family reads a different pair of node/edge fields.
    model_name = args['model']
    if model_name == 'MPNN':
        node_key, edge_key = 'n_feat', 'e_feat'
    elif model_name in ('SchNet', 'MGCN'):
        node_key, edge_key = 'node_type', 'distance'
    else:
        raise ValueError(
            "Unsupported model {!r}; expected 'MPNN', 'SchNet' or 'MGCN'".format(model_name))

    node_input = bg.ndata.pop(node_key)
    edge_input = bg.edata.pop(edge_key)
    node_input, edge_input = node_input.to(device), edge_input.to(device)
    return model(bg, node_input, edge_input)

def run_a_train_epoch(args, epoch, model, data_loader,
                      loss_criterion, optimizer):
    """Train ``model`` for one pass over ``data_loader`` and print the score.

    Each batch is a ``(smiles, graph, labels)`` triple. The element-wise loss
    from ``loss_criterion`` is averaged before back-propagation, and the
    epoch metric named by ``args['metric_name']`` is averaged over its
    entries and printed. Nothing is returned.
    """
    model.train()
    meter = Meter()
    device = args['device']

    for _smiles, graph_batch, targets in data_loader:
        targets = targets.to(device)
        preds = regress(args, model, graph_batch)
        batch_loss = loss_criterion(preds, targets).mean()

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        meter.update(preds, targets)

    metric_name = args['metric_name']
    epoch_score = np.mean(meter.compute_metric(metric_name))
    print('epoch {:d}/{:d}, training {} {:.4f}'.format(
        epoch + 1, args['num_epochs'], metric_name, epoch_score))

def run_an_eval_epoch(args, model, data_loader):
    """Evaluate ``model`` on ``data_loader`` without tracking gradients.

    Returns the mean over the entries of the metric named by
    ``args['metric_name']``, computed from all batches.
    """
    model.eval()
    meter = Meter()
    device = args['device']

    with torch.no_grad():
        for _smiles, graph_batch, targets in data_loader:
            targets = targets.to(device)
            preds = regress(args, model, graph_batch)
            meter.update(preds, targets)
        return np.mean(meter.compute_metric(args['metric_name']))

def main(args):
    """Train on the Alchemy 'dev' split and validate on 'valid' with early stopping.

    ``args`` is a dict read for ``'random_seed'``, ``'batch_size'``, ``'lr'``,
    ``'weight_decay'``, ``'mode'``, ``'patience'``, ``'num_epochs'`` and
    ``'metric_name'``, plus whatever ``load_model`` needs. ``args['device']``
    is set here: CUDA when available, otherwise CPU.
    """
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    args['device'] = torch.device(device_name)
    set_random_seed(args['random_seed'])

    train_set = TencentAlchemyDataset(mode='dev')
    val_set = TencentAlchemyDataset(mode='valid')

    batch_size = args['batch_size']
    train_loader = DataLoader(dataset=train_set, batch_size=batch_size,
                              shuffle=True, collate_fn=collate_molgraphs)
    val_loader = DataLoader(dataset=val_set, batch_size=batch_size,
                            collate_fn=collate_molgraphs)

    model = load_model(args)
    # Element-wise loss; the training loop reduces it with .mean().
    loss_fn = nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    stopper = EarlyStopping(mode=args['mode'], patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer)

        val_score = run_an_eval_epoch(args, model, val_loader)
        # Step the stopper before printing so best_score includes this epoch.
        should_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.format(
            epoch + 1, args['num_epochs'], args['metric_name'], val_score,
            args['metric_name'], stopper.best_score))

        if should_stop:
            break

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the 'dev' split (used as the training set) and the 'valid' split
# of the Tencent Alchemy dataset.
train_set = TencentAlchemyDataset(mode='dev')
val_set = TencentAlchemyDataset(mode='valid')



99776 loaded!
3951 loaded!


In [14]:
# Inspect the edge data of the graph (element 1) of the first training datapoint.
train_set[0][1].edata

{'e_feat': tensor([[0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 1., 0., 0., 0.],

In [20]:
# -*- coding: utf-8 -*-
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import dgl
import numpy as np
import random
import torch

def set_random_seed(seed=0):
    """Seed every random number generator used here.

    Seeds Python's ``random``, NumPy and PyTorch, and additionally the
    current CUDA device when CUDA is available.

    Parameters
    ----------
    seed : int
        Seed value shared by all generators. Default to 0.
    """
    seeders = [random.seed, np.random.seed, torch.manual_seed]
    if torch.cuda.is_available():
        seeders.append(torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)

def collate_molgraphs(data):
    """Collate a list of datapoints into one batch for a dataloader.

    Parameters
    ----------
    data : list of 3-tuples
        Each tuple is one datapoint: a SMILES, a DGLGraph and a label tensor.

    Returns
    -------
    smiles : list
        The SMILES of every datapoint, in input order.
    bg : DGLGraph
        The graphs merged by ``dgl.batch``, with zero initializers set for
        node and edge features.
    labels : Tensor
        The label tensors stacked along a new leading dimension, e.g.
        shape (B, T) for B datapoints with T labels each.
    """
    columns = [list(column) for column in zip(*data)]
    smiles, graphs, labels = columns

    bg = dgl.batch(graphs)
    zero_init = dgl.init.zero_initializer
    bg.set_n_initializer(zero_init)
    bg.set_e_initializer(zero_init)

    return smiles, bg, torch.stack(labels, dim=0)

def load_model(args):
    """Build the predictor selected by ``args['model']``.

    Parameters
    ----------
    args : dict
        ``args['model']`` picks the architecture; the remaining keys read
        depend on it:

        * ``'SchNet'``: ``node_feats``, ``hidden_feats``,
          ``predictor_hidden_feats``, ``n_tasks``
        * ``'MGCN'``: ``feats``, ``n_layers``, ``predictor_hidden_feats``,
          ``n_tasks``
        * ``'MPNN'``: ``node_in_feats``, ``edge_in_feats``,
          ``node_out_feats``, ``edge_hidden_feats``, ``n_tasks``

    Returns
    -------
    The constructed dgllife predictor.

    Raises
    ------
    ValueError
        If ``args['model']`` is not one of the names above. Previously this
        surfaced as an ``UnboundLocalError`` on the return statement.
    """
    model_name = args['model']

    # Import lazily so only the selected predictor class is loaded.
    if model_name == 'SchNet':
        from dgllife.model import SchNetPredictor
        return SchNetPredictor(node_feats=args['node_feats'],
                               hidden_feats=args['hidden_feats'],
                               predictor_hidden_feats=args['predictor_hidden_feats'],
                               n_tasks=args['n_tasks'])

    if model_name == 'MGCN':
        from dgllife.model import MGCNPredictor
        return MGCNPredictor(feats=args['feats'],
                             n_layers=args['n_layers'],
                             predictor_hidden_feats=args['predictor_hidden_feats'],
                             n_tasks=args['n_tasks'])

    if model_name == 'MPNN':
        from dgllife.model import MPNNPredictor
        return MPNNPredictor(node_in_feats=args['node_in_feats'],
                             edge_in_feats=args['edge_in_feats'],
                             node_out_feats=args['node_out_feats'],
                             edge_hidden_feats=args['edge_hidden_feats'],
                             n_tasks=args['n_tasks'])

    raise ValueError(
        "Unsupported model {!r}; expected 'SchNet', 'MGCN' or 'MPNN'".format(model_name))

In [21]:
# Batch the training set 24 datapoints at a time with collate_molgraphs
# (no shuffling is requested here).
train_loader = DataLoader(dataset=train_set,batch_size=24,collate_fn=collate_molgraphs)
# val_loader = DataLoader(dataset=val_set)

In [27]:
from dgllife.model import SchNetPredictor

In [28]:
# Build a SchNet predictor with its default constructor arguments.
model = SchNetPredictor()

In [29]:
# Pull the first batch. At this point `bg` is the whole collated tuple
# (smiles, batched graph, labels), not just the graph.
bg = next(iter(train_loader))

In [31]:
# Keep only the batched graph (element 1 of the collated tuple).
# NOTE(review): this rebinds `bg`, so the cell fails if it is run twice in a row.
bg = bg[1]

In [34]:
# Remove the 'n_feat' node features from the batched graph and keep them in `h`.
# pop() mutates the graph, so this cell only works once per batch.
h = bg.ndata.pop('n_feat')

In [36]:
# Show the node data left on the graph after 'n_feat' was popped.
bg.ndata

{'node_type': tensor([ 6,  8,  6, 16,  6,  6,  8,  8,  6,  6,  6,  8,  6,  6,  6,  6,  8,  6,
         6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  8,  6,
         7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  6,  6,  7,  6,  8,  6,  6,
         6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  8,  6,  6,  6,  8,  6,  8,
         6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  8,  6,  6,  6,
         6,  8,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  8,  8,  6,
         6,  8,  6,  7,  6,  6,  6,  6,  8,  8,  6,  6,  6,  8,  8,  6,  6,  8,
         7,  6,  6,  6,  6,  8,  6,  6,  6,  8,  6,  7,  6,  6,  6,  7,  6,  6,
         8,  6,  6,  6,  6,  6,  6,  8,  6,  6,  7,  6,  6,  6,  6,  6,  6,  7,
         6,  7,  6,  8,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  8,  6,  6,  8,
         6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  8,  6,  6,  6,  8,
         8,  6,  6,  6,  6,  6,  6,  7,  6,  8,  6,  6,  6,  8,  6,  8,  6,  6,
         8,  6,  6,  7,  6

In [35]:
# Display the node features that were popped from the graph.
h

tensor([[0., 1., 0.,  ..., 0., 1., 1.],
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 1., 0.,  ..., 0., 1., 3.],
        ...,
        [0., 1., 0.,  ..., 0., 1., 2.],
        [0., 1., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 1.]])

In [8]:
# Take the first entry of `megnet`.
# NOTE(review): `megnet` is not defined in any visible cell; it must come from
# a cell that was deleted or run elsewhere. Confirm before re-running.
dd = megnet[0]

In [10]:
# List the fields available on a single entry.
dd.keys()

dict_keys(['id', 'desc', 'formula', 'e_hull', 'gap pbe', 'mu_b', 'elastic anisotropy', 'bulk modulus', 'shear modulus', 'atoms', 'e_form'])

In [13]:
# List the fields of the entry's "atoms" record.
dd["atoms"].keys()

dict_keys(['lattice_mat', 'coords', 'elements', 'abc', 'angles', 'cartesian', 'props'])

In [23]:
# Put the entry's atomic coordinates into a DataFrame for inspection.
# NOTE(review): `pd` is not imported in any cell above this one.
X = pd.DataFrame(dd["atoms"]["coords"])

In [14]:
from models.dataloading import *

In [27]:
from jarvis.core.atoms import Atoms

In [31]:
# Rebuild a jarvis Atoms object from the entry's "atoms" dictionary.
structure = Atoms.from_dict(dd["atoms"])

In [32]:
# Show the structure's raw distance matrix.
structure.raw_distance_matrix

array([[0.        , 5.39076238, 5.16244463, ..., 2.51134294, 4.5880922 ,
        2.86475125],
       [5.39076238, 0.        , 4.40974077, ..., 7.56552437, 6.01936939,
        5.40159774],
       [5.16244463, 4.40974077, 0.        , ..., 7.62887609, 6.46981477,
        7.16837045],
       ...,
       [2.51134294, 7.56552437, 7.62887609, ..., 0.        , 5.60125545,
        3.06735942],
       [4.5880922 , 6.01936939, 6.46981477, ..., 5.60125545, 0.        ,
        5.235826  ],
       [2.86475125, 5.40159774, 7.16837045, ..., 3.06735942, 5.235826  ,
        0.        ]])

In [33]:
def atoms_to_distance(atom):
    """Return the raw distance matrix of an atoms dictionary as a torch tensor.

    ``atom`` is passed to ``Atoms.from_dict``; the resulting structure's
    ``raw_distance_matrix`` is copied into a new tensor.
    """
    distances = Atoms.from_dict(atom).raw_distance_matrix
    return torch.tensor(distances)

In [None]:
def get_interaction_graph(d, cutoff):
    """Build the edge list of all pairs closer than ``cutoff``.

    A pair ``(i, j)`` is an edge when ``0 < d[i, j] <= cutoff``, so
    zero-distance entries (including the diagonal) are excluded.

    Parameters
    ----------
    d : 2-D tensor or array-like
        Distance matrix.
    cutoff : float
        Largest distance that still counts as an interaction.

    Returns
    -------
    edge_index : int64 tensor of shape (E, 2)
        The ``(i, j)`` index pairs in row-major order, the same order the
        original nested loop produced.
    edge_weight : tensor of shape (E,)
        The matching distances, in torch's default floating dtype.
    """
    d = torch.as_tensor(d)
    within_cutoff = (d > 0) & (d <= cutoff)

    # Vectorised replacement for the per-element Python loop: nonzero() and
    # boolean-mask indexing both walk the matrix in row-major order.
    edge_index = torch.nonzero(within_cutoff)
    edge_weight = d[within_cutoff].to(torch.get_default_dtype())

    return edge_index, edge_weight

In [1]:
import os
import sys
# sys.path.insert(0, "/home/holywater2/2023/porousequivariantnetworks/code/")
# sys.path.insert(0, "/home/holywater2/2023/porousequivariantnetworks/")
print(sys.path)
# sys.path.insert(0, "../porousequivariantnetworks/code/models")

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

from models.schnet import SchNet

['/home/holywater2/2023/_Reproduce', '/home/holywater2/.conda/envs/porous/lib/python310.zip', '/home/holywater2/.conda/envs/porous/lib/python3.10', '/home/holywater2/.conda/envs/porous/lib/python3.10/lib-dynload', '', '/home/holywater2/.conda/envs/porous/lib/python3.10/site-packages', '/home/holywater2/.conda/envs/porous/lib/python3.10/site-packages/mpmath-1.2.1-py3.10.egg']


In [3]:
from utils.ZeoliteData import get_zeolite, get_data_pore, get_data_graph, get_data_megnet
from utils.dataloading import get_data, get_graph_data

# Print arrays of up to 10,000 elements in full instead of summarising them with "...".
import numpy
numpy.set_printoptions(threshold=10_000)

In [4]:
# Load the data dictionary for the 'MOR' zeolite.
data = get_zeolite('MOR')

In [5]:
# Display the loaded dictionary (keys seen in the output include 'ref', 'tra', 'l' and 'X').
data

{'ref': array([[ 1,  1,  1],
        [-1, -1,  1],
        [ 1, -1, -1],
        [-1,  1, -1],
        [-1, -1, -1],
        [ 1,  1, -1],
        [-1,  1,  1],
        [ 1, -1,  1],
        [ 1,  1,  1],
        [-1, -1,  1],
        [ 1, -1, -1],
        [-1,  1, -1],
        [-1, -1, -1],
        [ 1,  1, -1],
        [-1,  1,  1],
        [ 1, -1,  1]]),
 'tra': array([[0. , 0. , 0. ],
        [0. , 0. , 0.5],
        [0. , 0. , 0. ],
        [0. , 0. , 0.5],
        [0. , 0. , 0. ],
        [0. , 0. , 0.5],
        [0. , 0. , 0. ],
        [0. , 0. , 0.5],
        [0.5, 0.5, 0. ],
        [0.5, 0.5, 0.5],
        [0.5, 0.5, 0. ],
        [0.5, 0.5, 0.5],
        [0.5, 0.5, 0. ],
        [0.5, 0.5, 0.5],
        [0.5, 0.5, 0. ],
        [0.5, 0.5, 0.5]]),
 'l': array([18.256, 20.534,  7.542]),
 'X': array([[0.3057, 0.0736, 0.0435],
        [0.3028, 0.3106, 0.0437],
        [0.415 , 0.121 , 0.75  ],
        [0.415 , 0.277 , 0.75  ],
        [0.6943, 0.9264, 0.5435],
        [0.6972,

In [7]:
# Unpack the entries of the zeolite dictionary that the following cells use.
ref = data['ref'] # reflections
tra = data['tra'] # translations
l = data['l'] # scale of the unit cell

In [29]:
atoms, hoa, X, A, d, X_pore, A_pore, d_pore, pore = get_data(l)
# X is the position
# A is the adjacency matrix
# d is the distance matrix

In [23]:
# Derive the edges and the idx1 / idx2 / idx2_oh index structures from the
# adjacency matrix A and the distance matrix d.
edges, idx1, idx2, idx2_oh = get_graph_data(A, d)

In [14]:
# Number of entries in `atoms`.
len(atoms)

4123

In [37]:
# Build the SchNet model from the distance matrix d and move it to the GPU.
# NOTE(review): 'cuda' is hard-coded, so this cell fails on a CPU-only machine.
mpnn = SchNet(d).to('cuda')

In [38]:
# Build the test and train loaders; the first return value is discarded.
# bs=32 is presumably the batch size. TODO confirm against get_data_graph.
_, testloader, trainloader = get_data_graph(atoms, hoa, edges, bs=32, sub_lim=12)

In [44]:
# Confirm that testloader is a torch DataLoader.
testloader

<torch.utils.data.dataloader.DataLoader at 0x7f8522c2f5b0>

In [46]:
# Pull the first batch from the test loader for inspection.
a = next(iter(testloader))

In [53]:
# Display the batch (the output shows a list of tensors).
a

[tensor([[[0],
          [0],
          [0],
          ...,
          [1],
          [0],
          [0]],
 
         [[0],
          [0],
          [1],
          ...,
          [0],
          [0],
          [0]],
 
         [[0],
          [0],
          [0],
          ...,
          [1],
          [1],
          [1]],
 
         ...,
 
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
 
         [[0],
          [1],
          [0],
          ...,
          [0],
          [0],
          [0]],
 
         [[1],
          [1],
          [0],
          ...,
          [0],
          [0],
          [0]]]),
 tensor([[[3.1075],
          [3.1224],
          [3.1264],
          ...,
          [3.2343],
          [3.0959],
          [3.1073]],
 
         [[3.1075],
          [3.1224],
          [3.1264],
          ...,
          [3.2343],
          [3.0959],
          [3.1073]],
 
         [[3.1075],
          [3.1224],
          [3.1264],


In [49]:
# Fit the model with AdamW at lr=0.001 and keep the returned train/test losses.
# The positional 200 is presumably the number of epochs. TODO confirm against SchNet.fit.
trainloss, testloss = mpnn.fit(trainloader, testloader, 200, scale_loss=False, opt=optim.AdamW,opt_kwargs={'lr':0.001}, crit_kwargs={'delta':1.0})

tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [1],
        [1],
        [0],
        [0],
        [1],
        [0],
        [1],
        [0],
        [1],
        [0],
        [0],
        [0],
        [0],
        [0],
        [1],
        [1],
        [0],
        [0],
        [0],
        [1],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [1],
        [0],
        [0],
        [1],
        [0],
        [1],
        [0],
        [0],
        [0],
        [0],
        [0],
        [1],
        [0],
        [0]])

In [None]:
# Train one model on the MOR zeolite data, then save its weights and loss curves.
#
# The argparse setup this script was written for is kept below for reference.
# It is commented out, which left `args` and `model_name` undefined, so the
# same settings are now given explicitly right after it.
#
# parser = argparse.ArgumentParser()
#
# parser.add_argument('-n', '--name', type=str)
# parser.add_argument('-m', '--model_type', choices=['pore', 'equi','megnet','cgcnn','schnet'], type=str)
# parser.add_argument('-p', '--prop_train', type=float)
# parser.add_argument('-r', '--repetitions', type=int)
# parser.add_argument('-i', '--initial_repetition', type=int, default=1)
#
# args = parser.parse_args()
#
# model_name = args.name
# print(args)
# for i in range(args.repetitions):
#     print('started repetition', i)
#     model_name = f'model_{i+1+args.initial_repetition}'

from types import SimpleNamespace

args = SimpleNamespace(
    # One of 'pore', 'equi', 'megnet', 'cgcnn', 'schnet'.
    model_type='schnet',
    # Forwarded as `p` to the loader builders. None (what argparse yields
    # without -p) means "do not pass p, use the loader's own default".
    prop_train=None,
)
model_name = 'model_1'

# NOTE(review): only SchNet is imported in the visible cells. MPNNPORE, MPNN,
# MEGNet and CGCNN must be imported before their model_type can be selected.

data_dir = os.path.join('model_data', str(args.prop_train), args.model_type, model_name)

# exist_ok lets the cell be re-run; earlier results in data_dir are overwritten.
os.makedirs(data_dir, exist_ok=True)

print('started!')


data = get_zeolite('MOR')

ref = data['ref'] # reflections
tra = data['tra'] # translations
l = data['l'] # scale of the unit cell

# specific for MOR
atoms, hoa, X, A, d, X_pore, A_pore, d_pore, pore = get_data(l)

edges, idx1, idx2, idx2_oh = get_graph_data(A, d)

# Keyword arguments shared by every loader builder below.
loader_kwargs = {'bs': 32, 'sub_lim': 12}
if args.prop_train is not None:
    loader_kwargs['p'] = args.prop_train

if args.model_type == 'pore':

    edges_sp, idx1_sp, idx2_sp, idx2_oh_sp = get_graph_data(A_pore, d_pore)
    edges_ps, idx1_ps, idx2_ps, idx2_oh_ps = get_graph_data(A_pore.T, d_pore.T)

    mpnn = MPNNPORE(idx1.to('cuda'), idx2.to('cuda'), idx2_oh.to('cuda'), X, X_pore, ref, tra,
                    idx1_sp.to('cuda'), idx2_sp.to('cuda'), idx2_oh_sp.to('cuda'),
                    idx1_ps.to('cuda'), idx2_ps.to('cuda'), idx2_oh_ps.to('cuda'),
                    hid_size=[8]*6, site_emb_size=8, edge_emb_size=8, mlp_size=24,
                    centers=10, mx_d=6, width=1, pool='sum').to('cuda')
    _, testloader, trainloader = get_data_pore(atoms, hoa, edges, pore, edges_sp, edges_ps, **loader_kwargs)

elif args.model_type == 'equi':

    mpnn = MPNN(idx1.to('cuda'), idx2.to('cuda'), idx2_oh.to('cuda'), X, ref, tra,
                hid_size=[8]*6, site_emb_size=8, edge_emb_size=8, mlp_size=24,
                centers=10, mx_d=6, width=1, pool='sum').to('cuda')
    _, testloader, trainloader = get_data_graph(atoms, hoa, edges, **loader_kwargs)

elif args.model_type == 'megnet':

    mpnn = MEGNet(idx1.to('cuda'), idx2.to('cuda')).to('cuda')
    _, testloader, trainloader = get_data_megnet(atoms, hoa, edges, **loader_kwargs)

elif args.model_type == 'cgcnn':

    mpnn = CGCNN(idx1.to('cuda'), idx2.to('cuda')).to('cuda')
    _, testloader, trainloader = get_data_graph(atoms, hoa, edges, **loader_kwargs)

elif args.model_type == 'schnet':

    mpnn = SchNet(d).to('cuda')
    _, testloader, trainloader = get_data_graph(atoms, hoa, edges, **loader_kwargs)

else:
    # Without this branch an unknown type surfaced later as a NameError on `mpnn`.
    raise ValueError(f'unknown model_type: {args.model_type!r}')

print('starting fitting!')
trainloss, testloss = mpnn.fit(trainloader, testloader, 200, scale_loss=False, opt=optim.AdamW,opt_kwargs={'lr':0.001}, crit_kwargs={'delta':1.0})


print('done fitting!')


torch.save(mpnn.state_dict(), os.path.join(data_dir, 'model.pth'))

np.save(os.path.join(data_dir, 'tr_loss.npy'), trainloss)

np.save(os.path.join(data_dir, 'te_loss.npy'), testloss)
