In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

import networkx as nx
from scipy import sparse

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, GATConv, GINConv, global_max_pool, GlobalAttention, GatedGraphConv
from torch_geometric.data import Data, DataLoader
from torch_geometric.utils import softmax
from torch_geometric.utils.convert import from_scipy_sparse_matrix
from torch_geometric.data import Data, DataLoader


from pyscf import gto, scf, tools, ao2mo


import train
from graph_model import SecondNet, SimpleNet, THCNet
from preprocess import build_qm7, build_thc_graph
from train import train, test
from thc import THCContainer
from utils import khatri_rao

Numpy 1.16 has memory leak bug  https://github.com/numpy/numpy/issues/13808
It is recommended to downgrade to numpy 1.15 or older


In [2]:
# Basis set handed to build_qm7; set basis = 'cc-pvdz' to switch basis.
basis = 'sto-3g'
# Only the first two molecules returned by build_qm7 are kept.
mols = build_qm7(basis)[:2]

In [3]:
# Options for THCContainer; the dict is passed as a single positional argument.
kwargs = dict(
    grid_points_per_atom=300,
    epsilon_qr=1e-15,
    epsilon_inv=1e-15,
    verbose=True,
)
# One THCContainer per molecule, in the same order as `mols`.
mol_data = [THCContainer(molecule, kwargs) for molecule in mols]

  with h5py.File(chkfile) as fh5:
  h5py.File.__init__(self, filename, *args, **kwargs)


rho L2: 1.5681044424292648e-15
T_ao L_infinity: 5.551372972423252e-10
T_mo L_infinity: 6.640807946947658e-10
T_mo L_2: 3.5671881623186717e-09
rho L2: 5.210117043563397e-15
T_ao L_infinity: 4.6908578511533705e-06
T_mo L_infinity: 6.615021421296774e-06
T_mo L_2: 1.532191554624319e-05


In [4]:
# Build one graph per THC container, printing first how far the THC
# quantities are from the reference ones (norm of the difference).
dataset = []
for con in mol_data:

    # Index 0 is reported under the "_J" labels, index 2 under the plain labels.
    for label, term in (("E_J loss", 0), ("E loss", 2)):
        print(label, np.linalg.norm(con.E[term] - con.E_THC[term]))
    for label, term in (("MP2_J loss", 0), ("MP2 loss", 2)):
        print(label, np.linalg.norm(con.MP2[term] - con.MP2_THC[term]))
    print(con.E[2].shape)
    print("")

    # `data` stays bound to the last graph after the loop finishes.
    data = build_thc_graph(con)
    dataset.append(data)

E_J loss 5.2466440453153826e-11
E loss 3.60994965108241e-11
MP2_J loss 1.0409315076564951e-10
MP2 loss 6.313343597907917e-11
(5, 4)

E_J loss 6.14238886117156e-08
E loss 4.0988257037528114e-08
MP2_J loss 1.2210252095523622e-07
MP2 loss 7.614180207171639e-08
(9, 7)



In [5]:
# Look at a single edge of `data` (the graph the preceding loop bound last):
# the feature rows of its two endpoints followed by the edge's own attributes.
edge_id = 3000
u, v = (endpoint.item() for endpoint in data.edge_index[:, edge_id])
for node in (u, v):
    print(data.x[node])
print(data.edge_attr[edge_id])

tensor([-1.1036e+01,  2.0000e+00,  6.4574e-01,  0.0000e+00,  8.6842e-06,
         1.3423e-16, -3.6645e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         1.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       dtype=torch.float64)
tensor([-1.1036e+01,  2.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  1.0000e+00,  0.0000e+00, -5.2673e-03,  0.0000e+00],
       dtype=torch.float64)
tensor([ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -0.0071,  0.0000],
       dtype=torch.float64)


In [6]:
# For every graph print the column-wise maximum, minimum and mean of its
# edge attributes, followed by a blank separator line.
for data in dataset:
    edge_attr = data.edge_attr
    print(edge_attr.max(dim=0)[0])   # [0] keeps the values, drops the argmax indices
    print(edge_attr.min(dim=0)[0])   # [0] keeps the values, drops the argmin indices
    print(edge_attr.mean(dim=0))

    print()

tensor([0.9923, 2.0000, 0.0000, 1.0000, 4.6944, 1.0000, 0.3954, 0.1283],
       dtype=torch.float64)
tensor([-1.9682e+01, -1.3914e-15, -2.1473e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00, -2.9366e-01, -1.0535e-01], dtype=torch.float64)
tensor([-1.5926e-02,  3.0763e-03, -7.4403e-05,  2.8840e-03,  1.5287e+00,
         6.6332e-03,  5.9229e-03,  2.3435e-04], dtype=torch.float64)

tensor([1.2833, 2.0000, 0.0000, 1.0000, 6.5856, 1.0000, 0.3984, 0.1557],
       dtype=torch.float64)
tensor([-2.2032e+01, -1.7099e-15, -2.2569e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00, -4.2863e-01, -1.7077e-01], dtype=torch.float64)
tensor([-5.8321e-03,  7.3435e-04, -1.7370e-05,  6.6907e-04,  2.1973e+00,
         2.6751e-03,  2.0843e-03,  6.2692e-05], dtype=torch.float64)



In [None]:
import torch.optim as optim

def train(model, loader, lr = 0.003, iterations = 10, verbose = False, lamb = 1.0, device = torch.device("cpu")):
    """Train ``model`` to correct the THC approximation of the J term.

    For each sample yielded by ``loader`` the prediction is
    ``E_pred = E_THC + lamb * E_hat`` where ``E_THC = data.con.E_THC[0]`` and
    ``E_hat`` is column 0 of ``model(data)``, restricted to the rows selected
    by ``data.E_mask`` and reshaped to the shape of ``E_THC``.  The loss is the
    relative error ``||E_true - E_pred|| / ||E_true||`` with
    ``E_true = data.con.E[0]``.

    NOTE: this definition rebinds the name ``train`` that the first cell
    imports from the ``train`` module.

    Args:
        model: ``torch.nn.Module`` invoked as ``model(data)``.
        loader: iterable of samples exposing ``con`` (holding NumPy arrays in
            ``E[0]`` and ``E_THC[0]``) and ``E_mask``.  It is iterated once
            per iteration, so it must be re-iterable.
        lr: learning rate of the Adam optimizer.
        iterations: number of passes over ``loader``.
        verbose: when true, print the mean loss after every pass.
        lamb: scale applied to the network output before it is added to
            ``E_THC``.
        device: device the model, the samples and the target tensors are
            moved to before the forward pass.

    Returns:
        list: mean loss of each pass, in order.

    The model is left in eval mode on return.
    """
    # Honour `device`: parameters must live there before the optimizer and the
    # forward pass touch them.
    model.to(device)
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    losses = []
    for i in range(iterations):
        batch_losses = []
        for data in loader:
            # Samples that know how to relocate themselves follow the model;
            # anything else is passed through untouched.
            if hasattr(data, "to"):
                data = data.to(device)

            # Index 0 selects the J term.
            E_THC = torch.from_numpy(data.con.E_THC[0]).to(device)
            E_true = torch.from_numpy(data.con.E[0]).to(device)

            E_hat = model(data)[data.E_mask][:, 0].reshape(E_THC.shape)
            E_pred = E_THC + lamb * E_hat

            # Dividing by ||E_true|| makes the loss a relative error.
            loss = torch.norm(E_true - E_pred) / torch.norm(E_true)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())

        batch_loss = np.mean(np.array(batch_losses))
        losses.append(batch_loss)
        if verbose:
            print("timestep: {}, loss: {:e}".format(i, batch_loss))

    model.eval()
    return losses

In [None]:
# Seed PyTorch's global RNG so that any random weight initialisation inside
# THCNet -- and with it the loss curve printed below -- is the same on every
# Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Feature widths are read off the first graph in the dataset.
vertex_dim = dataset[0].x.shape[1]
edge_dim = dataset[0].edge_attr.shape[1]
hidden_dim = 20
# .double(): the graph features printed earlier are float64.
model = THCNet(vertex_dim, edge_dim, hidden_dim).double()

# Training hyper-parameters.
lr = 0.001
verbose = True
lamb = 1e-1  # scale applied to the network output inside train()

# `dataset` (a plain list of graphs) is passed where train() expects a loader,
# so each graph is processed as its own batch.
losses = train(model, dataset, iterations = 200, lr = lr, verbose = verbose, lamb = lamb)