In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

import networkx as nx
from scipy import sparse

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, GATConv, GINConv, global_max_pool, GlobalAttention, GatedGraphConv
from torch_geometric.data import Data, DataLoader
from torch_geometric.utils import softmax
from torch_geometric.utils.convert import from_scipy_sparse_matrix
from torch_geometric.data import Data, DataLoader


from pyscf import gto, scf, tools, ao2mo


import train
from graph_model import SecondNet, SimpleNet, THCNet
from preprocess import build_qm7, build_thc_graph
from train import train, test
from thc import THCContainer
from utils import khatri_rao

Numpy 1.16 has memory leak bug  https://github.com/numpy/numpy/issues/13808
It is recommended to downgrade to numpy 1.15 or older


In [2]:
# Basis-set name handed to build_qm7; the commented line is an alternative choice.
basis = 'sto-3g'
# basis = 'cc-pvdz'

# Build the molecule list and keep only its first two entries so the
# remaining cells run on a small sample.
mols = build_qm7(basis)[:2]

In [3]:
# Options passed to every THCContainer as one dict (second positional argument,
# not unpacked as keyword arguments).
kwargs = dict(
    grid_points_per_atom=10,
    epsilon_qr=1e-15,
    epsilon_inv=1e-15,
    verbose=True,
)

# One THCContainer per molecule, in the same order as `mols`.
mol_data = [THCContainer(molecule, kwargs) for molecule in mols]

  with h5py.File(chkfile) as fh5:
  h5py.File.__init__(self, filename, *args, **kwargs)


rho L2: 8.739769471082871e-17
T_ao L_infinity: 3.5419481285062613
T_mo L_infinity: 3.5011169696917284
T_mo L_2: 4.6800868603748444
rho L2: 6.407716821774357e-17
T_ao L_infinity: 3.5419423271946395
T_mo L_infinity: 1.9263477573051362
T_mo L_2: 7.577257103125808


In [4]:
dataset = []
for con in mol_data:
    # Print the norm of (reference - THC) for entries 0 and 2 of the energy
    # and MP2 results, always in this order.
    for label, exact_name, thc_name, term in (
        ("E_J loss", "E", "E_THC", 0),
        ("E loss", "E", "E_THC", 2),
        ("MP2_J loss", "MP2", "MP2_THC", 0),
        ("MP2 loss", "MP2", "MP2_THC", 2),
    ):
        gap = getattr(con, exact_name)[term] - getattr(con, thc_name)[term]
        print(label, np.linalg.norm(gap))
    print(con.E[2].shape)
    print()

    # build_thc_graph turns the container into one graph sample; it stands in
    # for the hand-written Data(...) construction that used to be sketched here.
    data = build_thc_graph(con)
    dataset.append(data)

E_J loss 0.0064803640378401535
E loss 0.004153715623926188
MP2_J loss 0.01275178495774533
MP2 loss 0.00714553245258371
(5, 4)

E_J loss 0.013447644992301163
E loss 0.00890297329760615
MP2_J loss 0.014744573909468234
MP2 loss 0.012345363714010585
(9, 7)



In [5]:
# For every graph, show the column-wise maximum, minimum and mean of its
# edge features (one value per edge-feature column).
for data in dataset:
    edge_attr = data.edge_attr
    for column_stat in (
        edge_attr.max(dim=0)[0],   # [0] keeps the values, drops the argmax indices
        edge_attr.min(dim=0)[0],   # [0] keeps the values, drops the argmin indices
        edge_attr.mean(dim=0),
    ):
        print(column_stat)

    print()

tensor([ 0.9923,  2.0000,  1.0000, 12.2999,  1.0000,  0.2306,  0.0000],
       dtype=torch.float64)
tensor([-1.9682e+01, -1.3914e-15,  0.0000e+00,  0.0000e+00,  0.0000e+00,
        -1.7698e-01,  0.0000e+00], dtype=torch.float64)
tensor([-2.1529e-02,  4.1536e-03,  3.9252e-03,  3.9469e+00,  7.3000e-03,
         3.8903e-04,  0.0000e+00], dtype=torch.float64)

tensor([ 1.2833,  2.0000,  1.0000, 16.5238,  1.0000,  0.2691,  0.0000],
       dtype=torch.float64)
tensor([-2.2032e+01, -1.7099e-15,  0.0000e+00,  0.0000e+00,  0.0000e+00,
        -2.6083e-01,  0.0000e+00], dtype=torch.float64)
tensor([-1.4205e-02,  1.7728e-03,  1.6414e-03,  4.7341e+00,  3.7050e-03,
         7.3472e-04,  0.0000e+00], dtype=torch.float64)



In [10]:
import torch.optim as optim

def train(model, loader, lr = 0.003, iterations = 10, verbose = False, lamb = 1.0, device = torch.device("cpu")):
    """Train ``model`` to correct the THC estimate of the J term.

    For each sample the prediction is ``E_THC + lamb * E_hat``, where ``E_hat``
    is column 0 of the model output on the rows selected by ``data.E_mask``,
    reshaped to the shape of ``E_THC``. The loss is the norm of the residual
    ``E_true - E_pred`` divided by the norm of ``E_true``.

    Args:
        model: ``torch.nn.Module`` called as ``model(data)``. It is moved to
            ``device`` in place.
        loader: iterable of samples. Each sample must expose ``con.E[0]`` and
            ``con.E_THC[0]`` as NumPy arrays of equal shape, plus ``E_mask``.
            Samples that have a ``.to(device)`` method are moved to ``device``.
        lr: Adam learning rate.
        iterations: number of full passes over ``loader``.
        verbose: when true, print the mean loss after every pass.
        lamb: scale applied to the model output before it is added to ``E_THC``.
        device: device on which the model, samples and targets are placed.

    Returns:
        A list with one entry per pass: the mean of the per-sample losses.
        An empty ``loader`` yields ``nan`` entries (mean of an empty list).
    """
    # Honour the requested device; previously the argument was accepted but unused.
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    losses = []
    model.train()
    try:
        for i in range(iterations):
            batch_losses = []
            for data in loader:
                # Plain containers without a .to() method are used as they are.
                batch = data.to(device) if hasattr(data, "to") else data

                # Index 0 selects the J term of both the THC and reference values.
                E_THC = torch.from_numpy(batch.con.E_THC[0]).to(device)
                E_true = torch.from_numpy(batch.con.E[0]).to(device)

                E_hat = model(batch)[batch.E_mask][:, 0].reshape(E_THC.shape)
                E_pred = E_THC + lamb * E_hat

                # Relative error, so samples of different magnitude weigh alike.
                loss = torch.norm(E_true - E_pred) / torch.norm(E_true)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                batch_losses.append(loss.item())

            batch_loss = np.mean(batch_losses)
            losses.append(batch_loss)
            if verbose:
                print("timestep: {}, loss: {:e}".format(i, batch_loss))
    finally:
        # Leave the model in eval mode even if a step raised or was interrupted.
        model.eval()

    return losses

In [11]:
# Seed PyTorch's global RNG before the model is created so that parameter
# initialisation (and any other torch randomness during training) repeats
# across Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Feature widths are read from the first graph in the dataset.
vertex_dim = dataset[0].x.shape[1]
edge_dim = dataset[0].edge_attr.shape[1]
hidden_dim = 20
# .double() switches the model parameters to float64.
model = THCNet(vertex_dim, edge_dim, hidden_dim).double()

lr = 0.001
verbose = True
lamb = 1e-1

# The dataset list is passed directly as the loader, so graphs are visited
# one at a time in list order.
losses = train(model, dataset, iterations = 200, lr = lr, verbose = verbose, lamb = lamb)

timestep: 0, loss: 6.035934e-01
timestep: 1, loss: 5.435130e-01
timestep: 2, loss: 4.867439e-01
timestep: 3, loss: 4.318143e-01
timestep: 4, loss: 3.785291e-01
timestep: 5, loss: 3.261483e-01
timestep: 6, loss: 2.737901e-01
timestep: 7, loss: 2.205453e-01
timestep: 8, loss: 1.670143e-01
timestep: 9, loss: 1.160370e-01
timestep: 10, loss: 8.060720e-02
timestep: 11, loss: 8.322713e-02
timestep: 12, loss: 1.010259e-01
timestep: 13, loss: 1.048495e-01
timestep: 14, loss: 9.327526e-02
timestep: 15, loss: 7.500714e-02
timestep: 16, loss: 6.306815e-02
timestep: 17, loss: 6.444906e-02
timestep: 18, loss: 6.998090e-02
timestep: 19, loss: 7.155316e-02
timestep: 20, loss: 6.858520e-02
timestep: 21, loss: 6.423701e-02
timestep: 22, loss: 6.235106e-02
timestep: 23, loss: 6.359390e-02
timestep: 24, loss: 6.479669e-02
timestep: 25, loss: 6.377614e-02
timestep: 26, loss: 6.155884e-02
timestep: 27, loss: 6.032448e-02
timestep: 28, loss: 6.066803e-02
timestep: 29, loss: 6.138520e-02
timestep: 30, loss: 