In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

import networkx as nx
from scipy import sparse

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, GATConv, GINConv, global_max_pool, GlobalAttention, GatedGraphConv
from torch_geometric.data import Data, DataLoader
from torch_geometric.utils import softmax
from torch_geometric.utils.convert import from_scipy_sparse_matrix

from pyscf import gto, scf, tools, ao2mo


import model
import train
from model import SecondNet, SimpleNet
from preprocess import build_graph, build_qm7
from train import train, test
from hf import get_data, save_data, load_data

Numpy 1.16 has memory leak bug  https://github.com/numpy/numpy/issues/13808
It is recommended to downgrade to numpy 1.15 or older


In [2]:
# Build the molecule list with the project helper; 'sto-3g' is presumably the
# basis-set name -- TODO confirm against preprocess.build_qm7.
mols = build_qm7('sto-3g')
# Keep only the first 20 entries.
# NOTE(review): the previous comment here read "Omit first molecule, outlier geometry",
# but `[:20]` retains index 0 -- confirm whether a slice starting at 1 was intended.
mols = mols[:20]
# Name handed to save_data / load_data in the following cell.
filename = "sto33"

In [3]:
#TODO: Encode number of electrons explicitly
#TODO: Encode HF features?
#TODO: Encode the "flavor" of the orbital basis as features as well

# Write the molecules out under `filename`, then read them straight back.
# `force = True` presumably overwrites/recomputes any existing file -- TODO confirm in hf.save_data.
save_data(mols, filename, force = True)
# 'MO' presumably selects the molecular-orbital representation -- TODO confirm in hf.load_data.
# Only the first 10 entries of whatever load_data returns are kept.
mol_data = load_data(filename, 'MO')[:10]

  with h5py.File(chkfile) as fh5:
  h5py.File.__init__(self, filename, *args, **kwargs)


In [4]:
# Notation (as given by the original author):
#   M: number of orbitals
#   N: number of electrons
#   F: feature vector length
#
#   A: potential matrix, M x M
#   U: Coulomb 4-tensor, M x M x M x M
#   X: additional orbital feature matrix, M x F_1
#   Y: additional pairwise orbital feature matrix, M x M x F_2
#   E: ground state energy
#   P, mo_occ: not described in the original notes -- presumably a density matrix and
#              the molecular-orbital occupations; TODO confirm against hf.load_data.

# Convert every loaded record into one graph object and collect them in `dataset`.
dataset = []
for mol in mol_data:

    # Each record must unpack into exactly these seven fields.
    A, U, X, Y, P, E, mo_occ = mol

    # epsilon is passed as 0.0; its meaning is defined in preprocess.build_graph
    # (presumably an edge-pruning threshold -- TODO confirm).
    data = build_graph(A, U, X, Y, P, E, mo_occ, epsilon = 0.0)

    dataset.append(data)

In [5]:
import random

# Tunables for the split. A fixed seed makes the train/test partition identical on
# every Restart & Run All instead of changing with each kernel session.
SPLIT_SEED = 0
TRAIN_FRACTION = 0.8
BATCH_SIZE = 2

# Shuffle `dataset` in place (so `dataset[:split]` / `dataset[split:]` keep matching the
# loaders) using a private, seeded generator that leaves the global `random` state alone.
# Note: re-running this cell without rebuilding `dataset` shuffles the already-shuffled
# list again; rebuild `dataset` first to get the canonical split.
random.Random(SPLIT_SEED).shuffle(dataset)

# First TRAIN_FRACTION of the shuffled graphs train the model, the remainder evaluate it.
split = int(TRAIN_FRACTION * len(dataset))
train_loader = DataLoader(dataset[:split], batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset[split:], batch_size=BATCH_SIZE)

In [6]:
import importlib
# Re-execute model.py so that edits made since the first import take effect
# without restarting the kernel.
importlib.reload(model)
# Names bound by the earlier `from model import ...` still refer to the pre-reload
# class objects, so they have to be imported again after the reload.
from model import SecondNet, SimpleNet


In [9]:
# Input widths are read off the first graph: the size of axis 1 of its `x` and
# `edge_attr` attributes (presumably per-node and per-edge feature counts -- TODO confirm
# against preprocess.build_graph).
first_graph = dataset[0]
vertex_dim, edge_dim = first_graph.x.shape[1], first_graph.edge_attr.shape[1]
hidden_dim = 20

# Squared error drives optimisation; absolute error is used when evaluating.
train_criterion, test_criterion = nn.MSELoss(), nn.L1Loss()

# Print floats with 8 decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=8)

In [10]:
# Training hyper-parameters, named so they can be tuned in one place.
MODEL_SEED = 0
LEARNING_RATE = 0.002
N_ITERATIONS = 100

# Seed torch's global RNG before the network is constructed so that the initial weights,
# and therefore the losses reported below, are the same on every fresh run.
torch.manual_seed(MODEL_SEED)

# `.double()` is called on the freshly built network (presumably an nn.Module cast to
# float64 to match the dtype of the graph tensors -- TODO confirm).
net = SecondNet(vertex_dim, edge_dim, hidden_dim, p = 0.0).double()
# Alternative architecture with the same constructor arguments:
# net = SimpleNet(vertex_dim, edge_dim, hidden_dim, p = 0.0).double()

# Fit on the training loader; with verbose = True the project's `train` prints a
# "timestep: ..." line per iteration.
losses = train(net, train_loader, lr = LEARNING_RATE, iterations = N_ITERATIONS, criterion = train_criterion, verbose = True)
# Show every 10th entry of the returned loss history.
print(losses[::10])

# Evaluate on the held-out loader with the test criterion.
loss = test(net, test_loader, test_criterion)
print(loss)


timestep: 0, loss: [0.00459341 1.31204173]
timestep: 1, loss: [0.00209961 0.18992052]
timestep: 2, loss: [0.00127826 0.12187536]
timestep: 3, loss: [0.00060934 0.16284255]
timestep: 4, loss: [0.00057647 0.05444107]
timestep: 5, loss: [0.00042228 0.03941029]
timestep: 6, loss: [0.00026837 0.04912176]
timestep: 7, loss: [0.00028097 0.04882609]
timestep: 8, loss: [0.00022702 0.03529826]
timestep: 9, loss: [0.00021101 0.03343455]
timestep: 10, loss: [0.00019217 0.04213438]
timestep: 11, loss: [0.00016708 0.04108452]
timestep: 12, loss: [0.00015271 0.02956089]
timestep: 13, loss: [0.00015091 0.02690331]
timestep: 14, loss: [0.00013573 0.02964537]
timestep: 15, loss: [0.00012565 0.02876005]
timestep: 16, loss: [0.00012112 0.02552007]
timestep: 17, loss: [0.00011392 0.02469886]
timestep: 18, loss: [0.00010719 0.02363961]
timestep: 19, loss: [0.00010135 0.02165481]
timestep: 20, loss: [0.00009633 0.02107371]
timestep: 21, loss: [0.00009004 0.02077105]
timestep: 22, loss: [0.00008414 0.01909219