In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

import networkx as nx
from scipy import sparse

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, GATConv, GINConv, global_max_pool, GlobalAttention, GatedGraphConv
from torch_geometric.data import Data, DataLoader
from torch_geometric.utils import softmax
from torch_geometric.utils.convert import from_scipy_sparse_matrix

from pyscf import gto, scf, tools, ao2mo


import model
import train
from model import SecondNet, SimpleNet
from preprocess import build_graph, build_qm7
from train import train, test
from hf import get_data, save_data, load_data

Numpy 1.16 has memory leak bug  https://github.com/numpy/numpy/issues/13808
It is recommended to downgrade to numpy 1.15 or older


In [2]:
# Name under which the next cell saves and re-loads the computed data.
filename = "sto33"

# Build the QM7 molecules with the 'sto-3g' argument (presumably the basis set)
# and keep the first 20 entries.
# NOTE(review): the previous comment here said "Omit first molecule, outlier
# geometry", but this slice keeps index 0 — confirm whether build_qm7 already
# drops it or whether the slice should start at 1.
mols = build_qm7('sto-3g')[:20]

In [3]:
# Open ideas for richer model inputs:
#TODO: Encode number of electrons explicitly
#TODO: Encode HF features?
#TODO: Encode the "flavor" of the orbital basis as features as well

# Write the data for `mols` under `filename` (force=True is passed on every run),
# then read the 'MO' records back and keep only the first three of them.
save_data(mols, filename, force=True)
mo_records = load_data(filename, 'MO')
mol_data = mo_records[:3]

  with h5py.File(chkfile) as fh5:
  h5py.File.__init__(self, filename, *args, **kwargs)


In [4]:
# Notation used in this cell
#   M: number of orbitals
#   N: number of electrons
#   F: feature vector length
#
#   A: potential matrix, M x M
#   U: Coulomb 4-tensor, M x M x M x M
#   X: additional orbital feature matrix, M x F_1
#   Y: additional pairwise orbital feature matrix, M x M x F_2
#   E: ground state energy
#   P, mo_occ: not described in this notebook — TODO document (see build_graph)

# Convert every loaded record into a graph object. Each record must unpack into
# exactly these seven fields, which are forwarded to build_graph unchanged.
dataset = []
for A, U, X, Y, P, E, mo_occ in mol_data:
    dataset.append(build_graph(A, U, X, Y, P, E, mo_occ, epsilon=0.0))

In [5]:
import random

# Fixed seed so the train/test split is identical on every Restart & Run All.
SPLIT_SEED = 0
# Fraction of the graphs that goes into the training loader.
TRAIN_FRACTION = 0.8

# Shuffle a copy instead of `dataset` itself: re-running this cell then yields
# the same split rather than re-shuffling an already shuffled list, and
# `dataset` keeps the order in which the graphs were built.
dataset_shuffled = list(dataset)
random.Random(SPLIT_SEED).shuffle(dataset_shuffled)

# The first `split` graphs are used for training, the remainder for testing.
split = int(TRAIN_FRACTION * len(dataset_shuffled))
train_loader = DataLoader(dataset_shuffled[:split], batch_size = 2)
test_loader = DataLoader(dataset_shuffled[split:], batch_size = 2)

In [10]:
# Development helper: re-execute the `model` module so that edits made to it
# since the first import are picked up, then rebind the two network classes to
# the freshly loaded definitions.
import importlib

model = importlib.reload(model)
SecondNet, SimpleNet = model.SecondNet, model.SimpleNet


In [11]:
# Feature widths are read off the first graph in the dataset (second dimension
# of its node-feature and edge-feature arrays).
first_graph = dataset[0]
vertex_dim = first_graph.x.shape[1]
edge_dim = first_graph.edge_attr.shape[1]
hidden_dim = 20

# MSE is the criterion handed to train(); L1 is the one handed to test().
train_criterion, test_criterion = nn.MSELoss(), nn.L1Loss()

# Print numpy arrays with 8 decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=8)

In [12]:
# Seed torch before building the network so that any torch-based random
# initialisation inside the model is repeatable from run to run.
torch.manual_seed(0)

# Build the network in double precision; p = 0.0 is passed through to the model.
net = SecondNet(vertex_dim, edge_dim, hidden_dim, p = 0.0).double()
# Alternative architecture with the same constructor arguments:
# net = SimpleNet(vertex_dim, edge_dim, hidden_dim, p = 0.0).double()

# Show the target value E stored on every graph before training starts.
for data in dataset:
    print(data.E)

# Train on the training loader and print every 10th entry of the returned losses.
losses = train(net, train_loader, lr = 0.002, iterations = 300, criterion = train_criterion, verbose = True)
print(losses[::10])

# Evaluate on the held-out loader with the test criterion.
# NOTE(review): the saved output of this cell ends in
# "NameError: name 'test_loader' is not defined" — presumably stale kernel
# state; confirm by running the notebook top to bottom on a fresh kernel.
loss = test(net, test_loader, test_criterion)
print(loss)


-0.0015226476972868294
-0.004276809273677748
-0.00801162633942964
timestep: 0, loss: [0.00262387 0.08838134]
timestep: 1, loss: [0.00032672 0.01379266]
timestep: 2, loss: [0.00042787 0.00546428]
timestep: 3, loss: [0.00035114 0.00009623]
timestep: 4, loss: [0.00007051 0.00019259]
timestep: 5, loss: [0.00006271 0.00022614]
timestep: 6, loss: [0.00013323 0.00057993]
timestep: 7, loss: [0.00008343 0.00135173]
timestep: 8, loss: [0.00001998 0.00099012]
timestep: 9, loss: [0.00002506 0.00027853]
timestep: 10, loss: [0.00003928 0.00044852]
timestep: 11, loss: [0.00002013 0.0001886 ]
timestep: 12, loss: [0.00001097 0.00042129]
timestep: 13, loss: [0.00001702 0.00083208]
timestep: 14, loss: [0.00001365 0.00046179]
timestep: 15, loss: [0.0000073  0.00019594]
timestep: 16, loss: [0.00000761 0.00014839]
timestep: 17, loss: [0.00000934 0.00015797]
timestep: 18, loss: [0.00000691 0.00016241]
timestep: 19, loss: [0.0000058  0.00017064]
timestep: 20, loss: [0.00000647 0.00023794]
timestep: 21, loss: 

timestep: 185, loss: [0.0000023 0.0000248]
timestep: 186, loss: [0.00000229 0.00002428]
timestep: 187, loss: [0.00000229 0.00002413]
timestep: 188, loss: [0.00000228 0.0000237 ]
timestep: 189, loss: [0.00000228 0.00002347]
timestep: 190, loss: [0.00000227 0.00002295]
timestep: 191, loss: [0.00000228 0.00002323]
timestep: 192, loss: [0.00000227 0.0000224 ]
timestep: 193, loss: [0.00000227 0.00002269]
timestep: 194, loss: [0.00000226 0.00002183]
timestep: 195, loss: [0.00000225 0.0000217 ]
timestep: 196, loss: [0.00000226 0.00002185]
timestep: 197, loss: [0.00000225 0.00002109]
timestep: 198, loss: [0.00000225 0.00002132]
timestep: 199, loss: [0.00000224 0.00002036]
timestep: 200, loss: [0.00000224 0.00002108]
timestep: 201, loss: [0.00000223 0.00001964]
timestep: 202, loss: [0.00000223 0.00002056]
timestep: 203, loss: [0.00000223 0.00001972]
timestep: 204, loss: [0.00000222 0.00001968]
timestep: 205, loss: [0.00000222 0.00001926]
timestep: 206, loss: [0.00000221 0.00001872]
timestep: 20

NameError: name 'test_loader' is not defined