In [1]:
import sys # required for relative imports in jupyter lab
sys.path.insert(0, '../') 

from cosmosis.model import FFNet
from dataset import SKmake, QM7, QM7b, QM7X, QM9, ANI1x
from learning import Learn, Selector

from torch.optim import Adam
from torch.nn import MSELoss, L1Loss
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [None]:
# Train FFNet on an SKmake dataset configured with make='make_regression'.
# Every *_params dict below is handed to Learn under the keyword of the same name.

# Dataset options; n_features (128) matches model_params['D_in'].
ds_params = dict(
    make='make_regression',
    make_params=dict(n_samples=10000, n_features=128),
)

# Model options.
model_params = dict(D_in=128, H=512, D_out=1, model_name='funnel')

# Sampling, optimizer, scheduler and loss options.
sample_params = dict(set_seed=88, splits=(0.15, 0.7))
opt_params = dict(lr=0.01)
sched_params = dict(factor=0.5, patience=2)
crit_params = dict(reduction='sum')

l = Learn(
    SKmake, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=MSELoss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=256,
    epochs=10,
)

In [None]:
# Train FFNet on QM7 with an L1 loss.
# Every *_params dict below is handed to Learn under the keyword of the same name.

# NOTE(review): no dataset options are given here, so QM7 runs with whatever
# defaults it defines; the stand-alone QM7 cell in this notebook passes
# features/targets/embed/in_file explicitly - confirm the defaults still apply.
ds_params = dict()

# D_in is 23*23 - presumably a flattened 23x23 matrix per sample; TODO confirm.
model_params = dict(D_in=23 * 23, H=4096, D_out=1, model_name='funnel')

# Sampling, optimizer, scheduler and loss options.
sample_params = dict(set_seed=88, splits=(0.1, 0.8))
opt_params = dict(lr=0.01)
sched_params = dict(factor=0.1, patience=10, cooldown=10)
crit_params = dict(reduction='sum')

l = Learn(
    QM7, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=L1Loss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=256,
    epochs=10,
)

In [None]:
# Train FFNet on QM7b with an L1 loss.
# Every *_params dict below is handed to Learn under the keyword of the same name.

# The 13 scalar properties requested as input features.
qm7b_features = ['alpha_p', 'alpha_s', 'HOMO_g', 'HOMO_p', 'HOMO_z', 'LUMO_g',
                 'LUMO_p', 'LUMO_z', 'IP', 'EA', 'E1', 'Emax', 'Imax']

# NOTE(review): this cell uses the key 'target' (singular, plain string) and
# leaves 'coulomb' out of the features, whereas the stand-alone QM7b cell in
# this notebook uses 'targets': ['E'] and lists 'coulomb' first. D_in below
# includes a 23*23 term, so a matrix input appears to be expected - confirm
# which argument form the current QM7b accepts.
ds_params = dict(target='E', features=qm7b_features)

# D_in = 23*23 + 13: presumably a flattened 23x23 matrix plus the 13 scalars.
model_params = dict(D_in=23 * 23 + 13, H=4096, D_out=1, model_name='funnel')

# Sampling, optimizer, scheduler and loss options.
sample_params = dict(set_seed=88, splits=(0.1, 0.8))
opt_params = dict(lr=0.01)
sched_params = dict(factor=0.1, patience=10, cooldown=10)
crit_params = dict(reduction='sum')

l = Learn(
    QM7b, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=L1Loss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=256,
    epochs=10,
)

In [None]:
# Train FFNet on a filtered QM9 subset, using only the coulomb feature and U0 target.
# Every *_params dict below is handed to Learn under the keyword of the same name.

# NOTE(review): the filter is ('n_atoms', '>', '12') while the pickle is named
# 'n_atoms_less_than_12.p' and pad is 12 - confirm the comparison direction
# (keep vs. drop) and that the threshold is meant to be the string '12'.
ds_params = dict(
    n=133885,
    features=['coulomb'],
    targets=['U0'],
    pad=12,
    filter_on=('n_atoms', '>', '12'),
    use_pickle='n_atoms_less_than_12.p',
)

model_params = dict(
    D_in=12 * 12,  # nxn coulomb matrix (n = pad = 12)
    H=2048,
    D_out=1,
    model_name='funnel',
)

# Sampling, optimizer, scheduler and loss options.
sample_params = dict(set_seed=88, splits=(0.1, 0.8))
opt_params = dict(lr=0.01)
sched_params = dict(factor=0.1, patience=10, cooldown=10)
crit_params = dict(reduction='sum')

l = Learn(
    QM9, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=L1Loss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=32,
    epochs=10,
)

In [None]:
# Train FFNet on ANI1x (wb97x_dz.energy target) with an L1 loss.
# Every *_params dict below is handed to Learn under the keyword of the same name.

# NOTE(review): the embed entry here is the 3-tuple (6, 32, False) and
# 'atomic_numbers' is listed as a feature, whereas the stand-alone ANI1x cell
# later in this notebook (which has recorded output) uses
# ('atomic_numbers', 6, 32, 0, True) and features=['coordinates'] only -
# confirm which form the current ANI1x class expects.
ds_params = dict(
    features=['atomic_numbers', 'coordinates'],
    targets=['wb97x_dz.energy'],
    embed=[(6, 32, False)],
    pad=63,  # length of the longest molecule in the dataset
    criterion=None,
    conformation='random',
    in_file='./data/ani1x/ani1x-release.h5',
)

# D_in = 32*63 + 3*63: presumably a 32-wide embedding per padded atom slot
# plus three coordinates per slot - TODO confirm.
model_params = dict(
    D_in=32 * 63 + 63 + 63 + 63,
    H=8192,
    D_out=1,
    model_name='funnel',
    padding_idx=0,
)

# Sampling, optimizer, scheduler and loss options.
sample_params = dict(set_seed=88, splits=(0.1, 0.8))
opt_params = dict(lr=0.01)
sched_params = dict(factor=0.1, patience=10, cooldown=10)
crit_params = dict(reduction='sum')

l = Learn(
    ANI1x, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=L1Loss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=128,
    epochs=10,
)

In [None]:
# Train FFNet on QM7X (atXYZ feature, eAT target) with an L1 loss.
# Every *_params dict below is handed to Learn under the keyword of the same name.

# D_in = 64*23 + 3*23: presumably a 64-wide embedding per padded atom slot
# plus three coordinates per slot - TODO confirm.
model_params = dict(
    D_in=64 * 23 + 23 + 23 + 23,
    H=4096,
    D_out=1,
    model_name='funnel',
    padding_idx=0,
)

# NOTE(review): the embed entry here is the 4-tuple ('atNUM', 9, 64, True),
# whereas the stand-alone QM7X cell later in this notebook uses the 5-tuple
# ('atNUM', 9, 64, 0, True) - confirm which form the current QM7X expects.
ds_params = dict(
    features=['atXYZ'],
    pad=23,
    targets=['eAT'],
    embed=[('atNUM', 9, 64, True)],
    selector=['i1-c1-opt'],
    use_h5=False,
)

# Sampling, optimizer, scheduler and loss options.
sample_params = dict(set_seed=88, splits=(0.1, 0.8))
opt_params = dict(lr=0.01)
sched_params = dict(factor=0.1, patience=20, cooldown=10)
crit_params = dict(reduction='sum')

l = Learn(
    QM7X, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=L1Loss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=256,
    epochs=10,
)

# Scratch for finding the longest molecule, kept commented out.
# Caution: run as-is it would rebind `l`, the Learn instance created above.
#qm7x = QM7X(**ds_params)
#l = 0
#for i in qm7x.ds_idx:
#    s = qm7x[i][0].shape.numel()
#    if s > l:
#        l = s
#print(l)

In [None]:
# Train FFNet on unfiltered QM9 using four features and the U0 target.
# Every *_params dict below is handed to Learn under the keyword of the same name.

ds_params = dict(
    n=133885,
    features=['coulomb', 'adjacency', 'distance', 'mulliken'],
    targets=['U0'],
    pad=29,
    filter_on=False,
    use_pickle='qm9.test.p',
)

model_params = dict(
    # Not just the coulomb matrix: three 29*29 terms plus one 29 term,
    # presumably coulomb + adjacency + distance matrices and the mulliken
    # vector, all padded to 29 - TODO confirm.
    D_in=29 * 29 + 29 * 29 + 29 * 29 + 29,
    H=4096,
    D_out=1,
    model_name='funnel',
)

# Sampling, optimizer, scheduler and loss options.
sample_params = dict(set_seed=88, splits=(0.15, 0.7))
opt_params = dict(lr=0.01)
sched_params = dict(factor=0.5, patience=10, cooldown=10)
crit_params = dict(reduction='sum')

l = Learn(
    QM9, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=L1Loss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=32,
    epochs=10,
)

In [None]:
# Build a QM9 dataset directly (no training) so individual samples can be inspected.
# No padding or filtering is requested; 'adjacency' goes through the embed list.
ds_params = dict(
    n=133885,
    features=['distance', 'coulomb', 'mulliken'],
    embed=[('adjacency', 4, 16, 0, True)],
    targets=['U0'],
    pad=False,
    filter_on=False,
    use_pickle='qm9.p',
)

qm9 = QM9(**ds_params)

In [None]:
# Look up key 1 in the QM9 dataset; as the cell's last expression the result is displayed.
qm9[1]

In [None]:
# Build a QM7X dataset directly (no training) so individual samples can be inspected.
# Unlike the QM7X training cell, this one requests no padding and sets use_h5=True.
ds_params = dict(
    features=['atXYZ'],
    pad=False,
    targets=['eAT'],
    embed=[('atNUM', 9, 64, 0, True)],
    selector=['i1-c1-opt'],
    use_h5=True,
)

qm7x = QM7X(**ds_params)

In [None]:
# Look up key 1 in the QM7X dataset; as the cell's last expression the result is displayed.
qm7x[1]

In [None]:
# Build a QM7 dataset directly (no training) so individual samples can be inspected.
# 'atoms' is routed through the embed list; data is read from the .mat file below.
ds_params = dict(
    features=['coulomb', 'xyz'],
    targets=['ae'],
    embed=[('atoms', 7, 16, 0, True)],
    in_file='./data/qm7/qm7.mat',
)

qm7 = QM7(**ds_params)

In [None]:
# Look up key 0 in the QM7 dataset; as the cell's last expression the result is displayed.
qm7[0]

In [None]:
# Build a QM7b dataset directly (no training) so individual samples can be inspected.
# Features are 'coulomb' followed by 13 scalar properties; the target is 'E'.
ds_params = dict(
    features=[
        'coulomb',
        'alpha_p', 'alpha_s',
        'HOMO_g', 'HOMO_p', 'HOMO_z',
        'LUMO_g', 'LUMO_p', 'LUMO_z',
        'IP', 'EA', 'E1', 'Emax', 'Imax',
    ],
    targets=['E'],
    in_file='./data/qm7b/qm7b.mat',
)

qm7b = QM7b(**ds_params)

In [None]:
# Look up key 0 in the QM7b dataset; as the cell's last expression the result is displayed.
qm7b[0]

In [2]:
# Build an ANI1x dataset directly (no training) so individual samples can be inspected.
# 'atomic_numbers' is routed through the embed list; 'coordinates' is the only plain feature.
ds_params = dict(
    features=['coordinates'],
    targets=['wb97x_dz.energy'],
    embed=[('atomic_numbers', 6, 32, 0, True)],
    pad=63,  # length of the longest molecule in the dataset
    criterion=None,
    conformation='random',
    in_file='./data/ani1x/ani1x-release.h5',
)

ani1x = ANI1x(**ds_params)

structures loaded:  9342


In [6]:
# Look up the entry keyed by the string 'C1H4'; the recorded output is a tuple that starts with a tensor.
ani1x['C1H4']

(tensor([ 7.0340e-03, -1.9342e-02,  9.9546e-04,  5.4263e-01,  9.5569e-01,
          8.7516e-02,  1.5425e-02, -4.9558e-01,  9.1806e-01,  4.5315e-01,
         -5.7917e-01, -7.4824e-01, -1.0534e+00,  2.3511e-01, -2.6331e-01,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.