In [1]:
import sys # required for relative imports in jupyter lab
sys.path.insert(0, '../') 

from cosmosis.model import FFNet
from dataset import Dummy, QM7, QM7b, QM7X, QM9, ANI1x
from learning import Learn, Selector

from torch.optim import Adam
from torch.nn import MSELoss, L1Loss
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [None]:
# Build a Learn object that pairs FFNet with the synthetic Dummy dataset,
# using Adam, ReduceLROnPlateau and an MSE criterion.

# Dataset options ('make_regression' presumably names the generator used by
# Dummy -- TODO confirm against dataset.py).
ds_params = {
    'make': 'make_regression',
    'make_params': {
        'n_samples': 10000,
        'n_features': 128,
    },
}

# The network input width is taken from the generated feature count (128).
model_params = {
    'D_in': ds_params['make_params']['n_features'],
    'H': 512,
    'D_out': 1,
    'model_name': 'funnel',
}

sample_params = {'set_seed': 88, 'splits': (.1, .8)}
opt_params = {'lr': 0.01}
sched_params = {'factor': .1, 'patience': 10}
crit_params = {'reduction': 'sum'}

l = Learn(
    Dummy, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=MSELoss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=256,
    epochs=100,
)

In [None]:
# Build a Learn object that pairs FFNet with the QM7 dataset,
# using Adam, ReduceLROnPlateau and an L1 criterion.

ds_params = {}  # empty: no dataset options are supplied for QM7

model_params = {
    'D_in': 23 * 23,  # presumably a flattened 23x23 matrix per sample -- TODO confirm
    'H': 4096,
    'D_out': 1,
    'model_name': 'funnel',
}

opt_params = {'lr': 0.01}
sched_params = {'factor': .1, 'patience': 10, 'cooldown': 10}
sample_params = {'set_seed': 88, 'splits': (.1, .8)}
crit_params = {'reduction': 'sum'}

l = Learn(
    QM7, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=L1Loss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=256,
    epochs=100,
)

In [None]:
# Build a Learn object that pairs FFNet with the QM7b dataset,
# using Adam, ReduceLROnPlateau and an L1 criterion.

# Target 'E' with 13 additional named features.
ds_params = {
    'target': 'E',
    'features': [
        'alpha_p', 'alpha_s',
        'HOMO_g', 'HOMO_p', 'HOMO_z',
        'LUMO_g', 'LUMO_p', 'LUMO_z',
        'IP', 'EA', 'E1', 'Emax', 'Imax',
    ],
}

model_params = {
    # 13 matches the number of entries in ds_params['features']; the 23*23
    # term is presumably a flattened 23x23 matrix -- TODO confirm.
    'D_in': 23 * 23 + 13,
    'H': 4096,
    'D_out': 1,
    'model_name': 'funnel',
}

opt_params = {'lr': 0.01}
sched_params = {'factor': .1, 'patience': 10, 'cooldown': 10}
sample_params = {'set_seed': 88, 'splits': (.1, .8)}
crit_params = {'reduction': 'sum'}

l = Learn(
    QM7b, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=L1Loss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=256,
    epochs=100,
)

In [None]:
# Build a Learn object that pairs FFNet with the QM9 dataset,
# using Adam, ReduceLROnPlateau and an L1 criterion.

ds_params = {
    'n': 133885,
    'features': ['coulomb'],
    'target': 'U0',
    'pad': 12,
    # NOTE(review): presumably this drops molecules with more than 12 atoms so
    # that everything fits the 12-atom padding -- confirm against QM9's loader.
    'filter_on': ('n_atoms', '>', '12'),
    'use_pickle': False,
}

model_params = {
    'D_in': 12 * 12,  # n x n coulomb matrix, n = 12 (same number as 'pad')
    'H': 2048,
    'D_out': 1,
    'model_name': 'funnel',
}

opt_params = {'lr': 0.01}
sched_params = {'factor': .1, 'patience': 10, 'cooldown': 10}
sample_params = {'set_seed': 88, 'splits': (.1, .8)}
crit_params = {'reduction': 'sum'}

l = Learn(
    QM9, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=L1Loss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=32,
    epochs=100,
)

In [None]:
# Build a Learn object that pairs FFNet with the ANI1x dataset,
# using Adam, ReduceLROnPlateau and an L1 criterion.

ds_params = {
    'features': ['atomic_numbers', 'coordinates'],
    'targets': ['wb97x_dz.energy'],
    'embed': [(9, 64, True)],
    'pad': 63,  # length of the longest molecule in the dataset
    'criterion': None,
    'conformation': 'random',
    'in_file': './data/ani1x/ani1x-release.h5',
}

model_params = {
    # Presumably 63 padded atoms x 64-wide embedding, plus three coordinate
    # components for each of the 63 atoms -- TODO confirm against the dataset.
    'D_in': 64 * 63 + 3 * 63,
    'H': 8192,
    'D_out': 1,
    'model_name': 'funnel',
}

opt_params = {'lr': 0.01}
sched_params = {'factor': .1, 'patience': 10, 'cooldown': 10}
sample_params = {'set_seed': 88, 'splits': (.1, .8)}
crit_params = {'reduction': 'sum'}

l = Learn(
    ANI1x, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=L1Loss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=128,
    epochs=10,
)

In [None]:
# Build a Learn object that pairs FFNet with the QM7X dataset,
# using Adam, ReduceLROnPlateau and an L1 criterion.

ds_params = {
    'features': [
        'DIP', 'HLgap', 'KSE', 'atC6', 'atNUM', 'atPOL', 'atXYZ', 'eC',
        'eDFTB+MBD', 'eEE', 'eH', 'eKIN', 'eKSE', 'eL', 'eMBD', 'eNE',
        'eNN', 'ePBE0', 'ePBE0+MBD', 'eTS', 'eX', 'eXC', 'eXX', 'hCHG',
        'hDIP', 'hRAT', 'hVDIP', 'hVOL', 'mC6', 'mPOL', 'mTPOL', 'pbe0FOR',
        'sMIT', 'sRMSD', 'totFOR', 'vDIP', 'vEQ', 'vIQ', 'vTQ', 'vdwFOR',
        'vdwR',
    ],
    'pad': 654,  # length of the longest molecule features
    'targets': ['eAT'],
    'embed': [(9, 16, True)],
    'selector': ['i1-c1-opt'],
    'use_h5': True,
}

model_params = {
    'D_in': 654,  # same number as ds_params['pad']
    'H': 2048,
    'D_out': 1,
    'model_name': 'funnel',
}

opt_params = {'lr': 0.01}
sched_params = {'factor': .1, 'patience': 10, 'cooldown': 10}
sample_params = {'set_seed': 88, 'splits': (.1, .8)}
crit_params = {'reduction': 'sum'}

l = Learn(
    QM7X, FFNet, Selector,
    Optimizer=Adam,
    Scheduler=ReduceLROnPlateau,
    Criterion=L1Loss,
    model_params=model_params,
    ds_params=ds_params,
    sample_params=sample_params,
    opt_params=opt_params,
    sched_params=sched_params,
    crit_params=crit_params,
    adapt=False,
    load_model=False,
    load_embed=False,
    save_model=False,
    batch_size=256,
    epochs=10,
)

# Disabled recipe for finding the longest molecule (the largest flattened
# feature length over the dataset):
#   qm7x = QM7X(**ds_params)
#   longest = 0
#   for i in qm7x.ds_idx:
#       size = qm7x[i][0].shape.numel()
#       if size > longest:
#           longest = size
#   print(longest)

In [2]:
# Instantiate the QM7X dataset on its own (no Learn object) for inspection.
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'tuple' object has no attribute 'encode'" -- the cause lies
# outside this notebook's visible code and still needs to be tracked down.
ds_params = {
    'features': [
        'DIP', 'HLgap', 'KSE', 'atC6', 'atNUM', 'atPOL', 'atXYZ', 'eC',
        'eDFTB+MBD', 'eEE', 'eH', 'eKIN', 'eKSE', 'eL', 'eMBD', 'eNE',
        'eNN', 'ePBE0', 'ePBE0+MBD', 'eTS', 'eX', 'eXC', 'eXX', 'hCHG',
        'hDIP', 'hRAT', 'hVDIP', 'hVOL', 'mC6', 'mPOL', 'mTPOL', 'pbe0FOR',
        'sMIT', 'sRMSD', 'totFOR', 'vDIP', 'vEQ', 'vIQ', 'vTQ', 'vdwFOR',
        'vdwR',
    ],
    'pad': False,  # no pad length is given here (presumably disables padding -- TODO confirm)
    'targets': ['eAT'],
    'embed': [(9, 16, True)],
    'selector': ['i1-c1-opt'],
    'use_h5': False,
}

qm7x = QM7X(**ds_params)

mapping...  <HDF5 file "1000.hdf5" (mode r)>
f[idmol][idconf].keys() <KeysViewHDF5 ['DIP', 'HLgap', 'KSE', 'atC6', 'atNUM', 'atPOL', 'atXYZ', 'eAT', 'eC', 'eDFTB+MBD', 'eEE', 'eH', 'eKIN', 'eKSE', 'eL', 'eMBD', 'eNE', 'eNN', 'ePBE0', 'ePBE0+MBD', 'eTS', 'eX', 'eXC', 'eXX', 'hCHG', 'hDIP', 'hRAT', 'hVDIP', 'hVOL', 'mC6', 'mPOL', 'mTPOL', 'pbe0FOR', 'sMIT', 'sRMSD', 'totFOR', 'vDIP', 'vEQ', 'vIQ', 'vTQ', 'vdwFOR', 'vdwR']>


AttributeError: 'tuple' object has no attribute 'encode'

In [None]:
# Display the first ten entries of qm7x.ds_idx.
qm7x.ds_idx[:10]

In [None]:
# List the keys stored under entry 1 of qm7x.datadic.
qm7x.datadic[1].keys()

In [None]:
# Display the 'idconf' value stored under entry 1 of qm7x.datadic.
qm7x.datadic[1]['idconf']

In [None]:
# Display the 'Geom-m1-i1-c1-opt' item of entry 1
# (presumably keyed by a conformation id -- TODO confirm).
qm7x.datadic[1]['Geom-m1-i1-c1-opt']

In [None]:
# Display the shape of the first element returned by qm7x[1].
qm7x[1][0].shape

In [None]:
# Display the 'idconf' value of entry 1 again (same lookup as an earlier cell).
qm7x.datadic[1]['idconf']