In [1]:
import tensorflow as tf
import os, warnings
import pandas as pd
warnings.filterwarnings('ignore')
import numpy as np
from logp.zq.io.smi2feat import load_feat_from_csv
from logp.zq.io.load_dataframe import load_pd
from logp.zq.model.model import potential_model
from sklearn.metrics import mean_absolute_error,mean_squared_error
from logp.zq.io.base import sparse_batch

In [2]:
#filename = 'logp/DATASETS/sf_ave_PhysProp_MD_weights_charge.csv'
#dataset = pd.read_csv(filename)
#dataset.shape

In [3]:
#results = load_pd(filename)

In [4]:
# Martel set: loaded once and used only as prediction input for the trained models.
filename_martel = 'logp/DATASETS/Martel/MD_max/weight_q/sf_max_Martel_MD_weights_charge.csv'
# Output of `load_pd`; a later cell also reads element 3 of each entry as the reference logP.
results_martel = load_pd(filename_martel)


def dataset_martel():
    """Build the Martel feature datasets from `results_martel`.

    `shuffle=False` keeps the record order, which matters because predictions
    are later compared position-by-position with `results_martel`.
    The split weights are 10:0, so presumably every record lands in 'train'
    -- TODO confirm against `load_feat_from_csv`.
    """
    return load_feat_from_csv(results_martel,24,split={'train':10,'test':0},shuffle=False)


def martel_train():
    """Zero-argument `input_fn`: the Martel 'train' split with `sparse_batch(5)` applied."""
    return dataset_martel()['train'].apply(sparse_batch(5))

In [5]:
# Star set: loaded once and used only as prediction input for the trained models.
filename_star = 'logp/DATASETS/star_nonstar/MD_max/weight_q/sf_max_star_MD_weights_charge.csv'
# Output of `load_pd`; a later cell also reads element 3 of each entry as the reference logP.
results_star = load_pd(filename_star)


def dataset_star():
    """Build the star feature datasets from `results_star`.

    `shuffle=False` keeps the record order, which matters because predictions
    are later compared position-by-position with `results_star`.
    The split weights are 10:0, so presumably every record lands in 'train'
    -- TODO confirm against `load_feat_from_csv`.
    """
    return load_feat_from_csv(results_star,24,split={'train':10,'test':0},shuffle=False)


def star_train():
    """Zero-argument `input_fn`: the star 'train' split with `sparse_batch(5)` applied."""
    return dataset_star()['train'].apply(sparse_batch(5))

In [6]:
# Huuskonen set: loaded once and used only as prediction input for the trained models.
# NOTE(review): every variable here is named `*_nonstar` although the file is the
# Huuskonen set -- confirm the intended dataset before renaming anything.
filename_nonstar = 'logp/DATASETS/Huuskonen/MD_max/weight_q/sf_max_Huuskonen_MD_weights_charge.csv'
# Output of `load_pd`; a later cell also reads element 3 of each entry as the reference logP.
results_nonstar = load_pd(filename_nonstar)


def dataset_nonstar():
    """Build the feature datasets from `results_nonstar`.

    `shuffle=False` keeps the record order, which matters because predictions
    are later compared position-by-position with `results_nonstar`.
    The split weights are 10:0, so presumably every record lands in 'train'
    -- TODO confirm against `load_feat_from_csv`.
    """
    return load_feat_from_csv(results_nonstar,24,split={'train':10,'test':0},shuffle=False)


def nonstar_train():
    """Zero-argument `input_fn`: the 'train' split with `sparse_batch(5)` applied."""
    return dataset_nonstar()['train'].apply(sparse_batch(5))

In [7]:
# Learning rates to sweep; ten fold-models are trained for each rate.
lrs = [1e-2,1e-3, 1e-4, 1e-5]

# Predictions on the three external sets, keyed by 'lr_<rate>'.
# Each value is a list holding one list of predicted logP values per fold.
logps_martel = {}
logps_star = {}
logps_nonstar = {}


def _collect_logp(estimator, input_fn):
    """Return the 'logp' entry of every prediction `estimator` yields for `input_fn`."""
    return [pred['logp'] for pred in estimator.predict(input_fn=input_fn)]


for lr in lrs:
    lr_key = 'lr_{}'.format(lr)
    logps_martel[lr_key] = []
    logps_star[lr_key] = []
    logps_nonstar[lr_key] = []

    for fold in range(10):
        # define the parameters
        # NOTE(review): the log output shows an estimator `_model_dir` config; if
        # `potential_model` returns a tf.estimator.Estimator, re-running this cell
        # resumes from checkpoints already in `model_dir` -- confirm before re-runs.
        params = {
            'model_dir': 'MD_max_weight_q_PhysProp/100_100_100/lr_{}/kfold_{}'.format(lr,fold),
            'network': 'bpnn',
            'network_params': {
                # same [100,100,100] spec for each key
                # (presumably atomic numbers H, C, N, O -- TODO confirm)
                'nn_spec': {1:[100,100,100],6:[100,100,100],7:[100,100,100],8:[100,100,100]}},
            'model_params': {'learning_rate': lr}
        }

        # per-fold train / test files
        ftrain = 'logp/DATASETS/PhysProps/MD_max/weight_q/sf_max_PhysProp_MD_weights_q_train_{}.csv'.format(fold)
        ftest  = 'logp/DATASETS/PhysProps/MD_max/weight_q/sf_max_PhysProp_MD_weights_q_test_{}.csv'.format(fold)
        dtrain = load_pd(ftrain)
        dtest = load_pd(ftest)

        # The input functions below read `dtrain` / `dtest` when they are called,
        # which happens inside this same iteration (training and evaluation).
        def dataset_train():
            """Feature datasets built from this fold's training file."""
            return load_feat_from_csv(dtrain,24,split={'train':10,'test':0})

        def dataset_test():
            """Feature datasets built from this fold's test file."""
            return load_feat_from_csv(dtest,24,split={'train':0,'test':10})

        # yield the train and test `input_fn`
        def train():
            """Training `input_fn`: repeated, shuffled (buffer 1000), `sparse_batch(100)`."""
            return dataset_train()['train'].repeat().shuffle(1000).apply(sparse_batch(100))

        def test():
            """Evaluation `input_fn`: repeated, `sparse_batch(50)`."""
            return dataset_test()['test'].repeat().apply(sparse_batch(50))

        # initialize the model
        model = potential_model(params)
        # stop once the 'loss' metric has not decreased for 1000 steps (never before step 100)
        early_stopping = tf.estimator.experimental.stop_if_no_decrease_hook(
            model,
            metric_name='loss',
            max_steps_without_decrease=1000,
            min_steps=100)
        # train and evaluate the model
        train_spec = tf.estimator.TrainSpec(input_fn=train, hooks=[early_stopping] ,max_steps=200000)
        eval_spec = tf.estimator.EvalSpec(input_fn=test, steps=400)
        tf.estimator.train_and_evaluate(model, train_spec, eval_spec)

        # predict the three external sets with this fold's model
        logps_martel[lr_key].append(_collect_logp(model, martel_train))
        logps_star[lr_key].append(_collect_logp(model, star_train))
        logps_nonstar[lr_key].append(_collect_logp(model, nonstar_train))


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'MD_max_weight_q_PhysProp/100_100_100/lr_0.01/kfold_0', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Not using Distribute Coordina

In [8]:
def _fold_mean(per_fold):
    """Stack the per-fold prediction lists and average them over the fold axis (axis 0)."""
    return np.mean(np.array(per_fold), axis=0)


# Same 'lr_<rate>' keys that the training cell used to store the predictions.
lr_keys = ['lr_{}'.format(lr) for lr in lrs]

# Fold-averaged predictions for each external set, keyed by 'lr_<rate>'.
logps_martel_ave = {key: _fold_mean(logps_martel[key]) for key in lr_keys}
logps_star_ave = {key: _fold_mean(logps_star[key]) for key in lr_keys}
logps_nonstar_ave = {key: _fold_mean(logps_nonstar[key]) for key in lr_keys}

In [9]:
# Reference ("true") logP values: element 3 of every entry in the `load_pd` results,
# kept in the original record order for each set.
logp_exp_martel = [record[3] for record in results_martel]
logp_exp_star = [record[3] for record in results_star]
logp_exp_nonstar = [record[3] for record in results_nonstar]

In [10]:
# MAE, MSE and RMSE of the fold-averaged Martel predictions, one report per learning rate.
for lr in lrs:
    logp_pred_martel = logps_martel_ave['lr_{}'.format(lr)]
    # scikit-learn metrics are declared as (y_true, y_pred): reference values go first.
    mae_martel = mean_absolute_error(logp_exp_martel, logp_pred_martel)
    mse_martel = mean_squared_error(logp_exp_martel, logp_pred_martel)
    print('#-----{}------#'.format(lr))
    # printed as: MAE, MSE, RMSE
    print(mae_martel,mse_martel,np.sqrt(mse_martel))

#-----0.01------#
0.7305077543285456 0.8750989468172934 0.9354672344969082
#-----0.001------#
0.8662658163476108 1.2895026652684725 1.1355627086464546
#-----0.0001------#
0.9416855667213376 1.660318490925609 1.2885334651942917
#-----1e-05------#
0.8867232949004165 1.40568851631043 1.1856173566165562


In [11]:
# MAE, MSE and RMSE of the fold-averaged star predictions, one report per learning rate.
for lr in lrs:
    logp_pred_star = logps_star_ave['lr_{}'.format(lr)]
    # scikit-learn metrics are declared as (y_true, y_pred): reference values go first.
    mae_star = mean_absolute_error(logp_exp_star, logp_pred_star)
    mse_star = mean_squared_error(logp_exp_star, logp_pred_star)
    print('#-----{}------#'.format(lr))
    # printed as: MAE, MSE, RMSE
    print(mae_star,mse_star,np.sqrt(mse_star))

#-----0.01------#
1.0508350810957832 1.9036243225943548 1.3797189288381728
#-----0.001------#
0.6359736579303177 0.795303748035727 0.8917980421798015
#-----0.0001------#
0.5798313186566035 0.715589933685738 0.8459254894408479
#-----1e-05------#
0.5927400011200314 0.6319177019926682 0.7949325141121529


In [12]:
# MAE, MSE and RMSE of the fold-averaged nonstar predictions, one report per learning rate.
for lr in lrs:
    logp_pred_nonstar = logps_nonstar_ave['lr_{}'.format(lr)]
    # scikit-learn metrics are declared as (y_true, y_pred): reference values go first.
    mae_nonstar = mean_absolute_error(logp_exp_nonstar, logp_pred_nonstar)
    mse_nonstar = mean_squared_error(logp_exp_nonstar, logp_pred_nonstar)
    print('#-----{}------#'.format(lr))
    # printed as: MAE, MSE, RMSE
    print(mae_nonstar,mse_nonstar,np.sqrt(mse_nonstar))

#-----0.01------#
0.5355292830071076 0.5601001035043623 0.7483983588332903
#-----0.001------#
0.2114724140194927 0.13947218621257135 0.37345975179739427
#-----0.0001------#
0.23172526935781576 0.1351622168748631 0.36764414435002646
#-----1e-05------#
0.34618648158731136 0.24742550890481388 0.49741884655169016
