In [1]:
import keras
from keras.models import load_model, model_from_json
import sklearn.preprocessing
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import accuracy_score, roc_auc_score, r2_score, mean_absolute_error
from molSimplifyAD.utils.pymongo_tools import connect2db, push_models_published
from molSimplifyAD.mlclass_mongo import modelPublished

Using TensorFlow backend.


## Load your trained model

In [122]:
basepath = '/home/crduan/Package/molSimplify/molSimplify/tf_nn/homolumo/'

In [106]:
### if your model is a directly saved by model.save()
# model = load_model(basepath+'homo_empty_model.h5')

### if you save you model via json.
with open(basepath+'gap_model.json', 'r') as fo:
    loaded_model_json = fo.read()
model = model_from_json(loaded_model_json)
model.load_weights(basepath+'gap_model.h5')

## Load train/test information

In [117]:
## For JP's old models
# df_train = pd.read_csv(basepath+"ls_ii_bl_x.csv")
# y_train = pd.read_csv(basepath+"ls_ii_bl_y.csv").values
# # n_train = pd.read_csv(basepath+"train_names.csv").values

In [20]:
df_train = pd.read_csv(basepath+"gap_train_x.csv")
df_test = pd.read_csv(basepath+"gap_test_x.csv")
y_train = pd.read_csv(basepath+"gap_train_y.csv").values
y_test = pd.read_csv(basepath+"gap_test_y.csv").values
n_train = pd.read_csv(basepath+"gap_train_names.csv").values
n_test = pd.read_csv(basepath+"gap_test_names.csv").values

## Load your feature names

In [123]:
with open(basepath+"gap_vars.csv", "r") as fo:
    features = fo.readlines()
features =[f.split("\n")[0].strip('\r') for f in features]
target = "energeticgap"

In [109]:
y_train.shape,df_train.shape

((1373, 3), (1373, 154))

In [110]:
fnames, el = [], []
for f in df_train:
    std = np.std(df_train[str(f)].dropna().values)
    if std > 1e-6:
        fnames.append(f)
    else:
        el.append(f)

In [111]:
len(fnames)

154

## Standardize the train/test sets

In [112]:
X_train = df_train[fnames].values
X_test = df_test[fnames].values

In [113]:
x_scaler = sklearn.preprocessing.StandardScaler()
x_scaler.fit(X_train)
_X_train = x_scaler.transform(X_train)
_X_test = x_scaler.transform(X_test)
y_scaler= sklearn.preprocessing.StandardScaler()
### Use if regression
y_scaler.fit(y_train)
_y_train = y_scaler.transform(y_train)
_y_test = y_scaler.transform(y_test)

## Make predictions on the test set

In [114]:
# hat_y_train = y_scaler.inverse_transform(model.predict(_X_train))
hat_y_test = y_scaler.inverse_transform(model.predict(_X_test))

In [74]:
### for classifiers
# metrics = {"auc": roc_auc_score(y_test.reshape(-1,), hat_y_test.reshape(-1,))}
# metrics

In [30]:
metrics = {"r2": r2_score(y_test.reshape(-1,), hat_y_test.reshape(-1,)),
          "mae": mean_absolute_error(y_test.reshape(-1,), hat_y_test.reshape(-1,))}
metrics

{'mae': 0.2184421238081002, 'r2': 0.9445827830350787}

## Assemble model_dict (prepare your push)

In [115]:
# ## For JP's old models
# model_dict = {
#     "publication": "JP_JPCA_2017",
#     "doi": "10.1021/acs.jpca.7b08750",
#     "features": features,
#     "target": target,
#     "name_train": False,
#     "X_train": [X_train[ii].tolist() for ii in range(X_train.shape[0])],
#     "target_train": [y_train[ii].tolist() for ii in range(y_train.shape[0])],
#     "name_test": False,
#     "X_test": False,
#     "target_test": False,
#     "x_scaler": pickle.dumps(x_scaler),
#     "y_scaler": pickle.dumps(y_scaler),
#     "metrics": False
# }

In [103]:
model_dict = {
    "publication": "Nandy_IECR_2018",
    "doi": "10.1021/acs.iecr.8b04015",
    "features": features,
    "target": target,
    "name_train": [n_train[ii].tolist() for ii in range(n_train.shape[0])],
    "X_train": [X_train[ii].tolist() for ii in range(X_train.shape[0])],
    "target_train": [y_train[ii].tolist() for ii in range(y_train.shape[0])],
    "name_test": [n_test[ii].tolist() for ii in range(n_test.shape[0])],
    "X_test": [X_test[ii].tolist() for ii in range(X_test.shape[0])],
    "target_test": [y_test[ii].tolist() for ii in range(y_test.shape[0])],
    "x_scaler": pickle.dumps(x_scaler),
    "y_scaler": pickle.dumps(y_scaler),
    "metrics": metrics
}

AttributeError: 'bool' object has no attribute 'shape'

## Push your model

In [None]:
from os.path import expanduser
home = expanduser("~")
dbconfig = json.load(open(home + "/.db_config"))

In [116]:
push_models_published(model, model_dict,
                      database='tmc', collection='published_models',
                      user=dbconfig['user'], pwd=dbconfig['pwd'],
                      host="localhost", port=27017, auth=True,)

pushing...
Dumping database....

Done.


## Test whether your push is successful

In [119]:
db = connect2db(user=dbconfig['user'], pwd=dbconfig['pwd'],
                host="localhost", port=27017,
                database='tmc', auth=True)

In [120]:
db.published_models.count()

  """Entry point for launching an IPython kernel.


11