In [1]:
import sys
sys.path.append("./src") # append to system path

from sklearn import cross_validation
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.decomposition import PCA
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler
from sklearn.externals import joblib

import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
from matplotlib import style
from matplotlib.patches import Rectangle
style.use('ggplot')



---
# Predict for ALL chemicals

In [None]:
def load_lcia_data(descs_p, target_p):
    """Read a descriptor CSV and a target CSV and return both as NumPy arrays.

    Parameters
    ----------
    descs_p : path of the descriptor CSV (first row is the header).
    target_p : path of the target CSV (first row is the header).

    Returns
    -------
    (X, y) : the descriptor values with missing entries replaced by 0, and
        the target values exactly as read.
    """
    csv_options = dict(header=0, index_col=None)
    descriptors = pd.read_csv(descs_p, **csv_options).fillna(0)
    targets = pd.read_csv(target_p, **csv_options)
    return descriptors.values, targets.values

def mre(true_y,pred_y):
    """Mean relative error between true and predicted values, in percent.

    Computes mean(|true_y - pred_y| / |true_y|) * 100.

    Inputs are converted to float arrays, so integer inputs are never
    floor-divided. When the two shapes differ only by length-1 axes (for
    example ``(n,)`` against ``(n, 1)``) both are squeezed first; without
    this NumPy would broadcast them to an ``(n, n)`` matrix and silently
    return a wrong value.

    Entries where ``true_y`` is 0 are not special-cased: they yield inf/nan
    under the usual NumPy division rules.
    """
    true_y = np.asarray(true_y, dtype=float)
    pred_y = np.asarray(pred_y, dtype=float)

    if true_y.shape != pred_y.shape:
        true_sq, pred_sq = np.squeeze(true_y), np.squeeze(pred_y)
        # Align only when the mismatch is purely due to singleton axes;
        # anything else is left to NumPy's normal broadcasting rules.
        if true_sq.shape == pred_sq.shape:
            true_y, pred_y = true_sq, pred_sq

    return np.mean(np.abs((true_y - pred_y) / true_y)) * 100


# Descriptor and EI99 target files covering the full chemical set.
descs_all = '../data/descs/old/descs_Mar08_3839.csv'
target_all = '../data/target/full/EI99.csv'
X_all, y_all = load_lcia_data(descs_p=descs_all, target_p=target_all)

## Load the scaler and PCA

In [None]:
# Scaler and PCA objects stored under ../nets/EI99.
# (joblib.load unpickles the files, so only load artefacts you trust.)
this_scaler = joblib.load("../nets/EI99/scaler.pkl")
pca = joblib.load("../nets/EI99/pca.pkl")

# Scale first, then project with the PCA.
# Caution: X_all is overwritten, so re-running this cell without reloading
# the data feeds already-transformed values back into the scaler.
X_all_scaled = this_scaler.transform(X_all)
X_all = pca.transform(X_all_scaled)
print(X_all.shape)

## Load the metagraph and restore the model

In [None]:
# Restore the saved EI99 network and run it on every chemical.
#
# The meta graph is imported into its own tf.Graph instead of the shared
# default graph. Collections accumulate when several meta graphs are imported
# into one graph, so tf.get_collection("X")[0] could otherwise return the
# tensor of a model imported by another cell in the same kernel.
ei99_graph = tf.Graph()
with ei99_graph.as_default(), tf.Session(graph=ei99_graph) as sess:
    new_saver = tf.train.import_meta_graph("../nets/EI99/EI99_Apr4.meta")
    new_saver.restore(sess, "../nets/EI99/EI99_Apr4.ckpt")
    # Tensors stored in named collections of the saved graph.
    X = tf.get_collection("X")[0]
    y = tf.get_collection("y")[0]  # not fed here; only the inputs are needed to predict
    pred = tf.get_collection("pred")[0]
    pred_all = sess.run(pred, feed_dict={X: X_all})

In [None]:
# Coefficient of determination over the full chemical set.
# NOTE(review): the humanhealth sections apply np.exp to the network output
# before scoring, this section does not -- confirm the EI99 network was
# trained on untransformed targets.
r2_all = r2_score(y_all, pred_all)
print(r2_all)

In [None]:
%matplotlib auto
MRE_this = mre(y_all, pred_all)
R2_this = r2_score(y_all, pred_all)
print R2_this
MRE_label = 'MRE: ' + str(round(MRE_this,2))

fig = plt.figure()
ax = fig.add_subplot(111)
est = plt.plot(y_all, pred_all,'o', label='estimated values')

max_val = max(max(y_all),max(pred_all))
plt.ylim([0,max_val+1])
plt.xlim([0,max_val+1])

thisLine = plt.plot(np.append(0,max_val+1), np.append(0,max_val+1), label='perfect prediction line')

plt.plot([],[],linewidth=0, label=MRE_label)
plt.legend(loc='upper left')
plt.show()

---
# Predict for validation chemicals

In [3]:
def load_lcia_data(descs_p, target_p):
    """Read a descriptor CSV and a target CSV and return both as NumPy arrays.

    Parameters
    ----------
    descs_p : path of the descriptor CSV (first row is the header).
    target_p : path of the target CSV (first row is the header).

    Returns
    -------
    (X, y) : the descriptor values with missing entries replaced by 0, and
        the target values exactly as read.
    """
    csv_options = dict(header=0, index_col=None)
    descriptors = pd.read_csv(descs_p, **csv_options).fillna(0)
    targets = pd.read_csv(target_p, **csv_options)
    return descriptors.values, targets.values

def mre(true_y,pred_y):
    """Mean relative error between true and predicted values, in percent.

    Computes mean(|true_y - pred_y| / |true_y|) * 100.

    Inputs are converted to float arrays, so integer inputs are never
    floor-divided. When the two shapes differ only by length-1 axes (for
    example ``(n,)`` against ``(n, 1)``) both are squeezed first; without
    this NumPy would broadcast them to an ``(n, n)`` matrix and silently
    return a wrong value.

    Entries where ``true_y`` is 0 are not special-cased: they yield inf/nan
    under the usual NumPy division rules.
    """
    true_y = np.asarray(true_y, dtype=float)
    pred_y = np.asarray(pred_y, dtype=float)

    if true_y.shape != pred_y.shape:
        true_sq, pred_sq = np.squeeze(true_y), np.squeeze(pred_y)
        # Align only when the mismatch is purely due to singleton axes;
        # anything else is left to NumPy's normal broadcasting rules.
        if true_sq.shape == pred_sq.shape:
            true_y, pred_y = true_sq, pred_sq

    return np.mean(np.abs((true_y - pred_y) / true_y)) * 100

# Load the training files and hold out 10% of the rows as the validation
# split; the fixed random_state keeps the split the same on every run.
descs_p = '../data/descs/train/descs_Mar08_3839_train.csv'
target_p = '../data/target/train/humanhealth_train.csv'
X, y = load_lcia_data(descs_p=descs_p, target_p=target_p)

trn_X, val_X, trn_y, val_y = cross_validation.train_test_split(
    X, y,
    test_size=0.1,
    random_state=42,
)

### data preprocessing on validation set

In [4]:
# Scaler and PCA objects stored under ../nets/humanhealth.
# (joblib.load unpickles the files, so only load artefacts you trust.)
this_scaler = joblib.load("../nets/humanhealth/scaler.pkl")
pca = joblib.load("../nets/humanhealth/pca.pkl")

# Scale first, then project with the PCA.
# Caution: trn_X / val_X are overwritten, so re-running this cell without
# re-running the split feeds already-transformed values back into the scaler.
trn_scaled = this_scaler.transform(trn_X)
val_scaled = this_scaler.transform(val_X)
trn_X = pca.transform(trn_scaled)
val_X = pca.transform(val_scaled)
print(val_X.shape)

(16, 60)


In [5]:
# Load the metagraph, restore the humanhealth model, then predict on the
# validation split.
#
# The meta graph is imported into its own tf.Graph instead of the shared
# default graph. Collections accumulate when several meta graphs are imported
# into one graph, so tf.get_collection("X")[0] could otherwise return the
# tensor of a model imported by another cell in the same kernel.
humanhealth_graph = tf.Graph()
with humanhealth_graph.as_default(), tf.Session(graph=humanhealth_graph) as sess:
    new_saver = tf.train.import_meta_graph("../nets/humanhealth/humanhealth_Apr4.meta")
    new_saver.restore(sess, "../nets/humanhealth/humanhealth_Apr4.ckpt")
    # Tensors stored in named collections of the saved graph.
    # Note: X and y rebind the names that held the loaded data arrays.
    X = tf.get_collection("X")[0]
    y = tf.get_collection("y")[0]  # not fed here; only the inputs are needed to predict
    pred = tf.get_collection("pred")[0]
    pred_val = sess.run(pred, feed_dict={X: val_X})

In [6]:
# The network output is exponentiated before it is shown and scored
# (presumably the model was trained on log-transformed targets -- TODO confirm).
pred_val_exp = np.exp(pred_val)
print(pred_val_exp)
print(r2_score(val_y, pred_val_exp))

[[  5.81834465e-04]
 [  3.84545623e-04]
 [  4.58760333e-04]
 [  2.72362697e-04]
 [  2.40460344e-04]
 [  6.86989748e-04]
 [  5.40414127e-04]
 [  9.42672486e-05]
 [  2.04816359e-04]
 [  1.72060158e-03]
 [  1.04849772e-04]
 [  2.78586638e-04]
 [  1.63755292e-04]
 [  9.66143387e-04]
 [  4.07055893e-04]
 [  2.11876104e-04]]
0.585217523468


---
# Predict for test chemicals

In [7]:
def load_lcia_data(descs_p, target_p):
    """Read a descriptor CSV and a target CSV and return both as NumPy arrays.

    Parameters
    ----------
    descs_p : path of the descriptor CSV (first row is the header).
    target_p : path of the target CSV (first row is the header).

    Returns
    -------
    (X, y) : the descriptor values with missing entries replaced by 0, and
        the target values exactly as read.
    """
    csv_options = dict(header=0, index_col=None)
    descriptors = pd.read_csv(descs_p, **csv_options).fillna(0)
    targets = pd.read_csv(target_p, **csv_options)
    return descriptors.values, targets.values

def mre(true_y,pred_y):
    """Mean relative error between true and predicted values, in percent.

    Computes mean(|true_y - pred_y| / |true_y|) * 100.

    Inputs are converted to float arrays, so integer inputs are never
    floor-divided. When the two shapes differ only by length-1 axes (for
    example ``(n,)`` against ``(n, 1)``) both are squeezed first; without
    this NumPy would broadcast them to an ``(n, n)`` matrix and silently
    return a wrong value.

    Entries where ``true_y`` is 0 are not special-cased: they yield inf/nan
    under the usual NumPy division rules.
    """
    true_y = np.asarray(true_y, dtype=float)
    pred_y = np.asarray(pred_y, dtype=float)

    if true_y.shape != pred_y.shape:
        true_sq, pred_sq = np.squeeze(true_y), np.squeeze(pred_y)
        # Align only when the mismatch is purely due to singleton axes;
        # anything else is left to NumPy's normal broadcasting rules.
        if true_sq.shape == pred_sq.shape:
            true_y, pred_y = true_sq, pred_sq

    return np.mean(np.abs((true_y - pred_y) / true_y)) * 100

# Descriptor and humanhealth target files of the test chemicals.
descs_tst = '../data/descs/test/descs_Mar08_3839_test.csv'
target_tst = '../data/target/test/humanhealth_test.csv'
tst_X, tst_y = load_lcia_data(descs_p=descs_tst, target_p=target_tst)

In [8]:
# Scaler and PCA objects stored under ../nets/humanhealth.
# (joblib.load unpickles the files, so only load artefacts you trust.)
this_scaler = joblib.load("../nets/humanhealth/scaler.pkl")
pca = joblib.load("../nets/humanhealth/pca.pkl")

# Scale first, then project with the PCA.
# Caution: tst_X is overwritten, so re-running this cell without reloading
# the data feeds already-transformed values back into the scaler.
tst_scaled = this_scaler.transform(tst_X)
tst_X = pca.transform(tst_scaled)
print(tst_X.shape)

(10, 60)


In [9]:
# Load the metagraph, restore the humanhealth model, then predict on the
# test set.
#
# The meta graph is imported into its own tf.Graph instead of the shared
# default graph. Collections accumulate when several meta graphs are imported
# into one graph, so tf.get_collection("X")[0] could otherwise return the
# tensor of a model imported by another cell in the same kernel.
humanhealth_graph = tf.Graph()
with humanhealth_graph.as_default(), tf.Session(graph=humanhealth_graph) as sess:
    new_saver = tf.train.import_meta_graph("../nets/humanhealth/humanhealth_Apr4.meta")
    new_saver.restore(sess, "../nets/humanhealth/humanhealth_Apr4.ckpt")
    # Tensors stored in named collections of the saved graph.
    X = tf.get_collection("X")[0]
    y = tf.get_collection("y")[0]  # not fed here; only the inputs are needed to predict
    pred = tf.get_collection("pred")[0]
    test_val = sess.run(pred, feed_dict={X: tst_X})

In [10]:
test_val = np.exp(test_val)
%matplotlib auto
MRE_this = mre(tst_y, test_val)
R2_this = r2_score(tst_y, test_val)
print R2_this
MRE_label = 'MRE: ' + str(round(MRE_this,2))

fig = plt.figure()
ax = fig.add_subplot(111)
est = plt.plot(tst_y, test_val,'o', label='estimated values')

max_val = max(max(tst_y),max(test_val))
plt.ylim([0,max_val])
plt.xlim([0,max_val])

thisLine = plt.plot(np.append(0,max_val), np.append(0,max_val), label='perfect prediction line')

plt.plot([],[],linewidth=0, label=MRE_label)
plt.legend(loc='upper left')
plt.show()

Using matplotlib backend: Qt5Agg
0.711348733215
