## Make figures

In [1]:
import sys
sys.path.append("./src") # append to system path

from sklearn import cross_validation
from sklearn.metrics import r2_score
from sklearn.externals import joblib

import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')


# Helpers
def mre(true_y, pred_y):
    """Return the element-wise absolute relative error, expressed in percent.

    Evaluates ``|(true_y - pred_y) / true_y| * 100`` for every element. No
    averaging is performed: the result has the broadcast shape of the inputs.

    Parameters
    ----------
    true_y : array-like
        Reference values; they are also the divisor.
    pred_y : array-like
        Predicted values, broadcastable against ``true_y``.

    Returns
    -------
    Percentage error for each element.
    """
    # NOTE: mixed 1-D / 2-D layouts are not reconciled here; e.g. an (n, 1)
    # array against an (n,) array broadcasts to an (n, n) result.
    residual = np.subtract(true_y, pred_y)
    ratio = residual / true_y
    return np.absolute(ratio) * 100

def run_model(graph_path, model_path, inputs):
    """Restore a saved TensorFlow model and return its predictions for ``inputs``.

    Parameters
    ----------
    graph_path : str
        Path of the ``.meta`` file handed to ``tf.train.import_meta_graph``.
    model_path : str
        Checkpoint path handed to the imported saver's ``restore``.
    inputs : array-like
        Values fed to the first tensor stored in the ``"X"`` collection.

    Returns
    -------
    The value of the first tensor in the ``"pred"`` collection, evaluated on
    ``inputs``.

    Raises
    ------
    IndexError
        If the imported graph has no ``"X"`` or ``"pred"`` collection entry.
    """
    # Import into a private graph. Importing into the process-wide default
    # graph would add another copy of the network on every call, and
    # get_collection(...)[0] would keep returning the tensors of the first copy.
    with tf.Graph().as_default():
        with tf.Session() as sess:
            saver = tf.train.import_meta_graph(graph_path)
            saver.restore(sess, model_path)
            X = tf.get_collection("X")[0]
            pred = tf.get_collection("pred")[0]
            return sess.run(pred, feed_dict={X: inputs})
   
# Descriptor ("descs") tables from the train/ and test/ folders. Missing
# entries are replaced by 0 and each frame is converted to a NumPy array.
trn_descs = (
    pd.read_csv('../data/descs/train/descs_Mar08_3839_train.csv', header=0, index_col=None)
    .fillna(0)
    .values
)
tst_descs = (
    pd.read_csv('../data/descs/test/descs_Mar08_3839_test.csv', header=0, index_col=None)
    .fillna(0)
    .values
)



### Model Performances

In [2]:
### Change here: everything down to the closing "###" is specific to one model
RS = 3           # random_state of the train/validation split below
upper_lim = 7    # upper bound of both axes in the prediction plot

# Target values for the training and test sets
trn_target = pd.read_csv('../data/target/train/acidification_train.csv').values
tst_target = pd.read_csv('../data/target/test/acidification_test.csv').values

# Saved network (meta graph + checkpoint)
graph_path = '../nets/acidification/acidification_apr4.meta'
model_path = '../nets/acidification/acidification_apr4.ckpt'

# Fitted preprocessing objects. joblib.load unpickles the files, so only load
# files from a trusted source.
this_scaler = joblib.load('../nets/acidification/scaler.pkl')
pca = joblib.load("../nets/acidification/pca.pkl")
###


def _scale_then_project(descs):
    """Apply the loaded scaler, then the loaded PCA, to a descriptor matrix."""
    return pca.transform(this_scaler.transform(descs))


# Hold out 10% of the training data as a validation set
trn_X, val_X, trn_y, val_y = cross_validation.train_test_split(
    trn_descs,
    trn_target,
    test_size=0.1,
    random_state=RS,
)

# Scale + PCA-transform every split with the same fitted objects
trn_X = _scale_then_project(trn_X)
val_X = _scale_then_project(val_X)
tst_X = _scale_then_project(tst_descs)

In [3]:
# Run the restored network on the training, validation and test inputs.
pred_trn, pred_val, pred_tst = (
    run_model(graph_path, model_path, split_X)
    for split_X in (trn_X, val_X, tst_X)
)

In [4]:
# Exponentiate the raw network outputs.
# NOTE(review): presumably the network was trained on log-transformed targets
# and this maps predictions back to the original scale; confirm.
# Caution: re-running this cell without re-running the prediction cell applies
# exp() a second time.
pred_trn, pred_val, pred_tst = (
    np.exp(raw_pred) for raw_pred in (pred_trn, pred_val, pred_tst)
)

In [4]:
%matplotlib auto
# plt.switch_backend('Qt5Agg')
#make graph
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
MRE = mre(tst_target, pred_tst)
r2  = round(r2_score(tst_target, pred_tst),2)

tst_results = plt.plot(tst_target, pred_tst, '^', label = 'testing chemicals',zorder=10,markersize=10)
val_results = plt.plot(val_y, pred_val, 's', label = 'validation chemicals',zorder=5,markersize=10)
trn_results = plt.plot(trn_y, pred_trn, 'o',label = 'training chemicals',zorder=1,markersize=10)

plt.ylim([0,upper_lim])
plt.xlim([0,upper_lim])
ax.tick_params(axis='both',which='major',labelsize=15)

thisLine = plt.plot(np.append(0,upper_lim), np.append(0,upper_lim), label='perfect prediction line')

plt.plot([],[],linewidth=0, label='R2 on testing data: '+str(r2))
plt.legend(loc='upper left',numpoints=1)

plt.show()

Using matplotlib backend: MacOSX


## Compare model performances between different input sets

Use three grouped bar charts (one per number of hidden layers) to show the performance difference between models developed with the full descriptors, the feature-selected descriptors, and the PCA descriptors.

In [4]:
import sys
sys.path.append("./src") # append to system path

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import style
# style.use('ggplot')

In [5]:
# Network widths that were compared; every score list below has one entry per
# width, in this order.
num_neurons = [16, 64, 128, 512]

# Scores by input set (full / feature-selected / PCA descriptors).
# NOTE(review): the metric is not stated in this notebook (presumably R2);
# confirm.

# One hidden layer
one_layer_full = [0.33, 0.42, 0.40, 0.51]
one_layer_fs   = [0.52, 0.62, 0.49, 0.65]
one_layer_pca  = [0.45, 0.52, 0.56, 0.70]

# Two hidden layers
two_layer_full = [0.55, 0.44, 0.33, 0.42]
two_layer_fs   = [0.52, 0.68, 0.65, 0.65]
two_layer_pca  = [0.58, 0.69, 0.75, 0.74]

# Three hidden layers
three_layer_full = [0.51, 0.45, 0.36, 0.29]
three_layer_fs   = [0.68, 0.59, 0.42, 0.41]
three_layer_pca  = [0.73, 0.61, 0.41, 0.33]

In [69]:
%matplotlib auto
width_p = 0.25

fig,ax = plt.subplots(3,sharex=True,figsize=(8,11))

#make frame invisiable
ax[0].spines['top'].set_visible(False)
ax[0].spines['right'].set_visible(False)
ax[0].yaxis.set_ticks_position('left') 
ax[0].xaxis.set_ticks_position('bottom') 

y_pos = np.arange(len(num_neurons))

#-------
# one hidden layer graph
ax[0].spines['top'].set_visible(False)
ax[0].spines['right'].set_visible(False)
ax[0].yaxis.set_ticks_position('left') 
ax[0].xaxis.set_ticks_position('bottom') 

ax[0].bar([i-0.2 for i in y_pos],one_layer_full,width=width_p,align='center',color='lightgreen',label='full descriptors')
ax[0].bar(y_pos, one_layer_fs, width = width_p, align='center',color='lightskyblue',label='feature selection')
ax[0].bar([i+0.2 for i in y_pos], one_layer_pca, width=width_p, align='center',color='lightsalmon',label='PCA descriptors')
ax[0].legend(loc='upper left',prop={'size':9,'weight':'bold'})
ax[0].set_ylim([0,1])

#-------
# two hidden layer graph
ax[1].spines['top'].set_visible(False)
ax[1].spines['right'].set_visible(False)
ax[1].yaxis.set_ticks_position('left') 
ax[1].xaxis.set_ticks_position('bottom') 

ax[1].bar([i-0.2 for i in y_pos],two_layer_full,width=width_p,align='center',color='lightgreen',label='full descriptors')
ax[1].bar(y_pos, two_layer_fs, width = width_p, align='center',color='lightskyblue',label='feature selection')
ax[1].bar([i+0.2 for i in y_pos], two_layer_pca, width=width_p, align='center',color='lightsalmon',label='PCA descriptors')
ax[1].legend(loc='upper left',prop={'size':9,'weight':'bold'})
ax[1].set_ylim([0,1])

#-------
# three hidden layer graph
ax[2].spines['top'].set_visible(False)
ax[2].spines['right'].set_visible(False)
ax[2].yaxis.set_ticks_position('left') 
ax[2].xaxis.set_ticks_position('bottom') 

ax[2].bar([i-0.2 for i in y_pos],three_layer_full,width=width_p,align='center',color='lightgreen',label='full descriptors')
ax[2].bar(y_pos, three_layer_fs, width = width_p, align='center',color='lightskyblue',label='feature selection')
ax[2].bar([i+0.2 for i in y_pos], three_layer_pca, width=width_p, align='center',color='lightsalmon',label='PCA descriptors')
ax[2].legend(loc='upper left',prop={'size':9,'weight':'bold'})
ax[2].set_ylim([0,1])

#-------
fig.subplots_adjust(hspace=0.07)
plt.xticks(y_pos, num_neurons)
plt.show()
plt.savefig('../results/different_input_set.png')

Using matplotlib backend: MacOSX
