## Create acidification model with the full descriptor set (no PCA)

In [1]:
import sys
sys.path.append("./src") # append to system path

from sklearn import cross_validation
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.decomposition import PCA
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler
from sklearn.externals import joblib

import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
from matplotlib import style
from matplotlib.patches import Rectangle
style.use('ggplot')



In [2]:
def load_lcia_data(descs_p, target_p):
    """Read the descriptor and target CSV files into numpy arrays.

    Both files are parsed with their first row as the header and without an
    index column. Missing entries in the descriptor table are replaced by 0;
    the target table is returned exactly as read.

    Parameters
    ----------
    descs_p : path of the descriptor CSV file.
    target_p : path of the target CSV file.

    Returns
    -------
    tuple
        ``(descriptor_values, target_values)``, each the ``.values`` array of
        the corresponding DataFrame.
    """
    descriptors = pd.read_csv(descs_p, header=0, index_col=None).fillna(0)
    targets = pd.read_csv(target_p, header=0, index_col=None)
    return descriptors.values, targets.values

def mre(true_y, pred_y):
    """Mean relative error between targets and predictions, in percent.

    Computes ``mean(|true_y - pred_y| / |true_y|) * 100`` over all elements.

    If the two inputs hold the same number of elements but in different
    layouts (for example a column vector of shape (n, 1) against a flat
    vector of shape (n,)), the predictions are reshaped to the layout of the
    targets before subtracting. Without that step numpy would broadcast the
    pair to an (n, n) matrix and silently average over every cross pairing.
    Inputs whose element counts differ (e.g. a scalar prediction) still
    follow the normal numpy broadcasting rules.

    Parameters
    ----------
    true_y : array-like of reference values.
    pred_y : array-like of predicted values.

    Returns
    -------
    float
        The mean relative error as a percentage.

    Notes
    -----
    Entries of ``true_y`` equal to zero are divided by as-is, so they yield
    ``inf`` or ``nan`` in the result.
    """
    # Cast to float so integer inputs never go through integer division.
    true_arr = np.asarray(true_y, dtype=float)
    pred_arr = np.asarray(pred_y, dtype=float)

    # Same element count but different layout: align instead of broadcasting.
    if pred_arr.shape != true_arr.shape and pred_arr.size == true_arr.size:
        pred_arr = pred_arr.reshape(true_arr.shape)

    return np.mean(np.abs((true_arr - pred_arr) / true_arr)) * 100

# Input files: the descriptor table and the acidification target table.
descs_p = '../data/descs/feature_selection/descs_Mar08_58.csv'
target_p = '../data/target/full/acidification.csv'

# Load both tables as numpy arrays.
X, y = load_lcia_data(descs_p=descs_p, target_p=target_p)

# Hold out 10% of the rows for validation; random_state is pinned so the
# split is the same on every run.
trn_X, val_X, trn_y, val_y = cross_validation.train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=3,
)

In [3]:
# Sanity check: shapes of the training descriptors and training targets.
print trn_X.shape, trn_y.shape

(149, 56) (149, 1)


## Data Preprocessing -- standard scaling only

In [4]:
## Standard Scaler
# The scaler is fitted on the training split only; the validation split is
# transformed with those same fitted statistics.
# NOTE: trn_X and val_X are overwritten here, so re-running this cell
# rescales data that has already been scaled.
this_scaler = StandardScaler()
trn_X = this_scaler.fit_transform(trn_X)
val_X = this_scaler.transform(val_X)

# Scaling does not change the array shape.
print trn_X.shape

(149, 56)


## Building Model

In [5]:
def init_weights(shape):
    """Create a TensorFlow variable of the given shape for use as weights.

    The initial values are drawn by ``tf.random_normal`` with a standard
    deviation of 0.1.
    """
    return tf.Variable(tf.random_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a TensorFlow variable of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

# Network input/output widths, read from the data: number of descriptor
# columns and number of target columns.
num_descs = trn_X.shape[1]
num_target = trn_y.shape[1]

print num_descs,num_target

56 1


In [7]:
# ------------------------------------------------------------------
# Network definition: one sigmoid hidden layer and a linear output.
# ------------------------------------------------------------------

# Graph inputs. NOTE: this rebinds X and y, which until this cell held the
# numpy arrays returned by load_lcia_data.
X = tf.placeholder(tf.float32, shape=[None, num_descs])
y = tf.placeholder(tf.float32, shape=[None, num_target])

# Register the inputs in the graph collections 'X' and 'y'.
tf.add_to_collection('X', X)
tf.add_to_collection('y', y)

# Hidden layer: affine map to 128 units followed by a sigmoid.
w1 = init_weights((num_descs, 128))
b1 = bias_variable([128])
l1 = tf.nn.sigmoid(tf.add(tf.matmul(X, w1), b1))

# Disabled deeper-network experiments, kept for reference.
# # Second layer
# w2 = init_weights((128,128))
# b2 = bias_variable([128])
# l2 = tf.add(tf.matmul(l1,w2),b2)
# l2 = tf.nn.sigmoid(l2)

# # Third layer (as written, its 512-wide weights do not match a 128-wide l2)
# w3 = init_weights((512,512))
# b3 = bias_variable([512])
# l3 = tf.add(tf.matmul(l2,w3),b3)
# l3 = tf.nn.relu(l3)

# Output layer: affine map only, no activation function.
w_out = init_weights((128, num_target))
b_out = bias_variable([num_target])
l_out = tf.matmul(l1, w_out) + b_out

# The raw output is the prediction; register it in the 'pred' collection.
pred = l_out
tf.add_to_collection('pred', pred)

In [8]:
# Fixed training settings.
BATCH_SIZE = 1  # rows fed per optimisation step
BETA = 0.01     # weight of the L2 penalty in the cost

# L2 penalty over both weight matrices (the biases are not penalised).
regularizers = tf.nn.l2_loss(w1) + tf.nn.l2_loss(w_out)

# Cost: squared prediction error plus the weighted penalty, averaged over
# all elements.
cost = tf.reduce_mean(
    tf.square(pred - y) + BETA * regularizers
)

# Adagrad training step that minimises the cost.
optimizer = tf.train.AdagradOptimizer(learning_rate=0.01).minimize(cost)

# Op that initialises the global variables; created after the optimizer so
# that it is built last.
init = tf.global_variables_initializer()

In [9]:
%matplotlib auto
# Train the network and record the validation R^2 after every epoch.
# NOTE: despite its name, `costs` holds the per-epoch validation R^2 scores
# that are plotted at the end; the name is kept unchanged.
costs = []

# Saver for checkpointing; the save calls themselves are commented out below.
saver = tf.train.Saver()

with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    sess.run(init)
    for epoch in range(500):
        # One pass over the training rows in their stored order,
        # BATCH_SIZE rows per optimisation step.
        for i in range(0, len(trn_X), BATCH_SIZE):
            _, c = sess.run(
                [optimizer, cost],
                feed_dict={X: trn_X[i:i+BATCH_SIZE], y: trn_y[i:i+BATCH_SIZE]})

        # `pred` depends only on X, so y is not fed here. The validation
        # predictions are computed once and reused for both metrics instead
        # of running the same forward pass twice.
        trn_pred = sess.run(pred, feed_dict={X: trn_X})
        val_pred = sess.run(pred, feed_dict={X: val_X})

        trn_score = r2_score(trn_y, trn_pred)
        val_score = r2_score(val_y, val_pred)
        val_mre = mre(val_y, val_pred)

        costs.append(val_score)
        if epoch % 100 == 0:
            # `c` is the cost of the last mini-batch of this epoch; the two
            # "Accuracy" figures are R^2 scores.
            print("Epoch = %d,Cost = %.2f,Training Accuracy = %.2f, Validation Accuracy = %.2f, Validation MRE =%.2f" % (epoch + 1,c,trn_score,val_score,val_mre))

    # final pred on the validation set
    final_pred_val = sess.run(pred, feed_dict={X: val_X})

    # Plot the per-epoch validation R^2 collected above.
    plt.plot(costs)
    plt.show()

#     save_path = saver.save(sess, "../nets/acidification/acidification.ckpt")
#     saver.export_meta_graph("../nets/acidification/CED_apr4.meta")
#     print("Model saved in file: %s" % save_path)

Using matplotlib backend: MacOSX
Epoch = 1,Cost = 0.37,Training Accuracy = 0.26, Validation Accuracy = 0.26, Validation MRE =100.50
Epoch = 101,Cost = 0.19,Training Accuracy = 0.76, Validation Accuracy = 0.72, Validation MRE =60.16
Epoch = 201,Cost = 0.17,Training Accuracy = 0.77, Validation Accuracy = 0.72, Validation MRE =60.03
Epoch = 301,Cost = 0.17,Training Accuracy = 0.77, Validation Accuracy = 0.72, Validation MRE =60.13
Epoch = 401,Cost = 0.17,Training Accuracy = 0.77, Validation Accuracy = 0.72, Validation MRE =60.24


NameError: name 'tst_X' is not defined