# Run the trained models and make predictions on each dataset

In [7]:
import sys
sys.path.append("./src") # append to system path

from sklearn import cross_validation
from sklearn.metrics import r2_score
from sklearn.externals import joblib

import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')


# Helpers
def mre(true_y, pred_y):
    """Return the mean relative error of the predictions, in percent.

    Computes ``mean(|true_y - pred_y| / |true_y|) * 100`` over all elements.

    Args:
        true_y: array-like of reference values.
        pred_y: array-like of predicted values.

    Returns:
        The mean relative error as a NumPy floating-point scalar (percent).

    Notes:
        * A column vector paired with a flat vector, e.g. shapes ``(n, 1)``
          and ``(n,)``, is compared element by element. Without this
          alignment NumPy would broadcast the pair to an ``(n, n)`` matrix
          and silently return a wrong value.
        * Inputs are converted to float, so integer inputs are not
          floor-divided under Python 2.
        * Zeros in ``true_y`` are not special-cased: the division yields
          inf/nan for those elements.
    """
    actual = np.asarray(true_y, dtype=float)
    predicted = np.asarray(pred_y, dtype=float)

    if actual.shape != predicted.shape:
        # Only drop length-1 axes when that makes the shapes agree; any other
        # shape combination keeps NumPy's normal broadcasting behaviour.
        squeezed_actual = np.squeeze(actual)
        squeezed_predicted = np.squeeze(predicted)
        if squeezed_actual.shape == squeezed_predicted.shape:
            actual, predicted = squeezed_actual, squeezed_predicted

    return np.mean(np.abs((actual - predicted) / actual)) * 100

def run_model(graph_path, model_path, inputs):
    """Restore a saved TensorFlow model and return its predictions.

    Args:
        graph_path: path of the meta-graph file given to
            ``tf.train.import_meta_graph``.
        model_path: checkpoint path given to ``Saver.restore``.
        inputs: values fed to the tensor stored first in the graph's "X"
            collection.

    Returns:
        The result of evaluating the tensor stored first in the graph's
        "pred" collection on ``inputs``.

    Raises:
        IndexError: if the restored graph has no "X" or "pred" collection.
    """
    # Import into a fresh graph on every call. Importing into the shared
    # default graph would add another copy of the network each time this
    # function is called.
    with tf.Graph().as_default():
        with tf.Session() as sess:
            saver = tf.train.import_meta_graph(graph_path)
            saver.restore(sess, model_path)
            X = tf.get_collection("X")[0]
            pred = tf.get_collection("pred")[0]
            return sess.run(pred, feed_dict={X: inputs})
   
# Descriptor matrices used by every model cell below: read the train and test
# CSV files (first row is the header, no index column), replace missing values
# with 0 and keep the underlying NumPy arrays.
trn_descs = pd.read_csv('../data/descs/train/descs_Mar08_3839_train.csv',header=0,index_col=None).fillna(0).values
tst_descs = pd.read_csv('../data/descs/test/descs_Mar08_3839_test.csv',header=0,index_col=None).fillna(0).values

### CED models

In [3]:
### Change here: settings specific to the CED model
# RS seeds the train/validation split below.
RS = 42
# NOTE(review): upper_lim is not used in this cell; presumably a plot/axis
# limit for this target -- confirm against the cells that consume it.
upper_lim=300
# Load the train and test target values as NumPy arrays.
trn_target = pd.read_csv('../data/target/train/CED_train.csv').values
tst_target = pd.read_csv('../data/target/test/CED_test.csv').values
# Meta-graph file and checkpoint path that are passed to run_model().
# NOTE(review): unlike the other model cells, this checkpoint path has no
# '.ckpt' suffix -- confirm it matches the files on disk.
graph_path = '../nets/CED/CED_apr4.meta'
model_path = '../nets/CED/CED_apr4'
# Persisted scaler and PCA objects; only their transform() is called below.
this_scaler = joblib.load('../nets/CED/scaler.pkl')
pca = joblib.load("../nets/CED/pca.pkl")


### End of model-specific settings

# Split the training descriptors and targets, holding out 10% for validation.
trn_X, val_X,trn_y, val_y = cross_validation.train_test_split(
    trn_descs, trn_target, test_size=0.1, random_state=RS)

# Scale, then project with the PCA, each descriptor set (train, validation, test).
trn_X = pca.transform(this_scaler.transform(trn_X))
val_X = pca.transform(this_scaler.transform(val_X))
tst_X = pca.transform(this_scaler.transform(tst_descs))

### Acidification Models

In [None]:
### Change here: settings specific to the acidification model
# RS seeds the train/validation split below.
RS = 3
# NOTE(review): upper_lim is not used in this cell; presumably a plot/axis
# limit for this target -- confirm against the cells that consume it.
upper_lim=7
# Load the train and test target values as NumPy arrays.
trn_target = pd.read_csv('../data/target/train/acidification_train.csv').values
tst_target = pd.read_csv('../data/target/test/acidification_test.csv').values
# Meta-graph file and checkpoint path that are passed to run_model().
graph_path = '../nets/acidification/acidification_apr4.meta'
model_path = '../nets/acidification/acidification_apr4.ckpt'
# Persisted scaler and PCA objects; only their transform() is called below.
this_scaler = joblib.load('../nets/acidification/scaler.pkl')
pca = joblib.load("../nets/acidification/pca.pkl")


### End of model-specific settings

# Split the training descriptors and targets, holding out 10% for validation.
trn_X, val_X,trn_y, val_y = cross_validation.train_test_split(
    trn_descs, trn_target, test_size=0.1, random_state=RS)

# Scale, then project with the PCA, each descriptor set (train, validation, test).
trn_X = pca.transform(this_scaler.transform(trn_X))
val_X = pca.transform(this_scaler.transform(val_X))
tst_X = pca.transform(this_scaler.transform(tst_descs))

### EI99

In [None]:
### Change here: settings specific to the EI99 model
# RS seeds the train/validation split below.
RS = 3
# NOTE(review): upper_lim is not used in this cell; presumably a plot/axis
# limit for this target -- confirm against the cells that consume it.
upper_lim=3
# Load the train and test target values as NumPy arrays.
trn_target = pd.read_csv('../data/target/train/EI99_train.csv').values
tst_target = pd.read_csv('../data/target/test/EI99_test.csv').values
# Meta-graph file and checkpoint path that are passed to run_model().
graph_path = '../nets/EI99/EI99_Apr4.meta'
model_path = '../nets/EI99/EI99_Apr4.ckpt'
# Persisted scaler and PCA objects; only their transform() is called below.
this_scaler = joblib.load('../nets/EI99/scaler.pkl')
pca = joblib.load("../nets/EI99/pca.pkl")


### End of model-specific settings

# Split the training descriptors and targets, holding out 10% for validation.
trn_X, val_X,trn_y, val_y = cross_validation.train_test_split(
    trn_descs, trn_target, test_size=0.1, random_state=RS)

# Scale, then project with the PCA, each descriptor set (train, validation, test).
trn_X = pca.transform(this_scaler.transform(trn_X))
val_X = pca.transform(this_scaler.transform(val_X))
tst_X = pca.transform(this_scaler.transform(tst_descs))

### GWP

In [None]:
### Change here: settings specific to the GWP model
# RS seeds the train/validation split below.
RS = 42
# NOTE(review): upper_lim is not used in this cell; presumably a plot/axis
# limit for this target -- confirm against the cells that consume it.
upper_lim=20
# Load the train and test target values as NumPy arrays.
trn_target = pd.read_csv('../data/target/train/GWP_train.csv').values
tst_target = pd.read_csv('../data/target/test/GWP_test.csv').values
# Meta-graph file and checkpoint path that are passed to run_model().
graph_path = '../nets/GWP/GWP_Apr4.meta'
model_path = '../nets/GWP/GWP_Apr4.ckpt'
# Persisted scaler and PCA objects; only their transform() is called below.
this_scaler = joblib.load('../nets/GWP/scaler.pkl')
pca = joblib.load("../nets/GWP/pca.pkl")


### End of model-specific settings

# Split the training descriptors and targets, holding out 10% for validation.
trn_X, val_X,trn_y, val_y = cross_validation.train_test_split(
    trn_descs, trn_target, test_size=0.1, random_state=RS)

# Scale, then project with the PCA, each descriptor set (train, validation, test).
trn_X = pca.transform(this_scaler.transform(trn_X))
val_X = pca.transform(this_scaler.transform(val_X))
tst_X = pca.transform(this_scaler.transform(tst_descs))

### Human Health

In [None]:
### Change here: settings specific to the human health model
# RS seeds the train/validation split below.
RS = 42
# NOTE(review): upper_lim is not used in this cell; presumably a plot/axis
# limit for this target -- confirm against the cells that consume it.
upper_lim = 0.003
# Load the train and test target values as NumPy arrays.
trn_target = pd.read_csv('../data/target/train/humanhealth_train.csv').values
tst_target = pd.read_csv('../data/target/test/humanhealth_test.csv').values
# Meta-graph file and checkpoint path that are passed to run_model().
graph_path = '../nets/humanhealth/humanhealth_Apr4.meta'
model_path = '../nets/humanhealth/humanhealth_Apr4.ckpt'
# Persisted scaler and PCA objects; only their transform() is called below.
this_scaler = joblib.load('../nets/humanhealth/scaler.pkl')
pca = joblib.load("../nets/humanhealth/pca.pkl")


### End of model-specific settings

# Split the training descriptors and targets, holding out 10% for validation.
trn_X, val_X,trn_y, val_y = cross_validation.train_test_split(
    trn_descs, trn_target, test_size=0.1, random_state=RS)

# Scale, then project with the PCA, each descriptor set (train, validation, test).
trn_X = pca.transform(this_scaler.transform(trn_X))
val_X = pca.transform(this_scaler.transform(val_X))
tst_X = pca.transform(this_scaler.transform(tst_descs))

### Ecosystem Quality

In [None]:
### Change here: settings specific to the ecosystem quality model
# RS seeds the train/validation split below.
RS = 3
# NOTE(review): upper_lim is not used in this cell; presumably a plot/axis
# limit for this target -- confirm against the cells that consume it.
upper_lim = 0.0006
# Load the train and test target values as NumPy arrays.
trn_target = pd.read_csv('../data/target/train/ecosystemquality_train.csv').values
tst_target = pd.read_csv('../data/target/test/ecosystemquality_test.csv').values
# Meta-graph file and checkpoint path that are passed to run_model().
graph_path = '../nets/ecosystemquality/ecosystemquality_Apr4.meta'
model_path = '../nets/ecosystemquality/ecosystemquality_Apr4.ckpt'
# Persisted scaler and PCA objects; only their transform() is called below.
this_scaler = joblib.load('../nets/ecosystemquality/scaler.pkl')
pca = joblib.load("../nets/ecosystemquality/pca.pkl")


### End of model-specific settings

# Split the training descriptors and targets, holding out 10% for validation.
trn_X, val_X,trn_y, val_y = cross_validation.train_test_split(
    trn_descs, trn_target, test_size=0.1, random_state=RS)

# Scale, then project with the PCA, each descriptor set (train, validation, test).
trn_X = pca.transform(this_scaler.transform(trn_X))
val_X = pca.transform(this_scaler.transform(val_X))
tst_X = pca.transform(this_scaler.transform(tst_descs))

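## Transform predictions back to the original scale if necessary
Only for the GWP, human health and ecosystem quality models. Run this cell after the "Run Prediction" cell below, because it exponentiates the `pred_*` arrays produced there.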

In [None]:
# Exponentiate the predictions, rebinding pred_trn / pred_val / pred_tst.
# The pred_* arrays must already exist, so the prediction cell has to be run
# before this one. Re-running this cell applies exp() a second time.
pred_trn = np.exp(pred_trn)
pred_val = np.exp(pred_val)
pred_tst = np.exp(pred_tst)

### Run Prediction

In [4]:
# Predict on the train, validation and test inputs prepared by whichever model
# cell was run last; each call loads the graph and checkpoint via run_model().
pred_trn = run_model(graph_path, model_path, trn_X)
pred_val = run_model(graph_path, model_path, val_X)
pred_tst = run_model(graph_path, model_path, tst_X)

In [9]:
# Score the predictions on each split: R^2 via sklearn's r2_score and the mean
# relative error in percent via mre().
trn_R2 = r2_score(trn_y, pred_trn)
trn_MRE = mre(trn_y, pred_trn)

val_R2 = r2_score(val_y, pred_val)
val_MRE = mre(val_y, pred_val)

tst_R2 = r2_score(tst_target, pred_tst)
tst_MRE = mre(tst_target, pred_tst)

# A single pre-formatted string argument prints the same text under the
# Python 2 print statement and the Python 3 print function. The spacing after
# each label reproduces what the original comma-separated print emitted.
print("Training R2 and MRE:  %s %s" % (trn_R2, trn_MRE))
print("Validation R2 and MRE:  %s %s" % (val_R2, val_MRE))
print("Testing R2 and MRE: %s %s" % (tst_R2, tst_MRE))

Training R2 and MRE:  0.988579092577 2.60123905067
Validation R2 and MRE:  0.521809925494 39.8644821756
Testing R2 and MRE: 0.451893336059 41.642365571
