## Cross-validation for the number of hidden neurons and hidden layers

In [1]:
import sys
sys.path.append("./src") # append to system path

from sklearn import cross_validation
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler

import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
from matplotlib import style
from matplotlib.patches import Rectangle
style.use('ggplot')



In [2]:
def load_lcia_data(descs_p, target_p):
    """Read the descriptor and target CSV files and return them as arrays.

    Both files are parsed with their first row as the header and without
    an index column. Missing values in the descriptor table are replaced
    by 0; the target table is returned as read.

    Parameters
    ----------
    descs_p : path of the descriptor CSV file.
    target_p : path of the target CSV file.

    Returns
    -------
    tuple
        ``(descriptor_values, target_values)`` as NumPy arrays.
    """
    descriptors = pd.read_csv(descs_p, header=0, index_col=None).fillna(0)
    targets = pd.read_csv(target_p, header=0, index_col=None)
    return descriptors.values, targets.values

def mre(true_y,pred_y):
    """Return the mean relative error between targets and predictions, in percent.

    Computes ``mean(|true_y - pred_y| / |true_y|) * 100`` over all elements.

    Parameters
    ----------
    true_y : array-like of reference values.
    pred_y : array-like of predicted values.

    Returns
    -------
    float
        Mean relative error as a percentage.

    Notes
    -----
    * Inputs are converted to float arrays first, so plain sequences are
      accepted and integer inputs are not subject to integer division.
    * The two inputs are combined with NumPy broadcasting. Mixing a 1-D
      array with a column vector of the same length is NOT handled: it
      broadcasts to a 2-D result instead of an element-wise comparison.
    * Zeros in ``true_y`` lead to a division by zero (inf/nan result).
    """
    true_y = np.asarray(true_y, dtype=float)
    pred_y = np.asarray(pred_y, dtype=float)

    return np.mean(np.abs((true_y - pred_y) / true_y)) * 100

### Load Training data
Use `KFold` to create the cross-validation datasets.

In [3]:
# Paths of the training descriptor table and the matching target table.
descs_p = '../data/descs/train/descs_Mar08_3839_train.csv'
target_p = '../data/target/train/humanhealth_train.csv'
X,y = load_lcia_data(descs_p, target_p)

# 5-fold splitter used to build the cross-validation folds.
# NOTE(review): shuffle is left at its default (False), so random_state has
# no effect on the folds; newer scikit-learn releases reject a random_state
# without shuffle=True -- drop it or enable shuffling when upgrading.
kf = KFold(n_splits=5,random_state=1)
# Parenthesised form prints the same text on Python 2 and also parses on Python 3.
print(kf)

KFold(n_splits=5, random_state=1, shuffle=False)


### Helper functions

In [None]:
def fit_descs(trn_X,val_X,n_components=40):
    """Standardise the descriptors and project them onto principal components.

    The scaler and the PCA are fitted on the training descriptors only and
    then applied unchanged to the validation descriptors, so no statistics
    of the validation fold are used during fitting.

    Parameters
    ----------
    trn_X : training descriptor matrix.
    val_X : validation descriptor matrix with the same columns as ``trn_X``.
    n_components : value passed to ``PCA(n_components=...)``; defaults to 40,
        the value that was previously hard-coded.

    Returns
    -------
    tuple
        ``(trn_X, val_X)`` after scaling and PCA projection.
    """
    this_scaler = StandardScaler()
    pca = PCA(n_components=n_components)

    # Fit on the training fold only.
    trn_X = this_scaler.fit_transform(trn_X)
    trn_X = pca.fit_transform(trn_X)

    # Re-use the fitted transforms for the validation fold.
    val_X = this_scaler.transform(val_X)
    val_X = pca.transform(val_X)
    return trn_X,val_X

def init_weights(shape):
    """Create a weight variable of the given shape.

    The initial value is drawn with ``tf.random_normal`` using
    ``stddev=0.1``.
    """
    return tf.Variable(tf.random_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a bias variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def build_model_and_run(trn_X, trn_y, model_structure, lr=0.01,beta=0.01):
    """High-level entry point for creating the neural-network model.

    NOTE: this function is unfinished. As written it only creates the input
    and target placeholders, sized from the second dimension of ``trn_X``
    and ``trn_y`` (both must therefore be 2-D), and implicitly returns
    ``None``.

    Parameters
    ----------
    trn_X : 2-D training descriptors; ``shape[1]`` sets the input width.
    trn_y : 2-D training targets; ``shape[1]`` sets the output width.
    model_structure : not used yet -- presumably the hidden-layer layout;
        TODO confirm once the body is written.
    lr : not used yet -- presumably the learning rate; TODO confirm.
    beta : not used yet -- presumably a regularisation weight; TODO confirm.
    """
    n_inputs = trn_X.shape[1]
    n_outputs = trn_y.shape[1]

    # Leading dimension is None so any batch size can be fed.
    inputs = tf.placeholder(dtype=tf.float32, shape=[None, n_inputs])
    targets = tf.placeholder(dtype=tf.float32, shape=[None, n_outputs])
    
    

In [None]:
# Use this loop to split the data into cross-validation folds:
# kf.split yields one (training indices, validation indices) pair per fold.
for trn,val in kf.split(X,y):
    trn_X, val_X = X[trn], X[val]
    trn_y, val_y = y[trn], y[val]