# Propagate Errors

This notebook takes you through the steps of propagating the errors of an input spectrum through the neural network model

* required packages: `numpy h5py keras tensorflow`
* data files: 
    - starnet_cnn.h5
    - mean_and_std.npy
    - test_data.h5
    - apStar_combined_main.h5

In [1]:
import numpy as np
from keras.models import load_model
import h5py
import tensorflow as tf
import time

# Directory prefix prepended to every data file name below (empty string = current directory)
datadir = ""

Using TensorFlow backend.


Define path variables for your keras model, denormalization data, and test data

In [2]:
# Keras model, label normalization statistics and test set, all located under `datadir`
model_path, denormalization_path, test_data_path = [
    datadir + file_name
    for file_name in ('starnet_cnn.h5', 'mean_and_std.npy', 'test_data.h5')
]

**Define functions to:**

1. convert the keras model into a tensorflow graph
2. load a tensorflow graph
3. compute the jacobian matrix
4. compute the covariance
5. compute the variance

Note: these functions can be combined into one, but they are separated here to allow users to extract intermediate results for analysis

In [3]:
def keras_to_tf(weight_file,input_fld='',output_fld=''):
    """Freeze a saved Keras model into a binary TensorFlow graph (``.pb``) file.

    Parameters
    ----------
    weight_file : str
        Name (or path) of the ``.h5`` Keras model file. It is joined onto
        ``input_fld`` to locate the model, and its extension is swapped for
        ``.pb`` to name the frozen graph.
    input_fld : str, optional
        Folder that ``weight_file`` is looked up in.
    output_fld : str, optional
        Folder the frozen graph is written to.

    Returns
    -------
    str
        Path of the written graph file, ``osp.join(output_fld, <name>.pb)``,
        i.e. the same path that is printed and that ``write_graph`` is given.
    """
    import os.path as osp
    from tensorflow.python.framework import graph_util
    from tensorflow.python.framework import graph_io
    from keras.models import load_model
    from keras import backend as K

    # weight_file is a .h5 keras model file
    output_node_names_of_input_network = ["pred0"]
    output_node_names_of_final_network = 'output_node'
    # Swap the extension properly instead of slicing two characters off the end,
    # so '.hdf5' / '.keras' style names also yield a clean '<name>.pb'.
    output_graph_name = osp.splitext(weight_file)[0]+'.pb'
    weight_file_path = osp.join(input_fld, weight_file)

    net_model = load_model(weight_file_path)

    num_output = len(output_node_names_of_input_network)
    pred_node_names = [
        output_node_names_of_final_network+str(i) for i in range(num_output)
    ]
    for i, node_name in enumerate(pred_node_names):
        # The identity op only exists to give the output a known node name in
        # the graph. NOTE(review): net_model.output[i] indexes the output
        # tensor itself when the model has a single output; the jacobian code
        # in this notebook unstacks the frozen output along axis 0, so this
        # indexing is kept exactly as it was.
        tf.identity(net_model.output[i], name=node_name)

    sess = K.get_session()

    constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), pred_node_names)
    graph_io.write_graph(constant_graph, output_fld, output_graph_name, as_text=False)

    # Build the path once so the printed and the returned location always agree
    # (plain string concatenation dropped the separator for e.g. output_fld='out').
    output_graph_path = osp.join(output_fld, output_graph_name)
    print('saved the constant graph (ready for inference) at: ', output_graph_path)

    return output_graph_path

def load_graph(frozen_graph_filename):
    """Import a frozen TensorFlow graph file into a fresh ``tf.Graph``.

    The serialized GraphDef is read from ``frozen_graph_filename`` and imported
    under the name scope ``"prefix"``.

    Returns a tuple ``(graph, input_name, output_name)`` where the two names
    are the first and the last operation of the imported graph with ``':0'``
    appended (the tensor name of each operation's first output).
    """
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(frozen_graph_filename, "rb") as pb_file:
        serialized_graph = pb_file.read()
    graph_def.ParseFromString(serialized_graph)

    graph = tf.Graph()
    with graph.as_default():
        # All remaining import_graph_def arguments are left at their defaults
        tf.import_graph_def(graph_def, name="prefix")

    operations = graph.get_operations()
    first_op, last_op = operations[0], operations[-1]

    return graph, first_op.name+':0', last_op.name+':0'

def compute_jacobian_from_tf_model_path(tf_model_path,input_data,denormalize=None):
    """Compute the jacobian of the frozen model's outputs w.r.t. its input.

    Parameters
    ----------
    tf_model_path : str
        Path of a frozen graph file readable by ``load_graph``.
    input_data : numpy.ndarray
        Input spectra, one per entry along axis 0. Each spectrum is fed to the
        graph on its own as ``input_data[i:i+1]``.
        (assumes a trailing channel axis, e.g. ``(n_spectra, n_pixels, 1)`` -
        only channel 0 of the gradient is kept; TODO confirm for other models)
    denormalize : callable, optional
        If given, it is applied to the graph's output tensor before the
        gradients are taken, so the jacobian is expressed in denormalized
        label units.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_outputs, n_spectra, input_data.shape[1])`` whose
        entry ``[j, s, k]`` is d(output j)/d(input pixel k) for spectrum ``s``.
    """
    tf_model,tf_input,tf_output = load_graph(tf_model_path)

    x = tf_model.get_tensor_by_name(tf_input)
    y = tf_model.get_tensor_by_name(tf_output)
    if denormalize is not None:
        y = denormalize(y)

    y_list = tf.unstack(y)
    num_outputs = y.shape.as_list()[0]

    # Build the gradient ops ONCE. Calling tf.gradients inside the per-spectrum
    # loop adds a fresh set of ops to the graph on every iteration.
    gradient_ops = [tf.gradients(y_, x)[0] for y_ in y_list]

    num_spectra = input_data.shape[0]

    # A single session is reused for every spectrum.
    with tf.Session(graph=tf_model) as sess:
        if num_spectra==1:
            y_out = sess.run(gradient_ops, feed_dict={x: input_data})
            # (num_outputs, 1, n_pixels, n_channels) -> keep channel 0
            return np.asarray(y_out)[:,:,:,0]

        print('\nCreating jacobian matrices for '+str(num_spectra)+' spectra...\n')
        # Report progress roughly every 10%, but at least after every spectrum
        print_count = max(int(num_spectra/10), 1)

        jacobian = np.zeros((num_outputs,num_spectra,input_data.shape[1]))
        for i in range(num_spectra):
            y_out = sess.run(gradient_ops, feed_dict={x: input_data[i:i+1]})
            jacobian[:,i:i+1,:] = np.asarray(y_out)[:,:,:,0]
            if (i+1)%print_count==0:
                print(str(i+1)+' jacobians completed...\n')
        # Use the spectrum count rather than the loop variable, which is
        # undefined when input_data is empty.
        print('All '+str(num_spectra)+' jacobians completed.\n')
    return jacobian

def compute_covariance_from_tf_model_path(tf_model_path,input_data,var,denormalize=None):
    """Propagate per-pixel input errors through the model to a label covariance.

    Parameters
    ----------
    tf_model_path : str
        Path of the frozen graph, forwarded to the jacobian computation.
    input_data : numpy.ndarray
        Input spectra, forwarded to the jacobian computation.
    var : numpy.ndarray
        Per-pixel errors of ``input_data``, broadcastable against the jacobian
        of shape ``(num_outputs, n_spectra, n_pixels)``. Despite the name, the
        values are squared here, so they are treated as errors rather than
        variances. The caller's array is NOT modified.
    denormalize : callable, optional
        Forwarded to the jacobian computation.

    Returns
    -------
    numpy.ndarray
        Covariance matrices of shape ``(n_spectra, num_outputs, num_outputs)``:
        ``cov[s, i, l] = sum_k jac[i, s, k] * var[s, k]**2 * jac[l, s, k]``.
    """
    jac_matrix = compute_jacobian_from_tf_model_path(tf_model_path,input_data,denormalize)

    # Work on a copy: the masking below used to write into the caller's array.
    var = np.array(var)
    # Errors above 6 are dropped from the propagation by zeroing them.
    # NOTE(review): presumably these mark flagged/bad pixels - confirm the threshold.
    var[var > 6] = 0

    jac_matrix = np.nan_to_num(jac_matrix)
    # jac_matrix: (label i, spectrum j, pixel k); jac_matrix.T: (pixel k, spectrum j, label l)
    covariance = np.einsum('ijk,kjl->jil',(jac_matrix*(var**2)),jac_matrix.T)
    return covariance

def compute_variance_from_tf_model_path(tf_model_path,input_data,var,denormalize=None):
    """Return the propagated variance of every output for every spectrum.

    All arguments are forwarded unchanged to
    ``compute_covariance_from_tf_model_path``; the result is the main diagonal
    of each covariance matrix (taken over axes 1 and 2).
    """
    cov_matrices = compute_covariance_from_tf_model_path(
        tf_model_path, input_data, var, denormalize
    )
    per_label_variance = np.diagonal(cov_matrices, axis1=1, axis2=2)
    return per_label_variance

save model as a tensorflow graph

In [4]:
# Freeze the Keras model to a TensorFlow graph file and keep the path it was written to
tf_model_path = keras_to_tf(weight_file=model_path)

INFO:tensorflow:Froze 10 variables.
Converted 10 variables to const ops.
('saved the constant graph (ready for inference) at: ', '/data/stars/starnet_cnn.pb')


**Create a denormalization function**

In [5]:
# Normalization statistics of the labels: row 0 is used as the mean, row 1 as the std
mean_and_std = np.load(denormalization_path)
mean_labels, std_labels = mean_and_std[0], mean_and_std[1]
num_labels = mean_and_std.shape[1]

def denormalize(lb_norm):
    """Undo the label normalization: scale by ``std_labels``, then add ``mean_labels``."""
    rescaled = lb_norm*std_labels
    return rescaled+mean_labels

**Load the StarNet model**

In [6]:
# Keras copy of the network; used further down to make the label predictions
keras_model = load_model(filepath=model_path)

**Load Test Data**

The error propagation technique takes some time, so for the purpose of this example we will only use the first 300 spectra in the test set

In [7]:
# Number of spectra (taken from the start of the test set) to process
num_test = 300

# Read only the first `num_test` entries into memory and close the file right
# away, instead of keeping lazy datasets tied to a file handle that is never closed.
with h5py.File(test_data_path, 'r') as f:
    test_spectra = f['spectrum'][0:num_test]
    test_err_spectra = f['error_spectrum'][0:num_test]
    test_ap_ids = f['Ap_ID'][0:num_test]
    # One row per star, columns in the order Teff, log(g), [Fe/H]
    test_labels = np.column_stack((f['TEFF'][0:num_test],f['LOGG'][0:num_test],f['FE_H'][0:num_test]))
print('Test set contains '  + str(len(test_ap_ids))+' stars')

Test set contains 300 stars


**Compute predictions and errors for the test set**

**Steps:**
1. compute predictions

    \begin{equation}
    h(\textbf{x},\textbf{W}) = (h_{1}(\textbf{x},\textbf{W}),...,h_{j}(\textbf{x},\textbf{W}))
    \end{equation} 

        j = 3

2. compute jacobian matrix

    \begin{equation}
    Jac = \frac{\partial h_{j}(\textbf{x},\textbf{W})}{\partial \textbf{x}} =  (\frac{\partial h_{j}(\textbf{x},\textbf{W})}{\partial x_{1}},...,\frac{\partial h_{j}(\textbf{x},\textbf{W})}{\partial x_{n}})
    \end{equation} 

        j = 1,...,3

        n = 7214

3. compute covariance matrix

    \begin{equation}
    Cov = Jac \times \Delta \textbf{x}^2 \times Jac^T
    \end{equation}
    

4. obtain propagated variance due to error spectrum from the diagonal of the covariance matrix

    \begin{equation}
    \sigma_{\mathrm{prop}}^2 \approx diag(Cov)
    \end{equation}
    

5. determine which region of the label-space the labels are within to obtain the intrinsic scatter in the corresponding bin. These values have been predetermined from training StarNet on synthetic data and applying it to a test set of synthetic data

    \begin{equation}
    \sigma_{\mathrm{int}}
    \end{equation}
    
6. combine propagated error with the intrinsic scatter term

    \begin{equation}
    \Delta h_{j} = \sqrt{\sigma_{\mathrm{prop}}^2  + \sigma_{\mathrm{int}}^2}
    \end{equation}

In [8]:
num_spectra = len(test_labels)

# One row per spectrum, one column per label (same layout as test_labels)
variance = np.zeros(test_labels.shape)
predictions = np.zeros(test_labels.shape)
print('Making predictions and computing propagated variance for '+str(num_spectra)+' spectra')

# Report progress roughly every 10%; clamp to 1 so that fewer than 10 spectra
# do not cause a modulo-by-zero.
progress_every = max(int(0.1*num_spectra), 1)

time_start = time.time()
for i in range(num_spectra):
    # Shape the spectrum as (1, n_pixels, 1), the layout fed to both models
    spectrum = test_spectra[i:i+1].reshape((1,-1,1))
    err_spectrum = test_err_spectra[i:i+1]
    variance[i] = compute_variance_from_tf_model_path(tf_model_path,spectrum,err_spectrum,denormalize)
    predictions[i] = denormalize(keras_model.predict(spectrum))
    if i%progress_every==0:
        print('\n'+str(i+1)+' completed.\n'+str(time.time()-time_start)+' seconds elapsed.')
# Use the spectrum count rather than the loop variable, which is undefined for an empty test set
print('\nAll '+str(num_spectra)+' completed.\n'+str(time.time()-time_start)+' seconds elapsed.')

Making predictions and computing propagated variance for 300 spectra

1 completed.
0.773883104324 seconds elapsed.

31 completed.
21.1932361126 seconds elapsed.

61 completed.
41.938945055 seconds elapsed.

91 completed.
62.3968989849 seconds elapsed.

121 completed.
82.8396980762 seconds elapsed.

151 completed.
103.467030048 seconds elapsed.

181 completed.
123.905740023 seconds elapsed.

211 completed.
144.359602928 seconds elapsed.

241 completed.
164.856469154 seconds elapsed.

271 completed.
185.443655968 seconds elapsed.

All 300 completed.
205.273967028 seconds elapsed.


**Create intrinsic scatter arrays (predetermined)**

In [9]:
# Intrinsic scatter values: one row per label, one column per bin (3 x 10).
# Row i is paired with column i of `test_labels` in the binning cell below,
# i.e. the rows follow the label order TEFF, LOGG, FE_H.
# NOTE(review): values are presumably in the units of the respective label - confirm.
scatter_terms = np.array([[  2.85209088e+01,   2.30193645e+01,   2.10676180e+01,
          1.91357425e+01,   1.72090644e+01,   1.58693655e+01,
          1.52684102e+01,   1.42387830e+01,   1.64239293e+01,
          2.18981017e+01],
       [  3.86073715e-02,   3.04916170e-02,   2.44161726e-02,
          2.25093310e-02,   2.35929675e-02,   2.36922221e-02,
          2.58764773e-02,   2.80946934e-02,   3.34534390e-02,
          3.56641714e-02],
       [  3.90793092e-02,   2.43149947e-02,   2.25292707e-02,
          1.81974298e-02,   1.58638867e-02,   1.46142515e-02,
          1.36038125e-02,   1.25392930e-02,   1.24740228e-02,
          1.53680421e-02]])
# Bin edges for each label: one row per label, 11 ascending edges (3 x 11).
# Edges j and j+1 of row i delimit the bin whose scatter is scatter_terms[i, j].
scatter_ranges = np.array([[  3.50000000e+03,   3.95000000e+03,   4.40000000e+03,
          4.85000000e+03,   5.30000000e+03,   5.75000000e+03,
          6.20000000e+03,   6.65000000e+03,   7.10000000e+03,
          7.55000000e+03,   8.00000000e+03],
       [  0.00000000e+00,   5.00000000e-01,   1.00000000e+00,
          1.50000000e+00,   2.00000000e+00,   2.50000000e+00,
          3.00000000e+00,   3.50000000e+00,   4.00000000e+00,
          4.50000000e+00,   5.00000000e+00],
       [ -2.50000000e+00,  -2.20000000e+00,  -1.90000000e+00,
         -1.60000000e+00,  -1.30000000e+00,  -1.00000000e+00,
         -7.00000000e-01,  -4.00000000e-01,  -1.00000000e-01,
          2.00000000e-01,   5.00000000e-01]])

**Assign each spectrum an intrinsic scatter term depending on the region of the parameter space in which the prediction lies**

In [10]:
# Start from NaN rather than uninitialised memory: a label that falls outside
# every bin then yields a clearly flagged NaN error instead of a garbage value.
scatter_errs = np.full(test_labels.shape, np.nan)

# NOTE(review): the bins are looked up with the reference labels (test_labels),
# not with `predictions` - confirm this is intended.
num_bins = scatter_terms.shape[1]
for i in range(scatter_terms.shape[0]):
    label_values = test_labels[:,i]
    bin_edges = scatter_ranges[i]
    # Half-open bins [edge_j, edge_j+1): a value sitting exactly on an interior
    # edge belongs to the bin above it (strict '>' and '<' on both sides used
    # to leave such values unassigned).
    bin_index = np.searchsorted(bin_edges, label_values, side='right') - 1
    # The uppermost edge is counted as part of the last bin
    bin_index[label_values == bin_edges[-1]] = num_bins - 1
    in_range = (bin_index >= 0) & (bin_index < num_bins)
    scatter_errs[in_range,i] = scatter_terms[i, bin_index[in_range]]

**Combine the propagated error (the square root of the variance) and the intrinsic error in quadrature**

In [11]:
# Quadrature sum: propagated variance plus squared intrinsic scatter, then the root
total_errors = np.sqrt(variance + scatter_errs**2)

In [12]:
# Display name and unit of each label, in the column order of `total_errors`
label_names = ['Teff  ','log(g)','[Fe/H]']
units = ['K','cgs','dex']

# Average the total error over all spectra, separately for each label
mean_err_total = np.mean(total_errors, axis=0)
print('Mean total statistical errors: \n')
for label_name, unit, err in zip(label_names, units, mean_err_total):
    print(label_name+':  '+"{0:.3f}".format(err)+' '+unit)

Mean total statistical errors: 

Teff  :  30.932 K
log(g):  0.064 cgs
[Fe/H]:  0.022 dex
