# Propogate Errors

This notebook takes you through the steps of how to propogate errors for through the neural network model

* required packages: `numpy h5py keras`
* data files: 
    - starnet_cnn.h5
    - mean_and_std.npy
    - high_snr_test_data.h5
    - apStar_combined_main.h5

In [1]:
import numpy as np
from keras.models import load_model
import h5py
import tensorflow as tf

datadir = '/data/stars/'

Using TensorFlow backend.


Define path variables for your keras model, denormalization data, and test data

In [2]:
model_path = datadir + 'starnet_cnn.h5'
denormalization_path = datadir + 'mean_and_std.npy'
test_data_path = datadir + 'high_snr_test_data.h5'

**Define function to convert the keras model into a tensorflow graph and also a function that can load a tensorflow graph**

In [3]:
def convert_to_pb(weight_file,input_fld='',output_fld=''):
    
    import os
    import os.path as osp
    from tensorflow.python.framework import graph_util
    from tensorflow.python.framework import graph_io
    from keras.models import load_model
    from keras import backend as K
    
    
    # weight_file is a .h5 keras model file
    output_node_names_of_input_network = ["pred0"] 
    output_node_names_of_final_network = 'output_node'
    output_graph_name = weight_file[:-2]+'pb'
    weight_file_path = osp.join(input_fld, weight_file)
    
    net_model = load_model(weight_file_path)

    num_output = len(output_node_names_of_input_network)
    pred = [None]*num_output
    pred_node_names = [None]*num_output
    for i in range(num_output):
        pred_node_names[i] = output_node_names_of_final_network+str(i)
        pred[i] = tf.identity(net_model.output[i], name=pred_node_names[i])
        
    sess = K.get_session()
    
    constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), pred_node_names)
    graph_io.write_graph(constant_graph, output_fld, output_graph_name, as_text=False)
    print('saved the constant graph (ready for inference) at: ', osp.join(output_fld, output_graph_name))
    
    return output_fld+output_graph_name

def load_graph(frozen_graph_filename):
    # We load the protobuf file from the disk and parse it to retrieve the 
    # unserialized graph_def
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we can use again a convenient built-in function to import a graph_def into the 
    # current default Graph
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(
            graph_def, 
            input_map=None, 
            return_elements=None, 
            name="prefix", 
            op_dict=None, 
            producer_op_list=None
        )
        
    input_name = graph.get_operations()[0].name+':0'
    output_name = graph.get_operations()[-1].name+':0'
    
    return graph, input_name, output_name

save model as a tensorflow graph

In [4]:
tf_model_path = convert_to_pb(model_path)

INFO:tensorflow:Froze 10 variables.
Converted 10 variables to const ops.
('saved the constant graph (ready for inference) at: ', '/data/stars/starnet_cnn.pb')


** Create a denormalization function **

In [5]:
mean_and_std = np.load(denormalization_path)
mean_labels = mean_and_std[0]
std_labels = mean_and_std[1]
num_labels = mean_and_std.shape[1]

def denormalize(lb_norm):
    return ((lb_norm*std_labels)+mean_labels)

**Load the StarNet model**

In [6]:
keras_model = load_model(model_path)

** Load Test Data **

In [7]:
def get_data(filename):
    f = h5py.File(filename, 'r')
    spectra_array = f['spectrum']
    err_spectra_array = f['error_spectrum']
    ap_ids = f['Ap_ID'][:]
    labels_array = np.column_stack((f['TEFF'][:],f['LOGG'][:],f['FE_H'][:]))
    return  (ap_ids, spectra_array, err_spectra_array, labels_array)

In [8]:
test_1_ap_ids, test_1_spectra, test_1_err_spectra, test_1_labels = get_data(test_data_path)
print('High S/N test set contains '  + str(len(test_1_spectra))+' stars')

High S/N test set contains 2651 stars


** Load entire APOGEE dataset**

This is necessary to obtain the an accurate assessment of the scatter between the model predictions and apogee labels

In [9]:
F = h5py.File(datadir + 'apStar_combined_main_dr13.h5', 'r')
all_apogee_spectra = F['spectrum']
all_apogee_labels = np.column_stack((F['TEFF'][:],F['LOGG'][:],F['FE_H'][:]))

**Predict on entire APOGEE dataset using batches**

In [10]:
# Define edges of detectors
blue_chip_begin = 322
blue_chip_end = 3242
green_chip_begin = 3648
green_chip_end = 6048   
red_chip_begin = 6412
red_chip_end = 8306 

all_apogee_pred = np.zeros((len(all_apogee_spectra),num_labels))

batch_size = 500

print('Predicting on entire APOGEE database of '+str(len(all_apogee_spectra))+' objects...')

for i in range(len(all_apogee_spectra)/batch_size):
    
    spectra = all_apogee_spectra[i*batch_size:(i+1)*batch_size]

    # Separate spectra into chips

    blue_sp = spectra[:,blue_chip_begin:blue_chip_end]
    green_sp = spectra[:,green_chip_begin:green_chip_end]
    red_sp = spectra[:,red_chip_begin:red_chip_end]

    #Normalize spectra by chips

    blue_sp_med = np.median(blue_sp, axis=1)
    green_sp_med = np.median(green_sp, axis=1)
    red_sp_med = np.median(red_sp, axis=1)

    blue_sp = (blue_sp.T/blue_sp_med).T
    green_sp = (green_sp.T/green_sp_med).T
    red_sp = (red_sp.T/red_sp_med).T  

    # Recombine spectra

    spectra = np.column_stack((blue_sp,green_sp,red_sp))

    # Reshape spectra
    spectra = spectra.reshape((spectra.shape[0],spectra.shape[1],1))

    all_apogee_pred[i*batch_size:(i+1)*batch_size] = denormalize(keras_model.predict(spectra))
    
    
spectra = all_apogee_spectra[(i+1)*batch_size:]

# Separate spectra into chips

blue_sp = spectra[:,blue_chip_begin:blue_chip_end]
green_sp = spectra[:,green_chip_begin:green_chip_end]
red_sp = spectra[:,red_chip_begin:red_chip_end]

#Normalize spectra by chips

blue_sp_med = np.median(blue_sp, axis=1)
green_sp_med = np.median(green_sp, axis=1)
red_sp_med = np.median(red_sp, axis=1)

blue_sp = (blue_sp.T/blue_sp_med).T
green_sp = (green_sp.T/green_sp_med).T
red_sp = (red_sp.T/red_sp_med).T  

# Recombine spectra

spectra = np.column_stack((blue_sp,green_sp,red_sp))

# Reshape spectra
spectra = spectra.reshape((spectra.shape[0],spectra.shape[1],1))

all_apogee_pred[(i+1)*batch_size:] = denormalize(keras_model.predict(spectra))

print('Predictions complete.')

Predicting on entire APOGEE database of 148724 objects...




Predictions complete.


**Calculate residuals between StarNet predictions and ASPCAP labels**

Exclude stars with bad ASPCAP labels

In [11]:
indices = np.where((all_apogee_labels[:,0]!=-9999.)&(all_apogee_labels[:,1]!=-9999.)&(all_apogee_labels[:,2]!=-9999.))

all_apogee_pred = all_apogee_pred[indices]
all_apogee_labels = all_apogee_labels[indices]

all_apogee_resids = all_apogee_pred - all_apogee_labels

**Separate residuals into different bins**

This is necessary to undestand how the scatter in different ranges of the label-space differs so that the appropriate scatter values are used when including the scatter in the error propogation 

In [12]:
resid_t1 = all_apogee_resids[np.where((all_apogee_labels[:,0]<4000)&(all_apogee_labels[:,0]>10))[0]]
resid_t2 = all_apogee_resids[np.where((all_apogee_labels[:,0]<4500)&(all_apogee_labels[:,0]>4000))[0]]
resid_t3 = all_apogee_resids[np.where((all_apogee_labels[:,0]<4750)&(all_apogee_labels[:,0]>4500))[0]]
resid_t4 = all_apogee_resids[np.where((all_apogee_labels[:,0]<5250)&(all_apogee_labels[:,0]>4750))[0]]
resid_t5 = all_apogee_resids[np.where((all_apogee_labels[:,0]>5250))[0]]

resid_l1 = all_apogee_resids[np.where((all_apogee_labels[:,1]<0.5)&(all_apogee_labels[:,1]>-10.))[0]]
resid_l2 = all_apogee_resids[np.where((all_apogee_labels[:,1]<1.5)&(all_apogee_labels[:,1]>0.5))[0]]
resid_l3 = all_apogee_resids[np.where((all_apogee_labels[:,1]<2.5)&(all_apogee_labels[:,1]>1.5))[0]]
resid_l4 = all_apogee_resids[np.where((all_apogee_labels[:,1]<3.5)&(all_apogee_labels[:,1]>2.5))[0]]
resid_l5 = all_apogee_resids[np.where((all_apogee_labels[:,1]>3.5))[0]]

resid_f1 = all_apogee_resids[np.where((all_apogee_labels[:,2]<-1.3)&(all_apogee_labels[:,2]>-10.))[0]]
resid_f2 = all_apogee_resids[np.where((all_apogee_labels[:,2]<-0.9)&(all_apogee_labels[:,2]>-1.3))[0]]
resid_f3 = all_apogee_resids[np.where((all_apogee_labels[:,2]<-0.5)&(all_apogee_labels[:,2]>-0.9))[0]]
resid_f4 = all_apogee_resids[np.where((all_apogee_labels[:,2]<-0.1)&(all_apogee_labels[:,2]>-0.5))[0]]
resid_f5 = all_apogee_resids[np.where((all_apogee_labels[:,2]>-0.1))[0]]

**Obtain a random sample of residuals from each bin**

Each sample has to be equal in size for proper statistical analysis

In [13]:
np.random.shuffle(resid_t1)
resid_t1 = resid_t1[0:1500]
np.random.shuffle(resid_t2)
resid_t2 = resid_t2[0:1500]
np.random.shuffle(resid_t3)
resid_t3 = resid_t3[0:1500]
np.random.shuffle(resid_t4)
resid_t4 = resid_t4[0:1500]
np.random.shuffle(resid_t5)
resid_t5 = resid_t5[0:1500]

np.random.shuffle(resid_l1)
resid_l1 = resid_l1[0:1500]
np.random.shuffle(resid_l2)
resid_l2 = resid_l2[0:1500]
np.random.shuffle(resid_l3)
resid_l3 = resid_l3[0:1500]
np.random.shuffle(resid_l4)
resid_l4 = resid_l4[0:1500]
np.random.shuffle(resid_l5)
resid_l5 = resid_l5[0:1500]

np.random.shuffle(resid_f1)
resid_f1 = resid_f1[0:1500]
np.random.shuffle(resid_f2)
resid_f2 = resid_f2[0:1500]
np.random.shuffle(resid_f3)
resid_f3 = resid_f3[0:1500]
np.random.shuffle(resid_f4)
resid_f4 = resid_f4[0:1500]
np.random.shuffle(resid_f5)
resid_f5 = resid_f5[0:1500]

**Calculate scatter in different regions, $\delta_{js}$**

In [14]:
std_resid_t1 = np.std(resid_t1, axis=0)[0]
std_resid_t2 = np.std(resid_t2, axis=0)[0]
std_resid_t3 = np.std(resid_t3, axis=0)[0]
std_resid_t4 = np.std(resid_t4, axis=0)[0]
std_resid_t5 = np.std(resid_t5, axis=0)[0]

std_resid_l1 = np.std(resid_l1, axis=0)[1]
std_resid_l2 = np.std(resid_l2, axis=0)[1]
std_resid_l3 = np.std(resid_l3, axis=0)[1]
std_resid_l4 = np.std(resid_l4, axis=0)[1]
std_resid_l5 = np.std(resid_l5, axis=0)[1]

std_resid_f1 = np.std(resid_f1, axis=0)[2]
std_resid_f2 = np.std(resid_f2, axis=0)[2]
std_resid_f3 = np.std(resid_f3, axis=0)[2]
std_resid_f4 = np.std(resid_f4, axis=0)[2]
std_resid_f5 = np.std(resid_f5, axis=0)[2]

** Create a function that returns the Jacobian matrix**

The jacobian matrix is a matrix of the first order derivatives of the outputs with respect to the input. In our case, this will be a 3-dimensional matrix with dimensions: (num_labels, num_test_spectra, num_flux_values).

Each spectrum will therefore have 3 vectors the length of the spectrum: one vector for each of the first order derivatives of the output labels with respect to each flux value (wavelength bin)

In [15]:
def compute_jacobian(model_path,input_data,denormalize=None):

    tf_model,tf_input,tf_output = load_graph(model_path)    
    
    x = tf_model.get_tensor_by_name(tf_input)
    
    if denormalize==None:
        y = tf_model.get_tensor_by_name(tf_output)
    else:
        y = denormalize(tf_model.get_tensor_by_name(tf_output))
        
    y_list = tf.unstack(y)
    num_outputs = y.shape.as_list()[0]
    
    if input_data.shape[0]==1:
        with tf.Session(graph=tf_model) as sess:
            y_out = sess.run([tf.gradients(y_, x)[0] for y_ in y_list], feed_dict={
                x: input_data
            })
            jacobian = np.asarray(y_out)
            jacobian = jacobian[:,:,:,0]
    else:
        print('\nCreating jacobian matrices for '+str(len(input_data))+' spectra...\n')
        print_count = int(len(input_data)/10)
        if print_count==0:
            print_count=1     
            
        jacobian = np.zeros((num_outputs,input_data.shape[0],input_data.shape[1]))
        for i in range(input_data.shape[0]):
            with tf.Session(graph=tf_model) as sess:
                y_out = sess.run([tf.gradients(y_, x)[0] for y_ in y_list], feed_dict={
                    x: input_data[i:i+1]
                })
            jac_temp = np.asarray(y_out)
            jacobian[:,i:i+1,:]=jac_temp[:,:,:,0]
            if (i+1)%print_count==0:
                print(str(i+1)+' jacobians completed...\n')
        print('All '+str(i+1)+' jacobians completed.\n')
    return jacobian

** Create a function that returns the Covariance Matrix **

Within the function, mask extremely high values in the error spectra and nan values in Jacobian

The high values in the error spectrum are associated - for the most part - with zero-values in the APOGEE spectra. Since these zero values are essentially ignored in the model (due to RELU-activation and maxpooling layers) they do not effect the output labels and therefore, the flux errors associated with these zero-values give an innaccurate assessment of the prediction errors. If you were to include these error fluxes, you would have massive uncertainties in some of the stars' output labels.

In [16]:
def compute_covariance(var,jac_matrix):
    var[var > 3] = 0
    jac_matrix = np.nan_to_num(jac_matrix)
    covariance = np.einsum('ijk,kjl->jil',(jac_matrix*(var**2)),jac_matrix.T)
    return covariance

** Compute Predictions and errors for the test set **
**Steps:**
1. compute predictions

    \begin{equation}
    h_(\textbf{x},\textbf{W}) =  h_{1}(\textbf{x},\textbf{W}),...,h_{j}(\textbf{x},\textbf{W}))
    \end{equation} 

        j = 3

2. compute jacobian matrix

    \begin{equation}
    Jac = \frac{\partial h_{j}(\textbf{x},\textbf{W})}{\partial \textbf{x}} =  (\frac{\partial h_{j}(\textbf{x},\textbf{W})}{\partial x_{1}},...,\frac{\partial h_{j}(\textbf{x},\textbf{W})}{\partial x_{n}})
    \end{equation} 

        j = 1,...,3

        n = 7214

3. compute covariance matrix

    \begin{equation}
    Cov = Jac \times \Delta \textbf{x}^2 \times Jac^T
    \end{equation}

4. obtain error due to error spectrum from the square root of the diagonal of the covariance matrix

    \begin{equation}
    \delta_{jx} = \sqrt{diag(Cov)}
    \end{equation}

5. determine which region of the label-space the labels are within to obtain the scatter in the corresponding bin

    \begin{equation}
    \delta_{js}
    \end{equation}
    
6. combine scatter with the error due to the error spectrum

    \begin{equation}
    \Delta h_{j} = \sqrt{\delta_{j\textbf{x}}^{2}  + \delta_{js}^{2}}
    \end{equation}

In [17]:
test_1_predictions = np.zeros((len(test_1_spectra),num_labels))
test_1_errors = np.zeros((len(test_1_spectra),num_labels))

print('Computing errors for '+str(len(test_1_predictions))+' objects...')

for i in range(len(test_1_spectra)):
    spectrum = test_1_spectra[i:i+1].reshape(1,7214,1)
    label = denormalize(keras_model.predict(spectrum))
    test_1_predictions[i] = label
    jacobian = compute_jacobian(tf_model_path,spectrum,denormalize)
    covariance = compute_covariance(test_1_err_spectra[i:i+1],jacobian)
    errors_from_err_spec = np.sqrt(np.diagonal(covariance, offset=0, axis1=1, axis2=2))
    label=label.T
    std_resid_temp = np.zeros((1,3))
    if (label[0]<4000) & (label[0]>10):
        std_resid_temp[0,0]=std_resid_t1
    elif (label[0]<4500) & (label[0]>4000):
        std_resid_temp[0,0]=std_resid_t2
    elif (label[0]<4750) & (label[0]>4500):
        std_resid_temp[0,0]=std_resid_t3
    elif (label[0]<5250) & (label[0]>4750):
        std_resid_temp[0,0]=std_resid_t4
    elif (label[0]<10000) & (label[0]>5250):
        std_resid_temp[0,0]=std_resid_t5
        
    if (label[1]<0.5) & (label[0]>-10):
        std_resid_temp[0,1]=std_resid_l1
    elif (label[1]<1.5) & (label[0]>0.5):
        std_resid_temp[0,1]=std_resid_l2
    elif (label[1]<2.5) & (label[0]>1.5):
        std_resid_temp[0,1]=std_resid_l3
    elif (label[1]<3.5) & (label[0]>2.5):
        std_resid_temp[0,1]=std_resid_l4
    elif (label[1]<100) & (label[0]>3.5):
        std_resid_temp[0,1]=std_resid_l5
    
    if (label[2]<-1.3) & (label[0]>-10):
        std_resid_temp[0,2]=std_resid_f1
    elif (label[2]<-0.9) & (label[0]>-1.3):
        std_resid_temp[0,2]=std_resid_f2
    elif (label[2]<-0.5) & (label[0]>-0.9):
        std_resid_temp[0,2]=std_resid_f3
    elif (label[2]<-0.3) & (label[0]>-0.5):
        std_resid_temp[0,2]=std_resid_f4
    elif (label[2]<0.5) & (label[0]>-0.3):
        std_resid_temp[0,2]=std_resid_f5
    
    if i%int(0.1*len(test_1_spectra))==0:
        print(str(i)+' errors competed...')
    
    test_1_errors[i] = np.sqrt(errors_from_err_spec+np.square(std_resid_temp))
    
print('Error propagation for '+str(len(test_1_errors))+' stars completed.')    

Computing errors for 2651 objects...
0 errors competed...
265 errors competed...
530 errors competed...
795 errors competed...
1060 errors competed...
1325 errors competed...
1590 errors competed...
1855 errors competed...
2120 errors competed...
2385 errors competed...
2650 errors competed...
Error propagation for 2651 stars completed.


In [21]:
# label names
label_names = ['Teff  ','log(g)','[Fe/H]']
units = ['K','cgs','dex']

mean_err_total = np.mean(test_1_errors, axis=0)
print('Mean total statistical errors: \n')
for i, err in enumerate(mean_err_total):
      print(label_names[i]+':  '+"{0:.3f}".format(err)+' '+units[i])

Mean total statistical errors: 

Teff  :  53.750 K
log(g):  0.251 cgs
[Fe/H]:  0.162 dex
