In [None]:
# Path of the HDF5 file read by the cells below; they access its
# train/val/test "<split>_hp" template datasets (plus "<split>_m1"/"<split>_m2").
H5_FILE = "white_h_8192_dm2.h5"

In [None]:
###
###  TF Dataset loader for GWDA
###

import tensorflow as tf
import numpy as np
import h5py 
import matplotlib.pyplot as plt
import time
%matplotlib inline

# Begin with an empty default graph before any dataset/iterator ops are created.
tf.reset_default_graph()

# Report how many templates each split of the HDF5 file contains.
with h5py.File(H5_FILE, 'r') as f:
    for caption, dataset_key in (
            ("Number of template, /train: ", '/train_hp'),
            ("Number of template, /val  : ", '/val_hp'),
            ("Number of template, /test : ", '/test_hp')):
        print(caption, len(f[dataset_key]))


class generator_gwda:
    """Callable generator of noisy waveform blocks read from an HDF5 file.

    Every call opens ``file`` and walks the ``<tag>_hp``, ``<tag>_m1`` and
    ``<tag>_m2`` datasets in lockstep.  For each template it yields
    ``(data, m)``:

    * ``data`` -- float32 array of shape ``(noise_realization + 1, LEN)``.
      Row 0 is the template scaled by ``amp``, shifted by a random offset
      (zero padded, not wrapped) and added to Gaussian noise; the remaining
      rows are Gaussian noise only.
    * ``m`` -- float array of shape ``(noise_realization + 1, 2)``.  Row 0
      holds ``[m1, m2]``; the noise-only rows stay zero.

    Parameters
    ----------
    file : path of the HDF5 file.
    tag : dataset prefix, e.g. ``"train"`` selects ``train_hp``/``train_m1``/``train_m2``.
    amp : multiplicative amplitude applied to the template.
    shift : width of the random shift window; the offset is drawn uniformly
        from roughly ``[-shift/2, shift/2)`` samples and truncated to an int.
    noise_realization : number of noise-only rows per template.
    noise : standard deviation of the Gaussian noise.  The default of 1
        reproduces the previously hard-coded unit-variance noise; 0 disables
        the noise.  (This argument used to be accepted but ignored.)
    seed : optional seed for the generator's private random stream.  ``None``
        (default) draws fresh entropy, as before, but no longer reseeds
        numpy's *global* RNG as a side effect of constructing the object.
    """

    def __init__(self, file, tag, amp, shift=0, noise_realization=2, noise=1, seed=None):
        self.file = file
        self.amp   = amp
        self.tag   = tag
        self.shift = shift
        self.nreal = noise_realization
        self.noise = noise
        # Private stream: keeps np.random.seed(...) calls made elsewhere intact.
        self.rng   = np.random.RandomState(seed)

    def __call__(self):
        hp_key, m1_key, m2_key = ('%s_%s' % (self.tag, suffix) for suffix in ('hp', 'm1', 'm2'))

        with h5py.File(self.file, 'r') as f:
            for hp, m1, m2 in zip(f[hp_key], f[m1_key], f[m2_key]):
                # Taking the length per row avoids indexing row 0 up front,
                # which failed on an empty dataset.
                LEN = len(hp)

                ## shifted waveform to the reference point
                a = int((self.rng.random_sample() - 0.5) * self.shift)
                # Clamp so an offset larger than the waveform yields all zeros
                # instead of a negative (from-the-end) slice bound.
                a = max(-LEN, min(LEN, a))
                shifted = np.zeros(LEN)
                if a >= 0:
                    shifted[a:] = self.amp * hp[:LEN - a]
                else:
                    shifted[:LEN + a] = self.amp * hp[-a:]

                ## add noise realizations: row 0 = signal + noise, others = noise only
                data = self.rng.normal(0, self.noise, (self.nreal + 1, LEN))
                data[0] += shifted

                ## labels: such as mass, spin,...
                m = np.zeros((self.nreal + 1, 2))
                m[0, :] = [m1, m2]

                yield data.astype(np.float32), m

BATCH = 2
EXTF  = 2   # extend factor

# Reinitializable iterator declared from a structure only: batches of
# 8192-sample float32 waveforms paired with one boolean label each.
iterator = tf.data.Iterator.from_structure(
    (tf.float32, tf.bool),
    (tf.TensorShape([None, 8192]), tf.TensorShape([None]))
)
next_element = iterator.get_next()


def _to_examples(block, masses):
    """Split one generator block into per-row (waveform, label) examples.

    The label is the first mass column cast to bool, so it is True only for
    rows whose first label entry is non-zero.
    """
    waveforms = tf.data.Dataset.from_tensor_slices(block)
    labels = tf.data.Dataset.from_tensor_slices(tf.cast(masses[:, 0], tf.bool))
    return tf.data.Dataset.zip((waveforms, labels))


# Types/shapes of what generator_gwda yields (the shapes are optional).
_gen_types  = (tf.float32, tf.float32)
_gen_shapes = (tf.TensorShape([None, 8192]), tf.TensorShape([None, 2]))

### training split
amp=1.0
ds_train = tf.data.Dataset.from_generator(
    generator_gwda(H5_FILE, tag="train", amp=amp, shift=2000),
    _gen_types,
    _gen_shapes,
)
ds_train = ds_train.flat_map(_to_examples)
ds_train = ds_train.shuffle(10*BATCH)
ds_train = ds_train.repeat()
ds_train = ds_train.batch(BATCH)
ds_train = ds_train.prefetch(10*BATCH)

### validation split
amp=1.0
ds_val = tf.data.Dataset.from_generator(
    generator_gwda(H5_FILE, tag="val", amp=amp, shift=2000),
    _gen_types,
    _gen_shapes,
)
ds_val = ds_val.flat_map(_to_examples)
ds_val = ds_val.shuffle(EXTF*BATCH)
ds_val = ds_val.repeat()
ds_val = ds_val.batch(BATCH)
ds_val = ds_val.prefetch(EXTF*BATCH)

### one iterator serves both splits; these ops re-point it at a dataset
train_dsi_init = iterator.make_initializer(ds_train)
val_dsi_init = iterator.make_initializer(ds_val)

# Example: read one batch from train, then val, then train again.
with tf.Session() as sess:
    for init_op in (train_dsi_init, val_dsi_init, train_dsi_init):
        sess.run(init_op)
        d, l = sess.run(next_element)
        print (d, l)

In [None]:
###
### Batch in, loss/prediction out
###
### Builds the TF1 graph of a 1-D CNN binary classifier fed by a
### reinitializable dataset iterator: three conv + max-pool stages, a 64-unit
### dense layer and a single-logit output, followed by the loss, the Adam
### train op, streaming metrics and scalar summaries.
###
# NOTE(review): ModeKeys is not referenced in any of the cells visible here.
from tensorflow.contrib.learn import ModeKeys
# Start from an empty default graph; `iterator`/`next_element` are rebuilt below.
tf.reset_default_graph()

# Iterator declared by structure only (float32 [None, 8192] waveforms, bool
# [None] labels); datasets are attached later through make_initializer().
iterator     = tf.data.Iterator.from_structure(
    (tf.float32, tf.bool), 
    (tf.TensorShape([None, 8192]), tf.TensorShape([None]))
  )
next_element = iterator.get_next()


# Learning rate handed to the Adam optimizer below.
LRATE = 1e-4

(xbatch, ybatch) = next_element

#tf.reset_default_graph()
# Number of samples per waveform; must match the 8192 in the iterator shape.
DIM   = 8192

# ============================================== output logits
# Add a trailing channel axis for conv1d and make the labels a column vector
# so they line up with the [batch, 1] logits.
feature = tf.reshape(xbatch, [-1, DIM,1])
ybatch = tf.reshape(ybatch, [-1, 1])

# Keyword arguments shared by every conv1d layer (activation is applied
# separately, after pooling, inside convl).
args = {"padding":'valid', "activation":None,
        "kernel_initializer":tf.truncated_normal_initializer(), 
        "bias_initializer":tf.zeros_initializer()     }

with tf.variable_scope('', reuse=tf.AUTO_REUSE):
    def convl(in_, F, K, D, S, PO, PS, act, name):
        """One conv stage: conv1d -> max_pooling1d -> activation.

        F/K/D/S are the conv1d filters, kernel_size, dilation_rate and
        strides; PO/PS are the pooling size and stride; act is the
        activation callable applied to the pooled output.

        NOTE(review): `name` is accepted but never forwarded to the layers,
        so they receive TensorFlow's auto-generated names.
        """
        out = tf.layers.conv1d( in_, filters=F, kernel_size=K, dilation_rate=D, strides=S, **args)
        out = tf.layers.max_pooling1d(out, pool_size=PO, strides=PS, padding='valid')
        return act(out)

    o1 = convl(feature, F=16, K=16, D=1, S=1, PO=4, PS=4, act=tf.nn.relu, name="conv1")
    o2 = convl(o1,      F=32, K=8,  D=4, S=1, PO=4, PS=4, act=tf.nn.relu, name="conv2")
    o3 = convl(o2,      F=64, K=8,  D=4, S=1, PO=4, PS=4, act=tf.nn.relu, name="conv3")

    # Flatten (length, channels) of the last conv stage for the dense layers.
    dim = o3.get_shape().as_list()
    fcnn = dim[1]*dim[2]
    o4 = tf.reshape(o3, [-1, fcnn])
    o4     = tf.layers.dense(o4, 64, activation=tf.nn.relu, name="fc1")
    logits = tf.layers.dense(o4, 1, activation=None, name="logit")
    # ================================================ End of Network

    ## with reduction compared to tf.nn.softmax_cross_entropy_with_logits_v2 
    loss_op = tf.losses.sigmoid_cross_entropy(logits=logits, multi_class_labels=ybatch)

    #optimizer = tf.train.AdadeltaOptimizer(LRATE, rho=0.90, epsilon=1e-08).minimize(loss_op)
    #optimizer = tf.train.GradientDescentOptimizer(LRATE).minimize(loss_op)
    # Despite its name, `optimizer` is the train op returned by minimize().
    optimizer = tf.train.AdamOptimizer(LRATE).minimize(loss_op)

    # Compute predictions
    predict_prob = tf.sigmoid(logits)
    predict      = tf.cast( tf.round(predict_prob), tf.int32 )
    

    # NOTE(review): tf.metrics.* return (value, update_op) in that order, so
    # `accuracy_op` is bound to the value tensor while `accuracy` and
    # `sensitivity` are bound to the *update ops*: running them accumulates
    # the current batch into the streaming totals (local variables) and
    # returns the running value. The names suggest the opposite - confirm
    # this is intended.
    accuracy_op, accuracy    = tf.metrics.accuracy(labels=ybatch, predictions=predict)
    _, sensitivity = tf.metrics.recall(labels=ybatch, predictions=predict)
    #    #_, sensitivity = tf.metrics.sensitivity_at_specificity(labels=y, predictions=predict, specificity=0.005)
    #
    #_, fp = tf.metrics.false_positives(labels=ybatch, predictions=predict)
    #_, fn = tf.metrics.false_negatives(labels=ybatch, predictions=predict)
    #_, tp = tf.metrics.true_positives(labels=ybatch, predictions=predict)
    #_, tn = tf.metrics.true_negatives(labels=ybatch, predictions=predict)

  
# List the graph's local variables (the accumulators behind the metrics above).
for var in tf.local_variables():
    print (var)
    
# Scalar summaries picked up later by tf.summary.merge_all(); the 'accuracy'
# summary is attached to the update op, so evaluating it also updates the metric.
tf.summary.scalar('loss', loss_op)
tf.summary.scalar('accuracy', accuracy)
#tf.summary.scalar('recall/false_negatives',  tf.get_default_graph().get_tensor_by_name("recall/false_negatives/count:0") )


        

In [None]:
# Number of noise-only rows generated per training template
# (passed as noise_realization to the training generator).
NUM_NOISE=30

# Examples per training epoch, used to derive the number of steps per epoch.
# NOTE(review): 700 is presumably the number of templates in the train split
# of H5_FILE - confirm against the file.
DATA_LEN = 700*(NUM_NOISE+1)
# Module-level batch size; train_for_amp has its own BATCH parameter (default 128).
BATCH = 128
# Directory receiving the TensorBoard event files and model checkpoints.
ROOT_FOLDER = '/tmp/tf_tmp'

def _gwda_pipeline(tag, amp, shift, noise_realization, batch, repeat):
    """Build the shuffled, batched (waveform, label) tf.data pipeline for one split."""
    ds = tf.data.Dataset.from_generator(
            generator_gwda(H5_FILE, tag, amp, shift, noise_realization=noise_realization),
            (tf.float32, tf.float32),
            (tf.TensorShape([None, 8192]), tf.TensorShape([None, 2]) )    ## not needed actually
        )
    # Unstack every generator block into single rows; the label is the first
    # mass column cast to bool (non-zero only on the signal row).
    ds = ds.flat_map(lambda x, y: tf.data.Dataset.zip((
            tf.data.Dataset.from_tensor_slices(x),
            tf.data.Dataset.from_tensor_slices(tf.cast(y[:, 0], tf.bool))
        )))
    ds = ds.shuffle(batch)
    if repeat:
        ds = ds.repeat()
    return ds.batch(batch).prefetch(batch)


def train_for_amp(amp, BATCH=128, EPOCHS=100, MONITOR=2, PATIENCE=4, TOLLERENCE=1.e-7):
    """Train the network from freshly initialised weights at one amplitude.

    Relies on notebook globals created in other cells: the graph objects
    ``iterator``, ``optimizer``, ``loss_op``, ``accuracy`` and ``sensitivity``;
    the constants ``H5_FILE``, ``NUM_NOISE``, ``DATA_LEN`` and ``ROOT_FOLDER``;
    and ``train_writer`` / ``saver``, which must exist before this is called.

    Parameters
    ----------
    amp : amplitude passed to ``generator_gwda`` for both splits.
    BATCH : batch size; also used as shuffle and prefetch buffer size.
    EPOCHS : maximum number of epochs (``DATA_LEN // BATCH`` steps each).
    MONITOR : print a progress line every ``MONITOR`` epochs.
    PATIENCE, TOLLERENCE : early stopping - training stops once the
        validation loss has stayed below ``TOLLERENCE`` for
        ``PATIENCE + 2`` consecutive epochs.

    Side effects
    ------------
    Adds dataset/initializer ops to the default graph on every call, writes
    one summary per training step to ``train_writer`` and saves a checkpoint
    to ``ROOT_FOLDER/model_<amp>.ckpt`` through ``saver``.  Returns ``None``.

    Notes
    -----
    Validation is evaluated on a single batch per epoch.
    """

    ###  Data-set
    MAXSHIFT= 0

    print("Trainning for A= %f"% (amp ))

    dst = _gwda_pipeline('train', amp, MAXSHIFT, NUM_NOISE, BATCH, repeat=True)
    dsv = _gwda_pipeline('val',   amp, MAXSHIFT, 1,         BATCH, repeat=False)

    ### one shared iterator for train/val; these ops re-point it at a dataset
    train_dsi_init = iterator.make_initializer(dst)
    val_dsi_init = iterator.make_initializer(dsv)

    # Build every helper op once, outside the epoch loop.
    merged = tf.summary.merge_all()   ## operator for collecting TF summary
    init_ops = [tf.global_variables_initializer(), tf.local_variables_initializer()]
    # `accuracy` / `sensitivity` are streaming-metric update ops whose
    # accumulators are local variables.  Without a reset the reported
    # "validation" numbers mix in every training batch (the merged summary
    # runs the accuracy update) and all earlier epochs.
    reset_metrics = tf.local_variables_initializer()
    STEPS = int(DATA_LEN / BATCH)

    with tf.Session() as sess:

        sess.run(init_ops)

        patience = 0
        time0 = time.time()
        for e in range(EPOCHS):

            sess.run([train_dsi_init, reset_metrics])
            for i in range(STEPS):
                _, summary = sess.run( [optimizer, merged] )
                # One distinct global step per training step; logging every
                # step of an epoch under `e` made the points collide.
                train_writer.add_summary(summary, global_step=e * STEPS + i)

            ### evaluate on validation data only
            sess.run([val_dsi_init, reset_metrics])
            loss, acc, sen = sess.run( [loss_op, accuracy, sensitivity] )

            if e % MONITOR == 0:
                duration = time.time() - time0
                speed = STEPS * BATCH * (e+1) / duration
                print('  Epoch: %3d, loss: %10.3e acc: %4.2g sen: %4.2f sec: %8.1f speed: %7.1f wf/sec' 
                      % (e, loss, acc, sen, duration, speed) )
            if loss < TOLLERENCE:
                if patience > PATIENCE: break
                patience += 1
            else:
                patience = 0

        train_writer.flush()

        # Save model (variables)
        save_path = saver.save(sess, "%s/model_%4.2f.ckpt" % (ROOT_FOLDER, amp ) )
        print("Model saved at %s" % save_path)
    

In [None]:
###
### Main program
###
### if __name__ == '__main__':

# Seed numpy's global RNG.
np.random.seed(1)

##
##  Training with fixed template ....
##
###
# Saver over the variables of the current default graph; train_for_amp uses it
# (as a global) to write one checkpoint per amplitude.
saver = tf.train.Saver(max_to_keep=50)

TEST_LIST = [1.8, 1.7, 1.6, 1.5, 1.4, 1.3, 1.2, 1.1, 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0]
TRAIN_A   = [1.1, 1.0, 0.9, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.54, 0.53, 0.52, 0.51, 0.5]
#TEST_LIST = [1.0]
#TRAIN_A   = [1.0]


## preload model here ?

for amp in TRAIN_A:
    ## summarize to a new folder for each Amp
    train_writer = tf.summary.FileWriter("%s/train_%4.2f" % (ROOT_FOLDER, amp ) )
    train_writer.add_graph(tf.get_default_graph())
    print('Saving graph to: %s' % ROOT_FOLDER)

    try:
        # train_for_amp opens its own session, writes summaries to the global
        # `train_writer`, saves "<ROOT_FOLDER>/model_<amp>.ckpt" and prints the
        # saved path itself.  The extra saver.save(sess, ...) that used to
        # follow was removed: no live session exists at this scope (`sess` is
        # at best the closed session leaked from the example cell).
        train_for_amp(amp)
    finally:
        # Flush and release this amplitude's event file even if training fails.
        train_writer.close()
