#### Imports

In [13]:
# hera stuff
import hera_cal as hc
import pyuvdata
import aipy
import uvtools

# general
import numpy as np
import random
import matplotlib.pyplot as plt
import sys, io # flushing notebook output
import time # for random seed

# NN
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

***
#### Data source

In [2]:
# Load the visibilities for one JD.
data_directory = '../zen_data/'
miriad_path = data_directory + 'zen.2458098.58037.xx.HH.uv'

uvd = pyuvdata.UVData()
uvd.read_miriad(miriad_path)

# Gain solutions (and their flags) for the same observation.
calfits_path = data_directory + 'zen.2458098.58037.xx.HH.uv.abs.calfits'
gains, flags = hc.io.load_cal(calfits_path)

# Redundant baseline groups for that data.
aa = hc.utils.get_aa_from_uv(uvd)
info = hc.omni.aa_to_info(aa)
# Later cells use red_bls as a 1-D array of groups (np.delete without an axis,
# np.random.choice). Asking for dtype=object explicitly keeps that working on
# newer NumPy, which refuses to build ragged arrays implicitly.
red_bls = np.array(info.get_reds(), dtype = object)

***

#### Functions

In [3]:
# sometimes the antennas are bad, make sure they are good
def ants_good(bl, gain_keys):
    """Tell whether both antennas of a baseline appear among the gain keys.

    bl        -- indexable pair; bl[0] and bl[1] are the two antennas
    gain_keys -- iterable of (ant, pol) tuples

    Returns True only if both antennas are found, False otherwise.
    """
    known_ants = [key[0] for key in gain_keys]
    return bl[0] in known_ants and bl[1] in known_ants
    
def gen_flatness_from_good_red_bls(good_red_bls):
    """Build one "flatness" product from two baselines of a random redundant group.

    Reads the notebook-level globals `uvd` (visibility data) and `gains`.

    good_red_bls -- 1-D collection of redundant groups, each a list of
                    (ant_i, ant_j) baselines

    A group is drawn at random and re-drawn until at least two of its baselines
    pass ants_good. Two of those baselines, (a, b) and (c, d), are then picked
    at random and the function returns

        S = V_ab * conj(V_cd) * conj(g_a) * g_b * g_c * conj(g_d)

    using the 'x' gain of each antenna.

    NOTE(review): this loops forever if no group has two usable baselines.
    """

    num_keys = 0
    while num_keys < 2:

        data = {bl: uvd.get_data(bl) for bl in np.random.choice(good_red_bls) if ants_good(bl, gains.keys())}
        num_keys = len(data.keys())

    # random.sample needs a real sequence: dict views are rejected on
    # Python >= 3.11 (and were deprecated before that), so materialise a list.
    rnd_keys = random.sample(list(data.keys()), 2)

    a, b = rnd_keys[0][0], rnd_keys[0][1]
    c, d = rnd_keys[1][0], rnd_keys[1][1]
    S = data[a, b]*data[c, d].conjugate() * gains[(a,'x')].conjugate() * gains[(b,'x')] * gains[(c,'x')] * gains[(d,'x')].conjugate()

    return S

def gen_mega_flatness(good_red_bls, num = 250):
    """Draw `num` flatness products and join them along axis 0.

    Each draw is an independent call to gen_flatness_from_good_red_bls.
    """
    draws = [gen_flatness_from_good_red_bls(good_red_bls) for _ in range(num)]
    return np.concatenate(draws, axis = 0)

def gen_plot(predicted_values, actual_values, output_scaler):
    """Draw predictions against actual values and return the figure as PNG bytes.

    Both inputs are passed through output_scaler.inverse_transform, and only
    the first column of each result is plotted. Points are ordered by
    increasing actual value. The title shows the standard deviation of
    (prediction - actual).
    """
    # undo the scaling and keep the first output column of each array
    predicted = output_scaler.inverse_transform(predicted_values).T[0]
    actual = output_scaler.inverse_transform(actual_values).T[0]

    # order both series by the actual value
    order = np.argsort(actual)
    predicted_sorted = predicted[order]
    actual_sorted = actual[order]

    fig, ax = plt.subplots(figsize = (5, 3), dpi = 144)

    point_style = dict(linestyle = 'none', marker = '.', markersize = 1)
    ax.plot(predicted_sorted, color = 'darkblue', **point_style)
    ax.plot(actual_sorted, alpha = 0.50, color = '#E50000', **point_style)

    ax.set_title('std: %.9f' % np.std(predicted_sorted - actual_sorted))

    # render into memory and release the figure
    png_buffer = io.BytesIO()
    fig.savefig(png_buffer, format='png', dpi = 144)
    plt.close(fig)

    return png_buffer.getvalue()

def get_acc(predicted_values, actual_values, output_scaler, val = 0.0005):
    """Fraction of predictions whose unscaled error is strictly below `val`.

    Both arrays are passed through output_scaler.inverse_transform and only
    the first column of each result is compared.
    """
    predicted = output_scaler.inverse_transform(predicted_values).T[0]
    actual = output_scaler.inverse_transform(actual_values).T[0]
    within_tolerance = np.abs(actual - predicted) < val
    return np.mean(within_tolerance.astype(int))

def clean_phase(num_entries = 15000, tau_low = -0.04, tau_high = 0.04, num_channels = 1024):
    """Generate noiseless wrapped-phase rows with random slope and offset.

    num_entries       -- number of rows to generate
    tau_low, tau_high -- bounds of the uniform distribution the slope `tau`
                         is drawn from (half-open: [tau_low, tau_high))
    num_channels      -- number of samples per row (defaults to 1024, the
                         value that used to be hard-coded)

    Row i is angle(exp(2*pi*1j * (tau[i]*nu + phi_i))) with
    nu = 0 .. num_channels-1 and phi_i drawn uniformly from [0, 1).

    Returns (phases, tau): phases has shape (num_entries, num_channels) and
    tau has shape (num_entries, 1).
    """

    # np.random.uniform samples the half-open interval [low, high)
    tau = np.random.uniform(low = tau_low, high = tau_high, size = num_entries)
    nu = np.arange(num_channels)

    clean_phase_data = np.empty((num_entries, num_channels))
    for i in range(num_entries):
        # every row gets its own random phase offset
        clean_phase_data[i] = np.angle(np.exp(2*np.pi*1j * (tau[i]*nu + np.random.uniform())))

    return clean_phase_data, tau.reshape(-1, 1)

def get_bad_red_idxs(red_bls, gain_keys):
    """Return the indices of redundant groups that contain no usable baseline.

    A group is "bad" when every baseline in it fails ants_good; an empty
    group therefore counts as bad too.
    """
    return [idx for idx, group in enumerate(red_bls)
            if all(not ants_good(bl, gain_keys) for bl in group)]

***
#### Random good redundant bls

In [4]:
# indices of the redundant groups in which no baseline has both antennas calibrated
bad_indices = get_bad_red_idxs(red_bls, gains.keys())
print(bad_indices)

[6, 11, 16, 36]


In [5]:
# drop the groups listed in bad_indices; what remains is used for sampling
good_red_bls = np.delete(red_bls, bad_indices)

In [6]:
# to get a random good red bls
def random_red_bls():
    """Return one randomly chosen entry of good_red_bls."""
    return np.random.choice(good_red_bls)

***
NN

In [38]:
class NN(object):
    """Fully connected TensorFlow (graph-mode) network with TensorBoard summaries.

    Building an instance resets the default graph and creates, as attributes:

    X, y        -- input / target placeholders
    keep_prob   -- placeholder for the dropout keep probability
    prediction  -- output of the last layer
    cost        -- mean squared difference between prediction and y
    acc         -- 1 / (1 + Normal(0, sigma).prob(prediction - y))
    acc_test    -- placeholder for an externally computed accuracy value
    image_buf   -- placeholder for a PNG byte string shown in the image summary
    optimizer   -- Adam training op
    summary     -- merged summary op
    """

    def __init__(self, layer_nodes, number_of_inputs, number_of_outputs, learning_rate, sigma = 0.00625):
        """Build the graph.

        layer_nodes       -- list of layer widths; layer_nodes[0] is the width
                             of the input layer, the rest are hidden layers
        number_of_inputs  -- width of the X placeholder
        number_of_outputs -- width of the output layer and of the y placeholder
        learning_rate     -- learning rate handed to the Adam optimizer
        sigma             -- scale of the zero-mean Normal used for `acc` and
                             for the training objective
        """

        tf.reset_default_graph()

        with tf.variable_scope('input_X'):
            self.X  = tf.placeholder(tf.float32, shape = (None, number_of_inputs))

        with tf.variable_scope('input_y'):
            # width follows number_of_outputs (it used to be fixed at 1, which
            # would silently broadcast against a wider prediction)
            self.y  = tf.placeholder(tf.float32, shape = (None, number_of_outputs))

        with tf.variable_scope('keep'):
            self.keep_prob = tf.placeholder(tf.float32)

        layers = []
        with tf.variable_scope('input_layer'):
            b = tf.get_variable(name = 'biases_input',
                                shape = [layer_nodes[0]],
                                initializer = tf.zeros_initializer())

            w = tf.get_variable(name = 'weights_input',
                                shape  = [number_of_inputs, layer_nodes[0]],
                                initializer = tf.contrib.layers.xavier_initializer())

            layer = tf.nn.leaky_relu(tf.matmul(self.X, w) + b)
            layers.append(layer)

        # dropout is applied to the input layer's activations only
        with tf.variable_scope('dropout'):
            dropout = tf.nn.dropout(layers[0], self.keep_prob)
            layers.append(dropout)

        # hidden layers
        # `layers` holds [input_layer, dropout, layer_1, ...], so layers[i] is
        # the output feeding hidden layer i (the dropout output for i == 1).
        for i in range(len(layer_nodes)):
            if i > 0:
                with tf.variable_scope('layer_%d' %i):
                    b = tf.get_variable(name = 'biases_%d' %i,
                                        shape = [layer_nodes[i]],
                                        initializer = tf.zeros_initializer())

                    w = tf.get_variable(name = 'weights_%d' %i,
                                        shape  = [layer_nodes[i-1], layer_nodes[i]],
                                        initializer = tf.contrib.layers.xavier_initializer())

                    layer = tf.nn.relu6(tf.matmul(layers[i], w) + b)
                    layers.append(layer)

        with tf.variable_scope('output_layer'):
            b = tf.get_variable(name = "biases_out",
                                shape = [number_of_outputs],
                                initializer = tf.zeros_initializer())

            w = tf.get_variable(name = "weights_out",
                                shape  = [layer_nodes[-1], number_of_outputs],
                                initializer = tf.contrib.layers.xavier_initializer())

            # relu6 keeps predictions inside [0, 6]
            self.prediction = tf.nn.relu6(tf.matmul(layers[-1], w) + b)

        with tf.variable_scope('cost'):

            self.cost = tf.reduce_mean(tf.squared_difference(self.prediction, self.y))

        with tf.variable_scope('acc_test'):
            # fed from outside with an accuracy computed on unscaled values
            self.acc_test = tf.placeholder(tf.float32)

        with tf.variable_scope('acc_2'):

            dist = tf.contrib.distributions.Normal(0., sigma)
            self.acc = tf.divide(1.,1. + dist.prob(self.prediction - self.y))


        with tf.variable_scope('train'):
            # the training objective is the negative Normal density of the
            # residual, not self.cost (which is only logged)
            self.optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1e-08).minimize(-dist.prob(self.prediction - self.y))

        with tf.variable_scope('image'):
            self.image_buf = tf.placeholder(tf.string, shape=[])
            epoch_image = tf.expand_dims(tf.image.decode_png(self.image_buf, channels=4), 0)

        with tf.variable_scope('logging'):

            tf.summary.image('prediction_vs_actual', epoch_image)
            tf.summary.histogram('predictions', self.prediction)
            tf.summary.scalar('cost', self.cost)
            tf.summary.scalar('inverse_gaussian', tf.reduce_mean(self.acc))
            tf.summary.scalar('accuracy_5ns', self.acc_test)
            self.summary = tf.summary.merge_all()

In [40]:
# --- network architecture and optimiser settings ---
number_of_inputs, number_of_outputs = 1024, 1
layer_nodes = [2048] * 3
learning_rate = 1e-5
sigma = 0.00625

# --- training-loop settings ---
testing_data_percentage = 0.20
keep_prob_rate = 0.50
model_save_interval = 10
num_epochs = 50
num_batches = 1000

# build the graph last, once every setting is in place
NN_test_G = NN(layer_nodes, number_of_inputs, number_of_outputs, learning_rate, sigma)

In [None]:
network = NN_test_G
log_direc = 'logs/Ren-G'
saver = tf.train.Saver()


def _log_epoch_summary(session, network, writer, X, y, output_scaler, epoch):
    """Evaluate `network` on (X, y) with dropout disabled, write the merged summary and return the cost."""
    prediction_scaled = session.run(network.prediction,
                                    feed_dict = {network.X: X,
                                                 network.keep_prob : 1.00})

    cost, summary = session.run([network.cost, network.summary],
                                feed_dict = {network.X: X,
                                             network.y: y,
                                             network.keep_prob : 1.00,
                                             network.image_buf: gen_plot(prediction_scaled,
                                                                         y, output_scaler),
                                             network.acc_test : get_acc(prediction_scaled,
                                                                        y, output_scaler)})
    writer.add_summary(summary, epoch)
    writer.flush()
    return cost


with tf.Session() as session:

    # Weights are restored from an earlier run rather than freshly initialised;
    # run session.run(tf.global_variables_initializer()) instead to start from scratch.
    saver.restore(session,'logs/Ren-F/trained_model.ckpt-19')

    training_writer = tf.summary.FileWriter(log_direc + '/training', session.graph)
    testing_writer = tf.summary.FileWriter(log_direc + '/testing', session.graph)
    model_save_location = log_direc + '/trained_model.ckpt'
    testing_cost = 0

    nu = np.arange(1024)
    try:
        for epoch in range(num_epochs):

            # Fresh training set every epoch: flatness products from the data,
            # multiplied by a random phase slope (the delay to be learned) and
            # a random phase offset. Earlier experiments used clean_phase()
            # data here instead, with a delay range that widened over epochs.
            flatness = gen_mega_flatness(good_red_bls, num = 500)
            # take the row count from the data instead of assuming 60 rows per draw
            num_rows = flatness.shape[0]
            mega_delay = np.random.uniform(low = -.040, high = 0.040, size = (num_rows, 1))
            phase_offset = np.random.uniform(size = (num_rows, 1))
            inputs = np.angle(flatness * np.exp(2*np.pi*1j*(mega_delay*nu + phase_offset)))
            outputs = mega_delay

            # scale data
            input_scaler  = MinMaxScaler(feature_range = (0,1))
            output_scaler = MinMaxScaler(feature_range = (0,1))
            scaled_input  =  input_scaler.fit_transform(inputs)
            scaled_output = output_scaler.fit_transform(outputs)

            # split data
            # np.random.seed() returns None, so it cannot be passed as
            # random_state; hand the time-based seed to the splitter directly.
            X_train, X_test, y_train, y_test = train_test_split(scaled_input, scaled_output,
                                                    test_size = testing_data_percentage,
                                                    random_state = int(time.time()))

            # break training data into batches
            # (np.vsplit needs the training-set size to be divisible by num_batches)
            split_permutation = np.random.permutation(X_train.shape[0])
            X_train_batches = np.vsplit(X_train[split_permutation], num_batches)
            y_train_batches = np.vsplit(y_train[split_permutation], num_batches)

            for i in range(num_batches):
                session.run(network.optimizer,
                            feed_dict = {network.X: X_train_batches[i],
                                         network.y: y_train_batches[i],
                                         network.keep_prob : keep_prob_rate})
                # testing_cost shown here is the value from the previous epoch
                sys.stdout.write('\repoch: {:4.0f} -- testing_cost: {:2.10f} -- batch: {:1.0f}'.format(epoch, testing_cost,i))

            # training summaries
            training_cost = _log_epoch_summary(session, network, training_writer,
                                               X_train, y_train, output_scaler, epoch)

            # testing summaries
            testing_cost = _log_epoch_summary(session, network, testing_writer,
                                              X_test, y_test, output_scaler, epoch)

            # save model
            if (epoch + 1) % model_save_interval == 0:
                saver.save(session, model_save_location, epoch)

        # save last model
        saver.save(session, model_save_location, epoch)
    finally:
        # release the event files even when training is interrupted
        training_writer.close()
        testing_writer.close()

INFO:tensorflow:Restoring parameters from logs/Ren-F/trained_model.ckpt-19
epoch:    0 -- testing_cost: 0.0000000000 -- batch: 86

three large layers (2048 each). epoch time ~90 seconds.

test_A: no learning. changing optimizer minimization function to cost

test_B: 20 epochs brought pred near actl.

test_C: loading test_B weights, changing minimization to acc. preds drop to zero in first epoch.

test_D: loading test_B weights, increased sigma. preds don't drop to zero, though ineffective

test_E: loading test_B weights, changing minimization to cost. increased learning rate 5x compared to B. Not great.

test_F: loading test_B weights, dirty data. okay i guess.

test_G: F-19, negative normal minimize





