## Zero padding all of the data to make the time series of constant dimension

First, the vector is created as $M_1,T_1,E_1,M_2,T_2,E_2, \dots$. Then it is padded with zeros to make the time series length consistent for all stars.

In [1]:
#Reread in all of the data and group by star
import gzip 
with gzip.open('AllVar.phot.gz', 'rt') as f:
    temp = f.read()
temp2 = temp.split("\n")
del temp2[-1]
dataa = [[float(y) for y in x.split(',')] for x in temp2]
import itertools
import numpy as np 
groups = itertools.groupby(dataa, lambda x: x[0])
grouped_data = [list(g) for k,g in groups]

In [2]:
#Get the maximum size of a time-series
lengths = [len(list(zip(*a))[2]) for a in grouped_data]
#Multiply by 3 because we also include the time and error
l_pad = max(lengths)*3

In [3]:
#Allocate space for both the padded time series and the length of the unpadded features (will be used later for RNN)
feat_pad = np.empty((len(grouped_data),l_pad))
feat_len = np.empty(len(grouped_data))
i = 0
#Iterate through all stars and pad with zeros
for a in grouped_data:
    temp = np.ndarray.flatten(np.array(list(zip(*a))[1:4]), 'F')
    feat_pad[i,:] = np.pad(temp, (0,l_pad - len(temp)),'constant')
    feat_len[i] = len(temp)
    i += 1
np.save("Padded_TS", feat_pad)
np.save("Padded_TS_len",feat_len)

In [8]:
#One-hot encode the labels
truth = np.load("true_labels.npy")
labels = np.zeros((truth.size, truth.max()))
labels[np.arange(truth.size),truth-1] = 1

## A naive NN on the padded data

We hoped that this would improve the fit by using the data directly rather than the preprocessed features. However, without an absurd net size (leading to overfitting), there is no gain for the classification task. Again, we allow training and testing on all data.

In [9]:
#Import TF and change to float32 for use with TF
import tensorflow as tf
labels = np.float32(labels)
features = np.float32(feat_pad)

In [18]:
#Placeholders of correct size, but allowing for variable batch size
features_placeholder = tf.placeholder(features.dtype, [None,features.shape[1]])
labels_placeholder = tf.placeholder(labels.dtype, [None, labels.shape[1]])
#Parameters
learning_rate = 0.005
epoch = 100
batch_size = 64
display_step = 5
n_hidden_1 = l_pad
n_hidden_2 = l_pad
n_hidden_3 = l_pad
n_hidden_4 = l_pad
n_hidden_5 = 64
n_hidden_6 = 64
num_input = l_pad
num_classes = 17
n_star = len(truth)

In [19]:
#Iterator
dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
dataset = dataset.batch(batch_size)
dataset = dataset.shuffle(buffer_size=100)
iterator = dataset.make_initializable_iterator()

#Condensed NN creation with TF2 (keras-like layer creation)
def neural_net(x):
    layer_1 = tf.layers.dense(x,n_hidden_1,activation=tf.nn.sigmoid)
    layer_2 = tf.layers.dense(layer_1,n_hidden_2,activation=tf.nn.relu)
    layer_3 = tf.layers.dense(layer_2,n_hidden_3,activation=tf.nn.relu)
    layer_4 = tf.layers.dense(layer_3, n_hidden_4, activation=tf.nn.relu)
    layer_5 = tf.layers.dense(layer_4, n_hidden_5, activation=tf.nn.relu)
    layer_6 = tf.layers.dense(layer_5, n_hidden_6, activation=tf.nn.relu)
    out_layer = tf.layers.dense(layer_6, num_classes)
    return out_layer

In [20]:
#Iterate the NN over a batch, then describe the loss and optimizer for training
feats, labs = iterator.get_next()
logits = neural_net(feats)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=logits, labels=labs))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
#Useful for accuracy output
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labs, 1))
accuracy = tf.reduce_sum(tf.cast(correct_pred, tf.float32))
#Weight/bias initializer
init = tf.global_variables_initializer()

In [21]:
# Start training
with tf.Session() as sess:
    ep = 0
    #Initialize NN
    sess.run(init)

    for i in range(epoch):
        #Initialize iterator
        sess.run(iterator.initializer, feed_dict={features_placeholder: features,
                                                  labels_placeholder: labels})
        acc = 0
        #This loop runs on training data, breaking when it runs out of batches to feed in (finishing the epoch)
        while True:
            try:
                _,tempp = sess.run([train_op,accuracy])
                acc += tempp
            except tf.errors.OutOfRangeError:
                break
        ep += 1
        if ep % display_step == 0 or ep == 1:
            print("Epoch",ep, "Loss: ", acc / (n_star - n_star % batch_size))
    #print("Testing Accuracy:", \
    #sess.run(accuracy, feed_dict={features_placeholder: features,
    #                              labels_placeholder: labels}))

Epoch 1 Loss:  0.646545827633379
Epoch 5 Loss:  0.6539201436388509
Epoch 10 Loss:  0.6539201436388509
Epoch 15 Loss:  0.6539201436388509
Epoch 20 Loss:  0.6539201436388509
Epoch 25 Loss:  0.6539201436388509
Epoch 30 Loss:  0.6539201436388509


KeyboardInterrupt: 