# Prepare data for model

In [35]:
from tcn import TCN, tcn_full_summary
import tensorflow as tf
import os
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras import backend as K

def find_max_light(curve):
    """Locate the brightest non-zero sample of a light curve.

    Rows whose ``cts`` value is exactly 0 are ignored when searching for the
    maximum.

    Args:
        curve: DataFrame with a ``cts`` column.

    Returns:
        Tuple ``(id_max, max_data)``: the index label of the largest non-zero
        ``cts`` value, and the row(s) of ``curve`` stored under that label.

    Raises:
        ValueError: propagated from ``idxmax`` when no non-zero ``cts`` exists.
    """
    nonzero_cts = curve.loc[curve["cts"] != 0, "cts"]
    peak_label = nonzero_cts.idxmax()
    return peak_label, curve.loc[peak_label, :]

# Report whether TensorFlow sees the first GPU; training still runs without
# one, the message is informational only.
if tf.test.gpu_device_name() == '/device:GPU:0':
    print('SUCCESS: Found GPU: {}'.format(tf.test.gpu_device_name()))
else:
    print('WARNING: GPU device not found.')


# Directory that format_data reads the light-curve CSV files from.
data_dir = "./TESS_data/processed_curves/"
# os.listdir returns entries in arbitrary order; sort them so that the
# position-based train/test split is identical on every run.
files = sorted(os.listdir(data_dir))
# NOTE(review): batch_size is not referenced by the cells visible here
# (model.fit passes its own value) -- confirm whether it is still needed.
batch_size = 100
# Number of time steps per input sequence fed to the models.
timesteps = 30
# Number of input features per time step.
n_features = 1

#format data properly
# NOTE: despite its name, test_size is the number of leading samples that
# format_data assigns to the TRAINING split (80% of the files); the remaining
# samples become the test split.
test_size = int(0.8 * len(files))

def min_max_norm(cts):
    """Rescale values linearly to the [0, 1] range (min-max normalisation).

    Args:
        cts: numpy array or pandas Series of numeric values.

    Returns:
        ``(cts - min) / (max - min)`` applied element-wise, as the same kind of
        object as ``cts``. Constant input (``max == min``) maps to all zeros
        instead of dividing by zero.
    """
    lowest = cts.min()
    span = cts.max() - lowest
    if span == 0:
        # Every value is identical: there is no range to scale by.
        return cts - lowest
    # Parenthesised on purpose: subtract the minimum BEFORE dividing.
    return (cts - lowest) / span

def format_data(data, include_uncertainty=False):
    """Load light-curve CSVs and split them into train/test arrays.

    Each file name in ``data`` is read from ``data_dir`` with the
    ``relative_time`` column as index. The regression target is the index
    label of the brightest non-zero ``cts`` sample (see ``find_max_light``).
    Curves with fewer than ``timesteps`` rows are zero-padded at the end;
    longer curves are left untouched.

    Args:
        data: iterable of CSV file names located in ``data_dir``.
        include_uncertainty: if False (default) only the ``cts`` column is
            used as input and the target is the peak time. If True every
            column of the CSV is kept as an input feature and the target
            becomes ``[peak time, e_cts at the peak]``.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` as numpy arrays. Inputs have
        shape ``(num_samples, num_rows, num_features)``. The first
        ``test_size`` samples (a module-level value that, despite its name,
        is the training count) form the training split; the rest form the
        test split.
    """
    x = []
    y = []

    for csv in data:
        curve = pd.read_csv(os.path.join(data_dir, csv), index_col="relative_time")
        # The peak is located on the raw curve, before any padding is added.
        max_time, max_light = find_max_light(curve)

        if include_uncertainty:
            target = np.array([max_time, max_light['e_cts']])
            features = curve.to_numpy(dtype=float)
        else:
            target = max_time
            features = curve['cts'].to_numpy(dtype=float)

        # Zero-pad along the time axis only, up to the configured sequence
        # length, regardless of how many feature columns are present.
        n_missing = timesteps - features.shape[0]
        if n_missing > 0:
            pad_width = [(0, n_missing)] + [(0, 0)] * (features.ndim - 1)
            features = np.pad(features, pad_width, mode="constant")

        x.append(features)
        y.append(target)

    #reshapes data to be (num_samples, timesteps, num_features), instead of (num_samples, timesteps)
    def add_feature_dim(samples):
        # -1 lets numpy infer the feature count, so single-feature input
        # gains a trailing axis of size 1 and multi-column input keeps its
        # feature axis unchanged.
        return samples.reshape(samples.shape[0], samples.shape[1], -1)

    x_train = add_feature_dim(np.array(x[:test_size]))
    y_train = np.array(y[:test_size])
    x_test = add_feature_dim(np.array(x[test_size:]))
    y_test = np.array(y[test_size:])

    return x_train, y_train, x_test, y_test

# Build the train/test arrays from every file listed in `files`.
x_train, y_train, x_test, y_test = format_data(files)
# Expected input layout: (num_samples, timesteps, num_features)
print(f"input shapes, training:{x_train.shape}", f"test:{x_test.shape}", f"total curves:{len(files)}")
# Expected target layout: (num_samples,)
print(f"output shapes, training:{y_train.shape}", f"test:{y_test.shape}", f"total curves:{len(files)}")

SUCCESS: Found GPU: /device:GPU:0
input shapes, training:(2616, 30, 1) test:(654, 30, 1) total curves:3270
output shapes, training:(2616,) test:(654,) total curves:3270


# Simple LSTM
## Training

In [38]:
# Prints True if the training inputs contain any NaN/inf entry, else False.
print((~np.isfinite(x_train)).any())

# Single-layer LSTM regressor with one output unit.
# The Masking layer is configured with mask_value=0 so that zero-valued
# (padded) timesteps can be skipped by the layers after it.
model = Sequential([
    tf.keras.layers.Masking(mask_value=0.,
                            input_shape=(timesteps, n_features)),
    LSTM(50, activation='relu', input_shape=(timesteps, n_features)),
    Dense(1),
])
# Disabled experiment kept for reference:
# optimizer = optimizers.Adam(clipvalue=.5)
# optimizer.learning_rate = 0.0001
model.compile(loss='mse', optimizer='adam')

# fit model
model.fit(x_train, y_train, batch_size=128, epochs=200)


False


## Prediction

In [32]:
# Run the simple LSTM on the test inputs (progress output silenced) and
# print the raw predictions.
yhat = model.predict(x_test, verbose=0)
print(yhat)

NameError: name 'stacked_model' is not defined

# Stacked LSTM
## Training

In [31]:
# model layers
# Two stacked LSTM layers followed by a single-unit regression head. The
# first LSTM returns the full sequence so the second one receives one vector
# per timestep.
stacked_model = Sequential()
# format_data zero-pads short curves; mask those timesteps here exactly as
# the simple LSTM model does, so both models treat the padding the same way.
stacked_model.add(tf.keras.layers.Masking(mask_value=0.,
                                          input_shape=(timesteps, n_features)))
stacked_model.add(LSTM(50, activation='relu', return_sequences=True))
stacked_model.add(LSTM(50, activation='relu'))
stacked_model.add(Dense(1))
# NOTE(review): the recorded run of this cell reported `loss: nan` in the
# first epoch. Unbounded relu activations combined with un-normalised inputs
# are a likely contributor -- confirm, and consider scaling the inputs or
# clipping gradients.
stacked_model.compile(optimizer='adam', loss='mse')

# fit model
stacked_model.fit(x_train, y_train, epochs=200)


Epoch 1/200
10/82 [==>...........................] - ETA: 15s - loss: nan

KeyboardInterrupt: 

## Prediction

In [None]:
# Run the stacked LSTM on the test inputs (progress output silenced) and
# print the raw predictions. Note that this rebinds `yhat`.
yhat = stacked_model.predict(x_test, verbose=0)
print(yhat)