In [1]:
# Imports
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

In [3]:
# Definitions

# Column layout of the sheets in the xlsx files: the eight feature columns
# '1'..'8', two columns labelled N/A, then 'angle' (the value we predict),
# 'time' and 'session'.
columns_features_considered = [str(number) for number in range(1, 9)]
column_ground_truth = 'angle'
columns_data = columns_features_considered + ['N/A_1', 'N/A_2', column_ground_truth, 'time', 'session']
# The 'time' column is deliberately ignored. This makes the data slightly
# imprecise, because the real time intervals differ by tiny amounts (not worth
# modelling). Each timestep represents 1 millisecond (0.001 second).

# Plot layout (applies to every matplotlib figure created afterwards)
mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False
# TODO: Try without this, see what happens?

def plot_to_file(plot, ATTEMPT_NAME, TITLE):
    """Save a figure as ``<ATTEMPT_NAME>/<TITLE>.png``.

    Args:
        plot: Any object exposing ``savefig(path, bbox_inches=...)``.
        ATTEMPT_NAME: Directory the image is written to. It is created
            (including parents) when it does not exist yet.
        TITLE: File name without the ``.png`` extension.
    """
    # savefig does not create missing directories, so make sure the target
    # folder exists first. An empty name is skipped because makedirs('')
    # raises.
    if ATTEMPT_NAME:
        os.makedirs(ATTEMPT_NAME, exist_ok=True)
    plot.savefig(f'{ATTEMPT_NAME}/{TITLE}.png', bbox_inches='tight')

In [4]:
def run_all(DATASET_FILE_PATH, DATASET_SHEET_TITLE, GRANULARITY, 
            STEP_SIZE_SLIDING_WINDOW, PAST_HISTORY, 
            FUTURE_TARGET, VAL_PERCENT, TEST_PERCENT, 
            EPOCHS, BATCH_SPLITS, SMOOTHING, 
            ATTEMPT_NAME):
    """Run the full pipeline: load, preprocess, window, batch, train and plot.

    Args:
        DATASET_FILE_PATH: Path of the xlsx workbook to read.
        DATASET_SHEET_TITLE: Title looked up (with a ``_raw_data`` suffix) in
            the workbook's table of contents.
        GRANULARITY: Keep every GRANULARITY-th row of the raw data.
        STEP_SIZE_SLIDING_WINDOW: Stride between rows inside one history window.
        PAST_HISTORY: Number of rows spanned by one history window.
        FUTURE_TARGET: Number of future ground-truth rows to predict.
        VAL_PERCENT: Fraction (0..1) of the rows used for validation.
        TEST_PERCENT: Fraction (0..1) of the rows used for testing.
        EPOCHS: Number of training epochs.
        BATCH_SPLITS: Batch size handed to the tf.data pipeline.
        SMOOTHING: ``span`` of the exponentially weighted mean applied to the data.
        ATTEMPT_NAME: Name of this run, forwarded to ``plot_dataset``.
    """
    raw_data = load_dataset(DATASET_FILE_PATH, DATASET_SHEET_TITLE, GRANULARITY)
    indexes, features, ground_truth = split_data(raw_data, GRANULARITY, SMOOTHING)
    plot_dataset(features, ground_truth, ATTEMPT_NAME)
    x_train, y_train, x_val, y_val, x_test, y_test = slice_data(indexes, features, ground_truth, VAL_PERCENT, 
                                                                TEST_PERCENT, PAST_HISTORY, FUTURE_TARGET, 
                                                                STEP_SIZE_SLIDING_WINDOW, GRANULARITY)
    # Each returned dataset gets its own name. Binding the third value to
    # batched_val_data again would silently replace the validation dataset
    # used for fitting and plotting below.
    batched_train_data, batched_val_data, batched_test_data = batch_data(x_train, y_train, x_val, y_val, 
                                                                         x_test, y_test, BATCH_SPLITS, EPOCHS)
    model = compile_model(x_train, FUTURE_TARGET)
    training_history = fit_model(model, x_train, x_val, batched_train_data, batched_val_data, BATCH_SPLITS, EPOCHS)
    # TODO: evaluate on batched_test_data; the plots below still use the validation data.
    plot_results(training_history, model, batched_val_data, indexes, ground_truth)

In [5]:
def load_dataset(DATASET_FILE_PATH, DATASET_SHEET_TITLE, GRANULARITY):
    """Load one raw-data sheet from the workbook and name its columns.

    The first sheet is read as a table of contents. The row whose first column
    equals ``"<DATASET_SHEET_TITLE>_raw_data"`` gives the position of the data
    sheet, which is then read without a header row and labelled with
    ``columns_data``.

    Args:
        DATASET_FILE_PATH: Path of the xlsx workbook.
        DATASET_SHEET_TITLE: Title of the wanted dataset, without the
            ``_raw_data`` suffix.
        GRANULARITY: Not used by this function.

    Returns:
        A DataFrame holding the sheet, with ``columns_data`` as column names.

    Raises:
        IndexError: If the table of contents has no matching entry.
    """
    wanted_entry = f"{DATASET_SHEET_TITLE}_raw_data"
    toc = pd.read_excel(DATASET_FILE_PATH, sheet_name=0, header=None)
    matching_rows = toc[toc[0] == wanted_entry]
    toc_position = matching_rows[0].index[0]
    # Sheet 0 is the table of contents itself, hence the offset of one.
    frame = pd.read_excel(DATASET_FILE_PATH, sheet_name=toc_position + 1, header=None)
    frame.columns = columns_data
    return frame

In [6]:
def split_data(raw_data, GRANULARITY, SMOOTHING):
    """Down-sample and smooth the raw sheet, split into features and target.

    Args:
        raw_data: DataFrame containing the ``columns_features_considered``
            columns and the ``column_ground_truth`` column.
        GRANULARITY: Keep every GRANULARITY-th row.
        SMOOTHING: ``span`` of the exponentially weighted mean applied to the
            down-sampled features and ground truth.

    Returns:
        ``(indexes, features, ground_truth)``: the kept row positions as a
        ``range``, and two smoothed DataFrames (ground truth has one column).
    """
    # Row positions that survive the down-sampling; each timestep is a millisecond.
    indexes = range(len(raw_data))[::GRANULARITY]

    sampled_features = raw_data[columns_features_considered][::GRANULARITY]
    features = sampled_features.ewm(span=SMOOTHING).mean()

    sampled_target = pd.DataFrame(raw_data[column_ground_truth][::GRANULARITY])
    ground_truth = sampled_target.ewm(span=SMOOTHING).mean()

    # Idea not tried yet: standardise the data with the mean and standard
    # deviation of the training portion, i.e. (dataset - mean) / std.
    return indexes, features, ground_truth

In [7]:
def plot_dataset(features, ground_truth, ATTEMPT_NAME):
    """Plot the features (one subplot per column) and then the ground truth.

    Args:
        features: Object whose ``plot(subplots=True)`` draws the features.
        ground_truth: Object whose ``plot()`` draws the ground truth.
        ATTEMPT_NAME: Currently unused; saving the figures to files through
            ``plot_to_file`` is not wired in yet.
    """
    # (frame, keyword arguments for its plot call), drawn in this order.
    plot_jobs = (
        (features, {"subplots": True}),
        (ground_truth, {}),
    )
    for frame, plot_kwargs in plot_jobs:
        frame.plot(**plot_kwargs)

In [15]:
# Create array of all sliding windows of the data
def multivariate_data(dataset_features, dataset_ground_truth, start_index, end_index, history_size,
                      target_size, step, granularity, single_step=False, print_index=False):
    """Build (history window, label) pairs by sliding over the data.

    For every position ``i`` in ``[start_index + history_size, end_index)`` the
    window is ``dataset_features[range(i - history_size, i, step)]``. The label
    is ``dataset_ground_truth[i + target_size]`` when ``single_step`` is true,
    otherwise the slice ``dataset_ground_truth[i:i + target_size]``.

    Args:
        dataset_features: Array indexable with a ``range`` (e.g. a numpy array).
        dataset_ground_truth: Array holding the values to predict.
        start_index: First row that may appear inside a window.
        end_index: Exclusive upper bound for ``i``, or ``None`` for "as far as
            the data allows". Values that would leave fewer than
            ``target_size`` rows after ``i`` are clamped.
        history_size: Number of rows spanned by one window.
        target_size: Number of future rows per label (offset when ``single_step``).
        step: Stride between rows inside a window.
        granularity: Unused; kept so existing calls keep working.
        single_step: Return one value per label instead of a slice.
        print_index: Print progress markers while iterating (debug aid).

    Returns:
        ``(data, labels)`` as numpy arrays.
    """
    data, labels = [], []
    first_position = start_index + history_size
    # Last exclusive bound that still leaves target_size ground-truth rows
    # after every position. Without this bound an explicit end_index near the
    # end of the data gives truncated labels (ragged array) or, for
    # single_step, an IndexError.
    last_allowed = len(dataset_features) - target_size
    if end_index is None:
        end_index = last_allowed
    else:
        end_index = min(end_index, last_allowed)
    if print_index: print("start")
    for i in range(first_position, end_index):
        if print_index: print("A", i,)
        indices = range(i - history_size, i, step)  # rows of this sliding window
        data.append(dataset_features[indices])
        if single_step:
            labels.append(dataset_ground_truth[i + target_size])
        else:
            labels.append(dataset_ground_truth[i:i + target_size])
    return np.array(data), np.array(labels)


def slice_data(indexes, features, ground_truth, VAL_PERCENT, TEST_PERCENT, PAST_HISTORY, FUTURE_TARGET, 
               STEP_SIZE_SLIDING_WINDOW, GRANULARITY):
    """Split the data chronologically and build sliding windows for each part.

    The rows are laid out as ``[train | validation | test]``. Validation
    covers ``VAL_PERCENT`` of the rows and test covers ``TEST_PERCENT``.

    Args:
        indexes: X values used to plot the feature matrix.
        features: Object whose ``.values`` is the feature matrix.
        ground_truth: Object whose ``.values`` holds the values to predict.
        VAL_PERCENT: Fraction (0..1) of the rows used for validation.
        TEST_PERCENT: Fraction (0..1) of the rows used for testing.
        PAST_HISTORY: Number of rows spanned by one history window.
        FUTURE_TARGET: Number of future rows per label.
        STEP_SIZE_SLIDING_WINDOW: Stride between rows inside a window.
        GRANULARITY: Forwarded to ``multivariate_data``.

    Returns:
        ``(x_train, y_train, x_val, y_val, x_test, y_test)``.
    """
    dataset = features.values
    targets = ground_truth.values
    observations = len(dataset)
    train_split = int(observations * (1 - VAL_PERCENT - TEST_PERCENT))
    # Validation ends where the test share begins. Using 1 - VAL_PERCENT here
    # would hand the validation set TEST_PERCENT of the rows and vice versa.
    val_split = int(observations * (1 - TEST_PERCENT))

    plt.plot(indexes, dataset)

    def windows_between(start, end):
        # Multi-step windows for the rows in [start, end).
        return multivariate_data(dataset, targets, start, end, PAST_HISTORY, FUTURE_TARGET,
                                 STEP_SIZE_SLIDING_WINDOW, GRANULARITY, single_step=False,
                                 print_index=False)

    x_train, y_train = windows_between(0, train_split)
    x_val, y_val = windows_between(train_split, val_split)
    x_test, y_test = windows_between(val_split, None)

    return x_train, y_train, x_val, y_val, x_test, y_test

In [14]:
def batch_data(x_train, y_train, x_val, y_val, x_test, y_test, BATCH_SPLITS, EPOCHS):
    """Wrap the three splits in batched ``tf.data`` datasets.

    Args:
        x_train, y_train: Training windows and labels.
        x_val, y_val: Validation windows and labels.
        x_test, y_test: Test windows and labels.
        BATCH_SPLITS: Batch size passed to ``Dataset.batch``.
        EPOCHS: Repeat count for the training and validation datasets.

    Returns:
        ``(batched_train_data, batched_val_data, batched_test_data)``. The
        training and validation datasets are repeated ``EPOCHS`` times; the
        test dataset is a single pass.
    """
    batched_train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(BATCH_SPLITS).repeat(EPOCHS)
    batched_val_data = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(BATCH_SPLITS).repeat(EPOCHS)
    batched_test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SPLITS) # no repeat
    # The third element is the test dataset, not the validation dataset a second time.
    return batched_train_data, batched_val_data, batched_test_data

In [11]:
def compile_model(x_train, FUTURE_TARGET):
    """Build and compile the LSTM regression model.

    The network is one LSTM layer with 32 units followed by a dense layer with
    ``FUTURE_TARGET`` outputs, trained with RMSprop (gradient values clipped
    to 1.0) on mean absolute error.

    Args:
        x_train: Training windows; the last two entries of ``x_train.shape``
            are used as the LSTM input shape.
        FUTURE_TARGET: Number of values the model predicts per window.

    Returns:
        The compiled Keras model. Its summary is printed as a side effect.
    """
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.LSTM(32, input_shape=x_train.shape[-2:], return_sequences = False))
    model.add(tf.keras.layers.Dense(FUTURE_TARGET))
    # This is a regression model, so track a regression metric (MSE) next to
    # the MAE loss. A classification accuracy is meaningless for
    # continuous targets.
    model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0), loss='mae', metrics=['mse'])
    model.summary()
    return model

In [12]:
def fit_model(model, x_train, x_val, batched_train_data, batched_val_data, BATCH_SPLITS, EPOCHS):
    """Train the model and return the Keras training history.

    Args:
        model: Compiled model exposing ``fit``.
        x_train: Training windows; only their count is used.
        x_val: Validation windows; only their count is used.
        batched_train_data: Batched training dataset passed to ``fit``.
        batched_val_data: Batched validation dataset passed to ``fit``.
        BATCH_SPLITS: Batch size the datasets were built with.
        EPOCHS: Number of epochs to train for.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    # Number of full batches in each split. Keras draws this many batches per
    # epoch (steps_per_epoch) and per validation pass (validation_steps).
    full_train_batches = len(x_train) // BATCH_SPLITS
    full_val_batches = len(x_val) // BATCH_SPLITS

    fit_options = {
        "epochs": EPOCHS,
        "steps_per_epoch": full_train_batches,
        "validation_data": batched_val_data,
        "validation_steps": full_val_batches,
    }
    return model.fit(batched_train_data, **fit_options)

In [13]:
def plot_train_history(history, title):
    """Plot training loss (blue) and validation loss (red) per epoch.

    Args:
        history: Object whose ``history`` mapping has the keys ``'loss'`` and
            ``'val_loss'``.
        title: Title of the figure.
    """
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(len(loss))

    # (values, line format, legend label) for each curve, in drawing order.
    curves = (
        (loss, 'b', 'Training loss'),
        (val_loss, 'r', 'Validation loss'),
    )

    plt.figure()
    for values, line_format, legend_label in curves:
        plt.plot(epochs, values, line_format, label=legend_label)
    plt.title(title)
    plt.legend()
    plt.show()

def plot_all(trained_model, batched_val_data, indexes, ground_truth):
    """Plot one prediction per batch (red) against the ground truth (blue).

    Args:
        trained_model: Model exposing ``predict``.
        batched_val_data: Iterable of ``(features, labels)`` batches.
        indexes: X values of the full series; the last ``len(predictions)``
            entries are used for plotting.
        ground_truth: Series sliced to the same trailing range.
    """
    # Each element is a (features, labels) pair. Only the features go to
    # predict; [0] keeps the prediction for the first window of each batch.
    # NOTE(review): if batched_val_data was built with .repeat(), every
    # repetition is iterated here. Confirm that is intended.
    predictions = [trained_model.predict(batch_features)[0]
                   for batch_features, _batch_labels in batched_val_data]
    pred_size = len(indexes) - len(predictions)
    plt.plot(indexes[pred_size:], predictions, 'r')
    plt.plot(indexes[pred_size:], ground_truth[pred_size:], 'b')
    # TODO: use separate test data instead of the validation data for testing.

def plot_results(training_history, trained_model, batched_val_data, indexes, ground_truth):
    """Show the loss curves, then the predictions against the ground truth.

    Args:
        training_history: History object forwarded to ``plot_train_history``.
        trained_model: Model forwarded to ``plot_all``.
        batched_val_data: Batched dataset forwarded to ``plot_all``.
        indexes: X values forwarded to ``plot_all``.
        ground_truth: Ground-truth series forwarded to ``plot_all``.
    """
    # Neither figure is saved to disk yet.
    loss_title = 'Multi-Step Training and validation loss'
    plot_train_history(history=training_history, title=loss_title)
    plot_all(
        trained_model=trained_model,
        batched_val_data=batched_val_data,
        indexes=indexes,
        ground_truth=ground_truth,
    )
