## Project TODO Steps
 1. create dataset
 2. create network
 3. train network
 4. test network

## Parameters to test
 1. sequence length
 2. amplitude
 3. frequency (period)
 4. offset
 5. phase
 6. noise
 
## Questions to answer
 1. What would a batch look like?
 2. How many LSTM layers do we need? How many nodes per LSTM layer?
 3. Can we learn if our timestamps are not uniformly spaced?

In [None]:
import numpy as np
import pandas as pd
from random import random

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Masking
from keras.utils.np_utils import to_categorical


In [None]:
# Font-size presets shared by every figure in this notebook.
# SMALL_SIZE is defined for completeness; none of the rc calls below use it.
SMALL_SIZE = 10
MEDIUM_SIZE = 12
BIGGER_SIZE = 16
BIGGEST_SIZE = 20

# (rc group, settings) pairs, applied in order.
for rc_group, rc_settings in (
    ('font', {'size': BIGGEST_SIZE}),         # default text size
    ('axes', {'titlesize': BIGGEST_SIZE}),    # axes title
    ('axes', {'labelsize': BIGGEST_SIZE}),    # x and y axis labels
    ('xtick', {'labelsize': BIGGER_SIZE}),    # x tick labels
    ('ytick', {'labelsize': BIGGER_SIZE}),    # y tick labels
    ('legend', {'fontsize': MEDIUM_SIZE}),    # legend entries
    ('figure', {'titlesize': BIGGEST_SIZE}),  # figure title
):
    plt.rc(rc_group, **rc_settings)

## Build a signal generator

In [None]:
class Signal:
    """Cosine signal evaluated at a fixed set of timestamps.

    The samples are computed as::

        offset + amplitude * cos(2 * pi * timestamps / period - phase)

    Each of ``offset``, ``amplitude``, ``period`` and ``phase`` is either a
    fixed value or, when the matching ``*_range`` pair ``(low, high)`` is
    given, drawn uniformly between ``low`` and ``high`` every time
    :meth:`generate` runs.

    Calling the instance (or reading :attr:`sample`) returns the cached
    samples, generating them on first use.

    Args:
        timestamps: Times at which the signal is evaluated. Any array-like
            is accepted; it is stored as a NumPy array.
        offset, amplitude, period, phase: Fixed parameter values, used when
            the corresponding range is ``None``.
        offset_range, amplitude_range, period_range, phase_range: Optional
            two-element ``(low, high)`` sequences; when given, they take
            precedence over the fixed value.

    Raises:
        AssertionError: If a range does not have exactly two elements, or if
            a parameter has neither a range nor a fixed value.
    """

    def __init__(self,
                 timestamps,
                 offset=0,
                 offset_range=None,
                 amplitude=1,
                 amplitude_range=None,
                 period=1,
                 period_range=None,
                 phase=0,
                 phase_range=None):
        # np.asarray lets plain lists/tuples take part in the arithmetic of
        # generate(); an ndarray argument is stored without copying.
        self.timestamps = np.asarray(timestamps)
        self._sample = None

        self.offset, self.offset_range = self._validated(
            'offset', offset, offset_range)
        self.amplitude, self.amplitude_range = self._validated(
            'amplitude', amplitude, amplitude_range)
        self.period, self.period_range = self._validated(
            'period', period, period_range)
        self.phase, self.phase_range = self._validated(
            'phase', phase, phase_range)

    @staticmethod
    def _validated(name, value, value_range):
        """Check one (value, range) pair and return it unchanged.

        AssertionError is raised explicitly (rather than via ``assert``) so
        the exception type callers see is unchanged while the check still
        runs under ``python -O``.
        """
        if value_range is not None:
            if len(value_range) != 2:
                raise AssertionError(
                    f'{name}_range must have exactly 2 elements, '
                    f'got {len(value_range)}')
        elif value is None:
            raise AssertionError(
                f'{name} must not be None when {name}_range is not given')
        return value, value_range

    def __call__(self):
        """Return the cached samples (same as :attr:`sample`)."""
        return self.sample

    @property
    def sample(self):
        """Samples of the signal, generated once and then cached."""
        if self._sample is None:
            self._sample = self.generate()
        return self._sample

    def _sample_random(self, r, r_range):
        """Return ``r``, or a uniform draw from ``r_range`` when it is set."""
        if r_range is None:
            return r
        low, high = r_range[0], r_range[1]
        return (high - low) * np.random.random() + low

    def generate(self):
        """Draw the ranged parameters and recompute the cached samples.

        The drawn values overwrite ``self.offset``, ``self.amplitude``,
        ``self.period`` and ``self.phase``, so they can be inspected after
        the call. Parameters are drawn in that order.
        """
        self.offset = self._sample_random(self.offset, self.offset_range)
        self.amplitude = self._sample_random(self.amplitude, self.amplitude_range)
        self.period = self._sample_random(self.period, self.period_range)
        self.phase = self._sample_random(self.phase, self.phase_range)

        angle = 2.0 * np.pi * self.timestamps / self.period - self.phase
        self._sample = self.offset + self.amplitude * np.cos(angle)
        return self._sample


In [None]:
# generate data for the lstm
def generate_sig_data(n_timesteps, coeffs_list, verbose=True):
    """Build one interleaved multi-signal sequence and its per-sample labels.

    One ``Signal`` is created per entry of ``coeffs_list``, all sharing
    ``n_timesteps`` timestamps evenly spaced over [0, 20]. Each timestamp is
    then randomly assigned to exactly one signal (roughly equal shares), and
    the mixed sequence takes that signal's value at that timestamp.

    Args:
        n_timesteps: Number of timestamps / samples to generate.
        coeffs_list: One dict of ``Signal`` keyword arguments per signal.
        verbose: When true (the default, matching the original behaviour),
            print the assignment matrix and the transposed labels.

    Returns:
        ``(x, y)`` where ``x`` has shape ``(n_timesteps, 2, 1)`` with
        ``x[:, 0, 0]`` the timestamps and ``x[:, 1, 0]`` the mixed values,
        and ``y`` is ``to_categorical(indices, num_classes=n_signals)``,
        i.e. the encoded index of the signal each sample came from.
    """
    timestamps = np.linspace(0, 20, n_timesteps)
    n_signals = len(coeffs_list)
    signals = np.empty((n_signals, n_timesteps))
    for i, coeffs in enumerate(coeffs_list):
        signals[i, :] = Signal(timestamps, **coeffs)()

    # A random permutation of 0..n_timesteps-1 decides which signal owns
    # each timestamp: a position is bumped once for every threshold its
    # permutation value falls below.
    shuff = np.arange(n_timesteps)
    np.random.shuffle(shuff)
    indices = np.zeros(n_timesteps, dtype=int)

    # s = 0 gives a threshold of 0, which no permutation value is below,
    # so the loop can start at 1.
    for s in range(1, n_signals):
        indices[shuff < s * n_timesteps // n_signals] += 1

    # one_hot[k, t] == 1 exactly when timestamp t is assigned to signal k.
    # The builtin float replaces the np.float alias removed in NumPy 1.24.
    one_hot = np.zeros((n_signals, n_timesteps), dtype=float)
    one_hot[indices, np.arange(n_timesteps)] = 1
    if verbose:
        print(one_hot)

    # Keep, per timestamp, only the value of the signal that owns it.
    x = np.sum(one_hot * signals, axis=0)
    x = np.vstack((timestamps, x)).T
    x = x.reshape(len(x), 2, 1)
    y = to_categorical(indices, num_classes=n_signals)
    if verbose:
        print(y.T)
    return x, y


In [None]:
# Simplest proof-of-concept case: two cosines that differ in period and
# offset, each with a phase drawn at random from (0, pi).
n_timesteps = 41

sig1_coeffs = dict(phase_range=(0, np.pi), period=100.0, offset=2.0)
sig2_coeffs = dict(phase_range=(0, np.pi), period=23.0)
coeffs_list = [sig1_coeffs, sig2_coeffs]
n_signals = len(coeffs_list)

# Generate one mixed sequence and report the array shapes.
X, y = generate_sig_data(n_timesteps, coeffs_list)
print(X.shape, y.shape)

In [None]:
# Plot the samples that came from each source signal, one line per signal.
# generate_sig_data() only returns X and y, so the plot is built from them:
# X[:, 0, 0] holds the timestamps, X[:, 1, 0] the mixed values, and column i
# of y marks the samples assigned to signal i.
for i in range(n_signals):
    from_signal_i = y[:, i] == 1
    plt.plot(X[from_signal_i, 0, 0], X[from_signal_i, 1, 0], marker='.')

In [None]:
# Scatter the mixed sequence: timestamps (X[:, 0, 0]) against the
# interleaved sample values (X[:, 1, 0]) returned by generate_sig_data().
plt.scatter(X[:, 0, 0], X[:, 1, 0], marker='.')

In [None]:
# Classifier: a single 5-unit LSTM reads every (timestamp, value) pair as a
# 2-step sequence with one feature per step; a softmax layer then scores the
# candidate source signals.
model = Sequential([
    LSTM(5, input_shape=(2, 1)),
    Dense(n_signals, activation='softmax'),
])

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Train for 200 rounds, one epoch each, on a freshly generated sequence
# every round.
for _ in range(200):
    X, y = generate_sig_data(n_timesteps, coeffs_list)
    model.fit(X, y, epochs=1, batch_size=1, verbose=2)

# Evaluate on one more freshly generated sequence.
X, y = generate_sig_data(n_timesteps, coeffs_list)
yhat = model.predict(X)

# Batch prediction: show only the first class column of target and output.
for expected, predicted in zip(y[:, 0], yhat[:, 0]):
    print('Expected', expected, 'Predicted', predicted)

# Per-sample prediction: show the input pair with the full label and
# output vectors.
for i, pair in enumerate(X):
    x = np.reshape(pair, (1, 2, 1))
    yhat = model.predict(x)
    print('x', pair, 'Expected', y[i], 'Predicted', yhat[0])

In [None]:
# generate data for the lstm
def generate_data(n_timesteps):
    """Return lagged pairs of a random sequence and the value to predict.

    A sequence of ``n_timesteps`` values is drawn with ``random()``. Row
    ``t`` of ``X`` is ``(previous value, current value)``; the first row has
    no previous value and uses -1 in its place. ``y`` is the current-value
    column.

    Returns:
        ``(X, y)`` with shapes ``(n_timesteps, 2, 1)`` and
        ``(n_timesteps, 1)``.
    """
    current = pd.DataFrame(
        np.fromiter((random() for _ in range(n_timesteps)), dtype=float))
    # shift(1) moves every value one row down, leaving NaN in the first row.
    previous = current.shift(1)
    # -1 stands in for the missing predecessor of the first value.
    pairs = pd.concat([previous, current], axis=1).fillna(-1).values

    n_rows = len(pairs)
    X = pairs.reshape(n_rows, 2, 1)
    y = pairs[:, 1].reshape(n_rows, 1)
    return X, y

# Quick shape check of the lagged-pair generator. n_timesteps is not assigned
# in this cell, so it must already exist from an earlier one.
X, y = generate_data(n_timesteps)
print(X.shape, y.shape)

In [None]:
n_timesteps = 10

# Regressor: a 5-unit LSTM over each (previous, current) pair, followed by
# a single linear output trained with mean squared error.
model = Sequential([
    LSTM(5, input_shape=(2, 1)),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

# Train for 500 rounds, one epoch each, on a new random sequence every round.
for _ in range(500):
    X, y = generate_data(n_timesteps)
    model.fit(X, y, epochs=1, batch_size=1, verbose=2)

# Compare predictions with targets on one more new sequence.
X, y = generate_data(n_timesteps)
yhat = model.predict(X)
for expected, predicted in zip(y[:, 0], yhat[:, 0]):
    print('Expected', expected, 'Predicted', predicted)

In [None]:
# Stacked-LSTM example trained on random dummy data.
data_dim = 16
timesteps = 8
num_classes = 10

# expected input data shape: (batch_size, timesteps, data_dim)
model = Sequential()
# The first two LSTM layers return the full sequence so the next LSTM layer
# receives one 32-dimensional vector per timestep.
model.add(LSTM(32, return_sequences=True, input_shape=(timesteps, data_dim)))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))  # return a single vector of dimension 32
# Output width follows num_classes so it always matches the label arrays
# below (it was previously a hard-coded 10).
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Generate dummy training data. The labels are uniform random numbers, not
# one-hot vectors.
x_train = np.random.random((1000, timesteps, data_dim))
y_train = np.random.random((1000, num_classes))

# Generate dummy validation data
x_val = np.random.random((100, timesteps, data_dim))
y_val = np.random.random((100, num_classes))

model.fit(x_train, y_train,
          batch_size=64, epochs=5,
          validation_data=(x_val, y_val))