## Project TODO Steps
 1. create dataset
 2. create network
 3. train network
 4. test network

## Parameters to test
 1. sequence length
 2. amplitude
 3. frequency (period)
 4. offset
 5. phase
 6. noise
 
## Questions to answer
 1. What would a batch look like?
 2. How many LSTM layers do we need? How many nodes per LSTM layer?
 3. Can we learn if our timestamps are not uniformly spaced?

In [1]:
import numpy as np
import pandas as pd
from random import random

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Masking
from keras.utils.np_utils import to_categorical


Using TensorFlow backend.


In [3]:
# Font-size presets (in points) used for every figure in this notebook.
SMALL_SIZE = 10
MEDIUM_SIZE = 12
BIGGER_SIZE = 16
BIGGEST_SIZE = 20

# Default text size.
plt.rc('font', size=BIGGEST_SIZE)
# Axes title and x/y label sizes.
plt.rc('axes', titlesize=BIGGEST_SIZE, labelsize=BIGGEST_SIZE)
# Tick label sizes on both axes (plt.rc accepts a tuple of groups).
plt.rc(('xtick', 'ytick'), labelsize=BIGGER_SIZE)
# Legend text size.
plt.rc('legend', fontsize=MEDIUM_SIZE)
# Figure-level title size.
plt.rc('figure', titlesize=BIGGEST_SIZE)

## Build a signal generator

In [21]:
class Signal:
    """Cosine signal sampled at fixed timestamps, with optionally random parameters.

    The samples are computed as
    ``offset + amplitude * cos(2 * pi * timestamps / period - phase)``.

    Each of ``offset``, ``amplitude``, ``period`` and ``phase`` is either a
    fixed value or, when the matching ``*_range`` argument is given, drawn
    anew on every call to :meth:`generate` as
    ``(high - low) * np.random.random() + low``.

    Parameters
    ----------
    timestamps : array-like
        Times at which the signal is evaluated. Converted with
        ``np.asarray`` so lists and tuples are accepted as well as arrays.
    offset, amplitude, period, phase : number
        Fixed parameter values, used when the matching range is ``None``.
    offset_range, amplitude_range, period_range, phase_range : sequence or None
        ``(low, high)`` pair to sample the parameter from. Takes precedence
        over the fixed value.

    Raises
    ------
    AssertionError
        If a range does not have exactly two entries, or if a parameter has
        neither a range nor a non-``None`` fixed value.
    """

    def __init__(self,
                 timestamps,
                 offset=0,
                 offset_range=None,
                 amplitude=1,
                 amplitude_range=None,
                 period=1,
                 period_range=None,
                 phase=0,
                 phase_range=None):
        # A plain list would fail on `timestamps / period` in generate();
        # np.asarray returns ndarray input unchanged, so existing callers
        # keep the exact object they passed in.
        self.timestamps = np.asarray(timestamps)
        self._sample = None  # lazily filled cache, see `sample`

        self._check_parameter('offset', offset, offset_range)
        self.offset = offset
        self.offset_range = offset_range

        self._check_parameter('amplitude', amplitude, amplitude_range)
        self.amplitude = amplitude
        self.amplitude_range = amplitude_range

        self._check_parameter('period', period, period_range)
        self.period = period
        self.period_range = period_range

        self._check_parameter('phase', phase, phase_range)
        self.phase = phase
        self.phase_range = phase_range

    @staticmethod
    def _check_parameter(name, value, value_range):
        """Assert that a parameter has a two-entry range or a fixed value."""
        if value_range is not None:
            assert len(value_range) == 2, \
                '{}_range must have exactly two entries'.format(name)
        else:
            assert value is not None, \
                '{} must be set when {}_range is None'.format(name, name)

    def __call__(self):
        """Return the cached sample (same as the ``sample`` property)."""
        return self.sample

    @property
    def sample(self):
        """Signal values, generated on first access and cached afterwards."""
        if self._sample is None:
            self._sample = self.generate()
        return self._sample

    def _sample_random(self, r, r_range):
        """Return ``r`` unchanged, or a random draw from ``r_range`` if given."""
        if r_range is None:
            return r
        low, high = r_range[0], r_range[1]
        return (high - low) * np.random.random() + low

    def generate(self):
        """Draw the parameters, recompute the signal and refresh the cache.

        The drawn values overwrite ``self.offset``, ``self.amplitude``,
        ``self.period`` and ``self.phase`` so they can be inspected later.
        Draw order is offset, amplitude, period, phase.

        Returns
        -------
        numpy.ndarray
            The newly generated signal values.
        """
        self.offset = self._sample_random(self.offset, self.offset_range)
        self.amplitude = self._sample_random(self.amplitude, self.amplitude_range)
        self.period = self._sample_random(self.period, self.period_range)
        self.phase = self._sample_random(self.phase, self.phase_range)

        angle = 2.0 * np.pi * self.timestamps / self.period - self.phase
        self._sample = self.offset + self.amplitude * np.cos(angle)
        return self._sample


In [127]:
# generate data for the lstm
def generate_sig_data(n_timesteps, coeffs_list):
    """Build a randomly interleaved mixture of signals plus per-sample labels.

    One ``Signal`` is generated per entry of ``coeffs_list`` on a shared grid
    of ``n_timesteps`` points in ``[0, 20]``. Every timestamp is then assigned
    to exactly one source signal at random, each signal receiving a roughly
    equal share, and the mixed sequence takes that signal's value there.

    Parameters
    ----------
    n_timesteps : int
        Number of timestamps (and therefore samples) to produce.
    coeffs_list : list of dict
        Keyword arguments forwarded to ``Signal`` for each source signal.

    Returns
    -------
    x : numpy.ndarray, shape (n_timesteps, 2, 1)
        For each sample, the timestamp followed by the mixed signal value.
    y : array
        ``to_categorical`` encoding of the source-signal index of each
        sample, with ``num_classes`` set to the number of signals.
    """
    n_signals = len(coeffs_list)
    timestamps = np.linspace(0, 20, n_timesteps)

    # One column per source signal, all evaluated on the same timestamps.
    signals = np.empty((n_timesteps, n_signals))
    for column, coeffs in enumerate(coeffs_list):
        signals[:, column] = Signal(timestamps, **coeffs)()

    # Random ranking of the timestamps. A sample's label is the number of
    # cut points s * n_timesteps // n_signals lying strictly above its rank.
    ranks = np.arange(n_timesteps)
    np.random.shuffle(ranks)
    cut_points = np.arange(n_signals) * n_timesteps // n_signals
    labels = (ranks[:, np.newaxis] < cut_points).sum(axis=1)

    # Pick, for every timestamp, the value of the signal it was assigned to.
    mixed = signals[np.arange(n_timesteps), labels]

    x = np.column_stack((timestamps, mixed)).reshape(n_timesteps, 2, 1)
    y = to_categorical(labels, num_classes=n_signals)
    return x, y


In [137]:
# Simplest proof-of-concept: two cosines that differ in period, each with a
# phase drawn at random from [0, pi) on every generation.
n_timesteps = 301

sig1_coeffs = dict(phase_range=(0, np.pi), period=100.0, offset=0.0)
sig2_coeffs = dict(phase_range=(0, np.pi), period=23.0)
coeffs_list = [sig1_coeffs, sig2_coeffs]
n_signals = len(coeffs_list)

# X holds (timestamp, mixed value) pairs; y holds the source-signal labels.
X, y = generate_sig_data(n_timesteps=n_timesteps, coeffs_list=coeffs_list)

In [None]:
# Plot, per source signal, the samples of the mixture that came from it.
# X[:, 0, 0] are the timestamps, X[:, 1, 0] the mixed values, and y[:, i]
# is 1 where sample belongs to signal i.
for i in range(n_signals):
    from_signal_i = y[:, i] == 1
    plt.plot(X[from_signal_i, 0, 0], X[from_signal_i, 1, 0], marker='.')

In [None]:
# Scatter the mixed signal: timestamps (X[:, 0, 0]) against values (X[:, 1, 0]).
plt.scatter(X[:, 0, 0], X[:, 1, 0], marker='.')
# plt.plot(X[:, 0, 0], X[:, 1, 0], marker='.')

In [None]:
# Larger reference architecture. The original snippet read `self.input_shape`
# and `self.nb_classes` outside any class and used Dropout without importing
# it; here the shapes come from the notebook's data instead.
from keras.layers import Dropout

model = Sequential()
model.add(LSTM(2048, return_sequences=False,
               input_shape=X.shape[1:],  # per-sample shape of X
               dropout=0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
# One softmax output per source signal.
model.add(Dense(n_signals, activation='softmax'))

In [138]:
# define model: one small LSTM over each 2-step (timestamp, value) sample,
# followed by a softmax over the source signals.
model = Sequential()
# model.add(Masking(mask_value=-1, input_shape=(1,)))
# model.add(LSTM(5))
model.add(LSTM(5, input_shape=(2, 1)))

# Stacked alternative kept for reference:
# model.add(LSTM(32, return_sequences=True, input_shape=(timesteps, data_dim)))
# model.add(LSTM(32, return_sequences=True))
# model.add(LSTM(32))

# model.add(Dropout(0.5))

model.add(Dense(n_signals, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# fit model: one epoch per freshly generated mixture, so every iteration
# sees a new random draw of the signals and of the interleaving.
for _ in range(200):
    X, y = generate_sig_data(n_timesteps, coeffs_list)
    model.fit(X, y, epochs=1, batch_size=1, verbose=2)

# evaluate model on new data
X, y = generate_sig_data(n_timesteps, coeffs_list)
yhat = model.predict(X)

# Compact view: first one-hot component against its predicted probability.
for expected, predicted in zip(y, yhat):
    print('Expected', expected[0], 'Predicted', predicted[0])

# Detailed view: reuse the batch predictions rather than calling
# model.predict again for every single sample.
for sample, expected, predicted in zip(X, y, yhat):
    print('x', sample, 'Expected', expected, 'Predicted', predicted)

Epoch 1/1
 - 2s - loss: 0.6966 - acc: 0.4186
Epoch 1/1
 - 0s - loss: 0.6926 - acc: 0.5515
Epoch 1/1
 - 0s - loss: 0.6914 - acc: 0.5050
Epoch 1/1
 - 0s - loss: 0.6973 - acc: 0.4585
Epoch 1/1
 - 0s - loss: 0.6873 - acc: 0.5482
Epoch 1/1
 - 0s - loss: 0.7003 - acc: 0.3688
Epoch 1/1
 - 0s - loss: 0.6853 - acc: 0.5748
Epoch 1/1
 - 0s - loss: 0.6900 - acc: 0.4983
Epoch 1/1
 - 0s - loss: 0.6911 - acc: 0.5349
Epoch 1/1
 - 0s - loss: 0.6865 - acc: 0.6080
Epoch 1/1
 - 0s - loss: 0.6418 - acc: 0.7542
Epoch 1/1
 - 0s - loss: 0.6415 - acc: 0.6877
Epoch 1/1
 - 0s - loss: 0.7326 - acc: 0.3023
Epoch 1/1
 - 0s - loss: 0.7067 - acc: 0.3123
Epoch 1/1
 - 0s - loss: 0.6618 - acc: 0.6777
Epoch 1/1
 - 0s - loss: 0.5886 - acc: 0.7841
Epoch 1/1
 - 0s - loss: 0.6779 - acc: 0.5349
Epoch 1/1
 - 0s - loss: 0.5421 - acc: 0.7973
Epoch 1/1
 - 0s - loss: 0.4929 - acc: 0.8173
Epoch 1/1
 - 0s - loss: 0.9532 - acc: 0.2458
Epoch 1/1
 - 0s - loss: 0.5335 - acc: 0.7774
Epoch 1/1
 - 0s - loss: 0.7883 - acc: 0.3223
Epoch 1/1


 - 1s - loss: 0.4424 - acc: 0.7110
Epoch 1/1
 - 1s - loss: 1.0642 - acc: 0.5050
Epoch 1/1
 - 0s - loss: 0.3438 - acc: 0.8040
Epoch 1/1
 - 0s - loss: 0.2663 - acc: 0.8738
Epoch 1/1
 - 0s - loss: 0.4217 - acc: 0.7409
Epoch 1/1
 - 0s - loss: 0.2385 - acc: 0.8605
Epoch 1/1
 - 0s - loss: 0.7813 - acc: 0.6279
Epoch 1/1
 - 0s - loss: 0.4225 - acc: 0.7010
Epoch 1/1
 - 0s - loss: 0.6487 - acc: 0.7043
Epoch 1/1
 - 0s - loss: 0.3979 - acc: 0.7309
Epoch 1/1
 - 0s - loss: 0.2710 - acc: 0.8372
Epoch 1/1
 - 0s - loss: 0.7106 - acc: 0.6877
Epoch 1/1
 - 0s - loss: 0.3688 - acc: 0.7409
Epoch 1/1
 - 0s - loss: 0.6484 - acc: 0.6545
Epoch 1/1
 - 0s - loss: 0.4818 - acc: 0.7176
Epoch 1/1
 - 0s - loss: 0.9391 - acc: 0.6113
Epoch 1/1
 - 0s - loss: 0.5149 - acc: 0.6777
Epoch 1/1
 - 0s - loss: 0.3499 - acc: 0.7708
Expected 1.0 Predicted 0.00043826504
Expected 0.0 Predicted 0.87745374
Expected 1.0 Predicted 0.018053263
Expected 0.0 Predicted 0.5769612
Expected 1.0 Predicted 0.26072213
Expected 1.0 Predicted 0.54

x [[ 6.6       ]
 [-0.60887999]] Expected [1. 0.] Predicted [0.8748858  0.12511425]
x [[ 6.66666667]
 [-0.60555185]] Expected [1. 0.] Predicted [0.8625318  0.13746828]
x [[6.73333333]
 [0.19212584]] Expected [0. 1.] Predicted [0.22838221 0.77161777]
x [[ 6.8       ]
 [-0.59886375]] Expected [1. 0.] Predicted [0.83437645 0.16562358]
x [[6.86666667]
 [0.15626062]] Expected [0. 1.] Predicted [0.20843568 0.79156435]
x [[6.93333333]
 [0.13824729]] Expected [0. 1.] Predicted [0.20021667 0.7997834 ]
x [[7.        ]
 [0.12018811]] Expected [0. 1.] Predicted [0.19496585 0.80503416]
x [[ 7.06666667]
 [-0.58536193]] Expected [1. 0.] Predicted [0.78208464 0.21791534]
x [[ 7.13333333]
 [-0.58196065]] Expected [1. 0.] Predicted [0.77057356 0.2294264 ]
x [[ 7.2       ]
 [-0.57854916]] Expected [1. 0.] Predicted [0.7586077  0.24139233]
x [[7.26666667]
 [0.04761283]] Expected [0. 1.] Predicted [0.17973082 0.82026917]
x [[ 7.33333333]
 [-0.57169579]] Expected [1. 0.] Predicted [0.7333204  0.26667958]
x 

x [[ 1.69333333e+01]
 [-5.38541506e-03]] Expected [1. 0.] Predicted [0.18455403 0.8154459 ]
x [[ 1.7000000e+01]
 [-1.1966506e-03]] Expected [1. 0.] Predicted [0.18798412 0.8120159 ]
x [[17.06666667]
 [-0.48995679]] Expected [0. 1.] Predicted [0.00984889 0.99015117]
x [[1.71333333e+01]
 [7.18086781e-03]] Expected [1. 0.] Predicted [0.19502087 0.80497915]
x [[1.72000000e+01]
 [1.13694748e-02]] Expected [1. 0.] Predicted [0.1986296 0.8013704]
x [[17.26666667]
 [-0.44162027]] Expected [0. 1.] Predicted [0.0143785 0.9856215]
x [[17.33333333]
 [-0.42520798]] Expected [0. 1.] Predicted [0.01630558 0.9836945 ]
x [[17.4      ]
 [ 0.0239338]] Expected [1. 0.] Predicted [0.2098294  0.79017055]
x [[17.46666667]
 [ 0.02812117]] Expected [1. 0.] Predicted [0.21369044 0.78630954]
x [[17.53333333]
 [ 0.03230805]] Expected [1. 0.] Predicted [0.21761696 0.7823831 ]
x [[17.6       ]
 [ 0.03649436]] Expected [1. 0.] Predicted [0.22160995 0.77839   ]
x [[17.66666667]
 [ 0.04068002]] Expected [1. 0.] Predic

In [91]:
# generate data for the lstm
def generate_data(n_timesteps):
    """Create a random sequence paired with its one-step-lagged copy.

    Parameters
    ----------
    n_timesteps : int
        Length of the random sequence to draw.

    Returns
    -------
    X : numpy.ndarray, shape (n_timesteps, 2, 1)
        For each step, the previous value followed by the current value.
        The first step has no predecessor, so its previous value is -1.
    y : numpy.ndarray, shape (n_timesteps, 1)
        The current value at each step (the second entry of each X row).
    """
    # Draws come from the stdlib random(), one per timestep.
    current = pd.Series(np.array([random() for _ in range(n_timesteps)]))
    # shift(1) leaves the first lag missing; mark it with -1.
    lagged = current.shift(1).fillna(-1)

    values = pd.concat([lagged, current], axis=1).values
    n_rows = len(values)
    X = values.reshape(n_rows, 2, 1)
    y = values[:, 1].reshape(n_rows, 1)
    return X, y

# Smoke check: generate one dataset and print the shapes of X and y.
# n_timesteps is whatever value the notebook currently holds.
X, y = generate_data(n_timesteps)
print(X.shape, y.shape)

(21, 2, 1) (21, 1)


In [41]:
n_timesteps = 10

# define model: a small LSTM over each (previous, current) pair with a single
# linear output, trained with mean squared error.
model = Sequential()
model.add(LSTM(5, input_shape=(2, 1)))
# model.add(Masking(mask_value=-1, input_shape=(2, 1)))
# model.add(LSTM(5))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# fit model: one epoch on a new random sequence per iteration
for _ in range(500):
    X, y = generate_data(n_timesteps)
    model.fit(X, y, epochs=1, batch_size=1, verbose=2)

# evaluate model on new data
X, y = generate_data(n_timesteps)
yhat = model.predict(X)
for expected, predicted in zip(y[:, 0], yhat[:, 0]):
    print('Expected', expected, 'Predicted', predicted)

Epoch 1/1
 - 1s - loss: 0.3131
Epoch 1/1
 - 0s - loss: 0.5210
Epoch 1/1
 - 0s - loss: 0.2928
Epoch 1/1
 - 0s - loss: 0.2032
Epoch 1/1
 - 0s - loss: 0.3263
Epoch 1/1
 - 0s - loss: 0.1725
Epoch 1/1
 - 0s - loss: 0.1052
Epoch 1/1
 - 0s - loss: 0.2339
Epoch 1/1
 - 0s - loss: 0.1049
Epoch 1/1
 - 0s - loss: 0.2566
Epoch 1/1
 - 0s - loss: 0.2471
Epoch 1/1
 - 0s - loss: 0.1710
Epoch 1/1
 - 0s - loss: 0.0946
Epoch 1/1
 - 0s - loss: 0.0820
Epoch 1/1
 - 0s - loss: 0.0758
Epoch 1/1
 - 0s - loss: 0.0597
Epoch 1/1
 - 0s - loss: 0.0793
Epoch 1/1
 - 0s - loss: 0.1136
Epoch 1/1
 - 0s - loss: 0.1283
Epoch 1/1
 - 0s - loss: 0.1416
Epoch 1/1
 - 0s - loss: 0.0893
Epoch 1/1
 - 0s - loss: 0.0595
Epoch 1/1
 - 0s - loss: 0.0578
Epoch 1/1
 - 0s - loss: 0.0652
Epoch 1/1
 - 0s - loss: 0.0770
Epoch 1/1
 - 0s - loss: 0.0523
Epoch 1/1
 - 0s - loss: 0.0715
Epoch 1/1
 - 0s - loss: 0.0389
Epoch 1/1
 - 0s - loss: 0.0726
Epoch 1/1
 - 0s - loss: 0.0415
Epoch 1/1
 - 0s - loss: 0.0462
Epoch 1/1
 - 0s - loss: 0.0774
Epoch 1/

Epoch 1/1
 - 0s - loss: 0.0030
Epoch 1/1
 - 0s - loss: 0.0016
Epoch 1/1
 - 0s - loss: 4.8867e-04
Epoch 1/1
 - 0s - loss: 4.9417e-04
Epoch 1/1
 - 0s - loss: 4.2700e-04
Epoch 1/1
 - 0s - loss: 0.0012
Epoch 1/1
 - 0s - loss: 0.0015
Epoch 1/1
 - 0s - loss: 4.6991e-04
Epoch 1/1
 - 0s - loss: 0.0010
Epoch 1/1
 - 0s - loss: 6.3793e-04
Epoch 1/1
 - 0s - loss: 3.3648e-04
Epoch 1/1
 - 0s - loss: 0.0012
Epoch 1/1
 - 0s - loss: 0.0024
Epoch 1/1
 - 0s - loss: 6.1067e-04
Epoch 1/1
 - 0s - loss: 4.1298e-04
Epoch 1/1
 - 0s - loss: 0.0013
Epoch 1/1
 - 0s - loss: 0.0011
Epoch 1/1
 - 0s - loss: 0.0010
Epoch 1/1
 - 0s - loss: 4.4303e-04
Epoch 1/1
 - 0s - loss: 0.0012
Epoch 1/1
 - 0s - loss: 7.3432e-04
Epoch 1/1
 - 0s - loss: 4.6198e-04
Epoch 1/1
 - 0s - loss: 0.0014
Epoch 1/1
 - 0s - loss: 6.4119e-04
Epoch 1/1
 - 0s - loss: 0.0011
Epoch 1/1
 - 0s - loss: 0.0014
Epoch 1/1
 - 0s - loss: 8.2644e-04
Epoch 1/1
 - 0s - loss: 2.5061e-04
Epoch 1/1
 - 0s - loss: 7.3308e-04
Epoch 1/1
 - 0s - loss: 3.5778e-04
Epoch 

Expected 0.6795905023905263 Predicted 0.6680278
Expected 0.03499616470417921 Predicted 0.047724657
Expected 0.09055650577168628 Predicted 0.107437044
Expected 0.6048022363530464 Predicted 0.60661143
Expected 0.7566356513865894 Predicted 0.74626005
Expected 0.7195104160716982 Predicted 0.7080582
Expected 0.45749277574478364 Predicted 0.4302166
Expected 0.41390561836739725 Predicted 0.3888765
Expected 0.9762064699959448 Predicted 0.97471184
Expected 0.24290868939722987 Predicted 0.23881254


In [None]:
# Stacked-LSTM example trained on random dummy data.
data_dim = 16
timesteps = 8
num_classes = 10

# expected input data shape: (batch_size, timesteps, data_dim)
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(timesteps, data_dim)))
# returns a sequence of vectors of dimension 32
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))  # return a single vector of dimension 32
# Output width follows num_classes (was a hard-coded 10) so the layer always
# matches the label arrays generated below.
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Generate dummy training data
x_train = np.random.random((1000, timesteps, data_dim))
y_train = np.random.random((1000, num_classes))

# Generate dummy validation data
x_val = np.random.random((100, timesteps, data_dim))
y_val = np.random.random((100, num_classes))

model.fit(x_train, y_train,
          batch_size=64, epochs=5,
          validation_data=(x_val, y_val))