## We want to increase the amount of training data
- We shift backwards from the seizure onset one segment at a time to create more batches (epochs) of sequences.
- First, we take more segments from the original data during data import: 60 segments.
- We want a training length of 30 segments in total.
- Starting from the 60th segment, right before the seizure, we walk backwards in steps of 1 segment, taking blocks of 30 segments (the training length) until we reach the 0th segment.
- That is how we increase the training data by a factor of 30.


In [None]:
import os
import numpy as np
from scipy.io import loadmat

# Construct LSTM sequences from one segment
def lstm_sequence(input_segment, target, sampling_freq, window, stride, block_s = 60):
    """ Function for generating blocks of LSTM input tensors

        The segment is cut into consecutive blocks of block_s seconds. Each
        block is reduced to a sequence of per-channel means taken over
        sliding windows of `window` samples that advance by `stride` samples.

        input_segment : The EEG segment, 2-D array (n_channels, n_samples)
        target        : 1/0 (preictal/interictal); None for test
        sampling_freq : Sampling frequency (samples per second)
        window        : Window size for 1d convolutions on each block
        stride        : Stride size of the 1d convolution
        block_s       : Size of the block in seconds (default = 60)

        Returns (X, Y, n_blocks):
        X        : array of shape (n_blocks, seq_len, n_channels)
        Y        : array of n_blocks ones (target == 1) or zeros (target == 0),
                   None for any other target
        n_blocks : number of blocks extracted from the segment
    """

    # Dimensions
    n_channels, T_segment = input_segment.shape

    # Determine block dimensions.
    # sampling_freq may be a 0-d (possibly float) array when it comes out of
    # np.squeeze; cast to a plain int so it can be used in slicing. A
    # fractional block length is truncated to whole samples.
    block_len = int(sampling_freq * block_s)   # Length of each block
    n_blocks = (T_segment - 1) // block_len    # Number of blocks

    # Determine the sequence length for LSTM
    div = (block_len - window) % stride
    pad = stride - div if div != 0 else 0      # Size of padding needed

    seq_len = (block_len + pad - window) // stride

    # Initiate tensor
    X = np.zeros((n_blocks, seq_len, n_channels))

    # Loop over blocks and fill X
    for ib in range(n_blocks):
        # Get block
        data_block = input_segment[:, ib * block_len:(ib + 1) * block_len]

        # Pad if necessary
        if pad != 0:
            data_block = np.concatenate((data_block, np.zeros((n_channels, pad))), axis=1)

        # 1d convolution by mean: window j covers samples
        # [j*stride, j*stride + window) of the block
        for j in range(seq_len):
            start = j * stride
            X[ib, j, :] = np.mean(data_block[:, start:start + window], axis=1)

    # Fill in the target
    if target == 1:
        Y = np.ones(n_blocks)
    elif target == 0:
        Y = np.zeros(n_blocks)
    else:
        Y = None

    return X, Y, n_blocks

# Collect all the segments to build a tensor input for LSTM
def lstm_build_input(clips, target, window, stride, block_s = 60):
    """ Collect all the data and build sequences for LSTM

        Every clip is loaded with loadmat, converted to blocks with
        lstm_sequence, and the per-clip results are stacked along axis 0.

        clips              : List of clips (paths of .mat files)
        target             : 1/0 (preictal/interictal); None for test set
        window             : Window size for 1d convolutions
        stride             : Length of the stride in 1d convolution
        block_s            : Size of the block in seconds (default = 60)

        Returns (X_train, Y_train):
        X_train : per-clip tensors from lstm_sequence stacked along axis 0
        Y_train : column vector of targets, one row per block; None when
                  lstm_sequence returns no targets

        Raises ValueError if clips is empty or a file holds no data variable.
    """

    X_parts = []
    Y_parts = []

    # Loop over all clips and store data
    for file in clips:
        clip = loadmat(file)

        # loadmat adds bookkeeping entries such as '__header__', '__version__'
        # and '__globals__'; the segment is the first real variable.
        segment_name = next((key for key in clip if not key.startswith('__')), None)
        if segment_name is None:
            raise ValueError("no data variable found in {}".format(file))

        record = clip[segment_name][0][0]
        input_segment = record[0]              # Get electrode data
        sampling_freq = np.squeeze(record[2])  # Sampling frequency

        # Get tensor input and targets from blocks
        X, Y, _ = lstm_sequence(input_segment, target, sampling_freq, window, stride, block_s)

        X_parts.append(X)
        if Y is not None:
            Y_parts.append(Y[:, None])

    if not X_parts:
        raise ValueError("clips is empty; nothing to build")

    # Concatenate once at the end instead of re-stacking on every clip
    X_train = np.concatenate(X_parts, axis=0)
    Y_train = np.concatenate(Y_parts, axis=0) if Y_parts else None

    return X_train, Y_train


# Window, stride and block_s used to build the LSTM sequences
window = 16000
stride = 100
block_s = 60

# Pass block_s explicitly so the setting above is actually applied
X_1, Y_1 = lstm_build_input(clips_preictal, 1, window, stride, block_s)
X_0, Y_0 = lstm_build_input(clips_interictal, 0, window, stride, block_s)

# Scale the data: divide each series along axis 1 by its peak absolute value.
# A series that is identically zero has peak 0; use 1 as the divisor there so
# it stays zero instead of turning into NaN.
peak_1 = np.max(np.abs(X_1), axis=1, keepdims=True)
X_1 = X_1 / np.where(peak_1 == 0, 1, peak_1)
peak_0 = np.max(np.abs(X_0), axis=1, keepdims=True)
X_0 = X_0 / np.where(peak_0 == 0, 1, peak_0)

# Combine the data (interictal first, then preictal)
X = np.concatenate((X_0, X_1), axis = 0)
Y = np.concatenate((Y_0, Y_1), axis = 0)
Y = np.squeeze(Y)

print("Data shape = ", X.shape)