In [1]:
##########################Load Libraries  ####################################
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt     
import re
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV, KFold
from memory_reduction import reduce_memory_usage

import torch
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 

from window_data import *

import os, errno
import sys
from tqdm import trange

import warnings
warnings.filterwarnings('ignore')

## Pre Processing & Dataset Preparation

### Load and Clean Data

Data is stored in csv files. We will only consider traces where the breath hold listed in the CSV and the breathold I calculated differ by fewer than 2 seconds.

In [2]:
breath_df = pd.read_pickle('breath_df')
under2 = breath_df[abs(breath_df['Csv_breath_holds']\
                       -breath_df['Data_breath_holds'])<=2].reset_index()
under2.head(2)

Unnamed: 0,index,Trace,Csv_breath_holds,Data_breath_holds,Full_trace,breathhold_idx,bh_start_end
0,0,"[0.1319, 0.1363, 0.1408, 0.1452, 0.1497, 0.154...",31.29,31.69,"[0.0003, 0.0006, 0.001, 0.0014, 0.0019, 0.0023...","[2661, 2662, 2663, 2664, 2665, 2666, 2667, 266...","(2661, 5829)"
1,1,"[0.4727, 0.4687, 0.4646, 0.4605, 0.4563, 0.452...",30.61,31.4,"[0.0006, 0.0009, 0.0013, 0.0017, 0.0022, 0.002...","[2386, 2387, 2388, 2389, 2390, 2391, 2392, 239...","(2386, 5525)"


In [3]:
type(under2)

pandas.core.frame.DataFrame

### Memory Usage 
Reduce memory usage to avoid kernel crashing during normalization.

In [3]:
under2 = reduce_memory_usage(under2)

Mem. usage decreased to  0.17 Mb from  0.26 Mb (32.1% reduction)


In [4]:
# Create a copy of the dataframe and rename
df = under2.copy()

### Split Data

In [5]:
train_size = int((len(df)) * 0.70)
valid_size = int((len(df))*.20)
test_size = int((len(df))*.10)

In [6]:
train = df.iloc[0:train_size]
valid = df.iloc[train_size:train_size+valid_size]
test = df.iloc[train_size+valid_size:]
print("train data shape is:",train.shape)
print("validation data shape is:",valid.shape)
print("test data shape is:",test.shape)

train data shape is: (3355, 7)
validation data shape is: (958, 7)
test data shape is: (480, 7)


### Normalize Data
Normalize curves to −1 to 1

In [7]:
X_train = list(train['Trace'])
X_val = list(valid['Trace'])
X_test = list(test['Trace'])

In [8]:
scaler = MinMaxScaler() # creates the scaler
scaler.fit(X_train)
x_train2 = scaler.transform(X_train)
x_val2 = scaler.transform(X_val)
x_test2 = scaler.transform(X_test)

### Sliding Window

We wil use a sliding window of 100 points.
Our target(labels) is 10 points

In [9]:
seq_length = 1

In [10]:
Train = create_sliding_window_data(n_steps=100, num_outputs=seq_length, dataset=x_train2[:50])
Valid = create_windowed_data(n_steps=100, num_outputs=seq_length, dataset=x_val2[:25])
Test = create_windowed_data(n_steps=100, num_outputs=seq_length, dataset=x_test2[:10])

### Define Dataset

Reshape data and covert data to tensors

In [11]:
def reshape_series(series):
    series = np.transpose(np.array(series))
    return series.reshape(series.shape[1],series.shape[0], 1)

x_train = reshape_series(list(Train['Trace']))
x_val = reshape_series(list(Valid['Trace']))
x_test = reshape_series(list(Test['Trace']))

def reshape_target(series):
    series = np.transpose(np.array(series))
    return series.reshape(series.shape[1],series.shape[0])
y_train = reshape_target(list(Train['Next_pts']))
y_val = reshape_target(list(Train['Next_pts']))
y_test = reshape_target(list(Test['Next_pts']))

In [12]:
class timeseries(Dataset):
    def __init__(self,x,y):
        self.x = torch.tensor(x,dtype=torch.float32)
        self.y = torch.tensor(y,dtype=torch.float32)
        self.len = x.shape[0]

    def __getitem__(self,idx):
        return self.x[idx],self.y[idx]
  
    def __len__(self):
        return self.len

In [13]:
train_dataset = timeseries(x_train, y_train)
valid_dataset = timeseries(x_val, y_val)
test_dataset = timeseries(x_test, y_test)

In [14]:
print ("train X shape is:",train_dataset.x.shape)
print ("train y shape is:",train_dataset.y.shape)

train X shape is: torch.Size([70000, 100, 1])
train y shape is: torch.Size([70000, 1])


In [15]:
trainX = train_dataset.x
trainY = train_dataset.y

valX = valid_dataset.x
valY = valid_dataset.y

testX = test_dataset.x
testY = test_dataset.y

### Dataloader

In [16]:
train_loader = DataLoader(train_dataset, batch_size=200, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=200, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=200, shuffle=False)

## LSTM

In [17]:
class ShallowRegressionLSTM(nn.Module):
    def __init__(self, num_sensors, hidden_units):
        super().__init__()
        self.num_sensors = num_sensors  # this is the number of features
        self.hidden_units = hidden_units
        self.num_layers = 3

        self.lstm = nn.LSTM(
            input_size=num_sensors,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers
        )

        self.linear = nn.Linear(in_features=self.hidden_units, out_features=1)

    def forward(self, x):
        batch_size = x.shape[0]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()

        _, (hn, _) = self.lstm(x, (h0, c0))
        out = self.linear(hn[0]).flatten()  # First dim of Hn is num_layers, which is set to 1 above.

        return out

In [67]:
learning_rate = .001
num_hidden_units = 15

model = ShallowRegressionLSTM(num_sensors=1, hidden_units=num_hidden_units)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [69]:
def train_model(data_loader, model, loss_function, optimizer):
    num_batches = len(data_loader)
    total_loss = 0
    model.train()

    for X, y in data_loader:
        output = model(X)
        loss = loss_function(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / num_batches
    print(f"Train loss: {avg_loss}")

def test_model(data_loader, model, loss_function):

    num_batches = len(data_loader)
    total_loss = 0

    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            output = model(X)
            total_loss += loss_function(output, y).item()

    avg_loss = total_loss / num_batches
    print(f"Test loss: {avg_loss}")


print("Untrained test\n--------")
test_model(test_loader, model, loss_function)
print()

for ix_epoch in range(10):
    print(f"Epoch {ix_epoch}\n---------")
    train_model(train_loader, model, loss_function, optimizer=optimizer)
    test_model(test_loader, model, loss_function)
    print()

Untrained test
--------
Test loss: 0.026432551443576813

Epoch 0
---------
Train loss: 0.043853711762598585
Test loss: 0.02644255943596363

Epoch 1
---------
Train loss: 0.04384024618991784
Test loss: 0.026297517120838165

Epoch 2
---------
Train loss: 0.043852296130997795
Test loss: 0.026113780215382576

Epoch 3
---------
Train loss: 0.043884772982980526
Test loss: 0.026221005246043205

Epoch 4
---------
Train loss: 0.04386956592755658
Test loss: 0.026590121909976006

Epoch 5
---------
Train loss: 0.043859637549945284
Test loss: 0.02742427960038185

Epoch 6
---------
Train loss: 0.04384628454489367
Test loss: 0.02636834979057312

Epoch 7
---------
Train loss: 0.0438630712883813
Test loss: 0.026132186874747276

Epoch 8
---------
Train loss: 0.04386671114712953
Test loss: 0.02672279253602028

Epoch 9
---------
Train loss: 0.04383499239704439
Test loss: 0.026311567053198814



In [18]:
#torch.save(model, 'simple_lstm')
model = ShallowRegressionLSTM(num_sensors=1, hidden_units=15)
model = torch.load('simple_lstm')

In [20]:
model(testX[0:1])

tensor([0.2761], grad_fn=<ReshapeAliasBackward0>)

In [203]:
def make_pred(x_test, y_test, n_preds):
    test_dataset = timeseries(x_test, y_test)
    x = test_dataset.x
    preds = []

    for i in range(n_preds): 
        test_pred = model(x)
        preds.append(test_pred)
        x = torch.cat((x, test_pred.view(-1,1, 1)), 1)
        
    current_pred = preds[0] 
    for i in range(n_preds-1):
        current_pred = torch.cat((current_pred.view(-1,1, i+1), \
                              preds[i+1].view(-1,1, 1)), -1)
        
    return current_pred
    

In [208]:
preds = make_pred(x_test, y_test, 10)

In [209]:
preds[0]

tensor([[0.2761, 0.2769, 0.2772, 0.2772, 0.2772, 0.2772, 0.2772, 0.2772, 0.2772,
         0.2772]], grad_fn=<SelectBackward0>)