In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
from torch import optim
from lstm import LSTM

df = pd.read_csv('./user06/1599757200/eda_temp/1599786420.csv')

In [2]:
CFG = {
    'SEQ_LENGTH':20,
    'INPUT_DIM':2,
    'HIDDEN_DIM':10,
    'OUTPUT_DIM':1,
    'EPOCHS':100,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':100,
    'SEED':42
}

In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
def build_dataset(data, seq_len):
    dataX = []
    dataY = []
    
    for i in range(len(data)-CFG['SEQ_LENGTH']):
        x = data[i:i+seq_len, :]
        y = data[i+seq_len]
        
        dataX.append(x)
        dataY.append(y)
    return np.array(dataX), np.array(dataY)


# df = df[['timestamp', 'eda', 'temp']]
df = df[['eda', 'temp']]
df = df.fillna(0)

train_size = int(len(df)*0.8)
train_set = df[0:train_size]
val_set = df[train_size-CFG['SEQ_LENGTH']:]

In [5]:
train_set

Unnamed: 0,eda,temp
0,0.000000,0.00
1,0.000000,0.00
2,0.000000,0.00
3,0.000000,0.00
4,0.000000,0.00
...,...,...
187,0.294671,34.03
188,0.288265,34.03
189,0.284421,34.03
190,0.283140,34.03


In [6]:
val_set

Unnamed: 0,eda,temp
172,0.228049,34.03
173,0.224206,34.03
174,0.225487,34.03
175,0.231893,34.03
176,0.229331,34.05
...,...,...
235,0.244705,34.03
236,0.243424,34.03
237,0.245986,34.03
238,0.247267,34.03


In [7]:
scaler_x = MinMaxScaler()
scaler_x.fit(train_set.iloc[:,:])

train_set.iloc[:,:] = scaler_x.transform(train_set.iloc[:,:])
val_set.iloc[:,:] = scaler_x.transform(val_set.iloc[:,:])

scaler_y = MinMaxScaler()
scaler_y.fit(train_set.iloc[:,:])

train_set.iloc[:,:] = scaler_y.transform(train_set.iloc[:,:])
val_set.iloc[:,:] = scaler_y.transform(val_set.iloc[:,:])

trainX, trainY = build_dataset(np.array(train_set), CFG['SEQ_LENGTH'])
valX, valY = build_dataset(np.array(val_set), CFG['SEQ_LENGTH'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_set.iloc[:,:] = scaler_x.transform(train_set.iloc[:,:])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  val_set.iloc[:,:] = scaler_x.transform(val_set.iloc[:,:])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_set.iloc[:,:] = scaler_y.transform(train_set.iloc[:,:])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ve

In [8]:
trainX

array([[[0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ],
        ...,
        [0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ]],

       [[0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ],
        ...,
        [0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ]],

       [[0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ],
        ...,
        [0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ]],

       ...,

       [[0.38735122, 0.99941263],
        [0.38735122, 0.99941263],
        [0.39399061, 0.99941263],
        ...,
        [0.89865498, 0.02935133],
        [0.87953434, 0.02935133],
        [0.86041371, 0.02935133]],

       [[0.38735122, 0.99941263],
        [0.39399061, 0.99941263],
        [0.68068092, 0.02935133],
        .

In [9]:
trainY

array([[0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.

In [10]:
trainX_tensor = torch.FloatTensor(trainX).to(device)
trainY_tensor = torch.FloatTensor(trainY).to(device)

valX_tensor = torch.FloatTensor(valX).to(device)
valY_tensor = torch.FloatTensor(valY).to(device)

dataset = TensorDataset(trainX_tensor, trainY_tensor)
dataloader = DataLoader(dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, drop_last=True)

In [11]:
LSTM = LSTM(CFG['INPUT_DIM'], CFG['HIDDEN_DIM'], CFG['SEQ_LENGTH'], CFG['OUTPUT_DIM'], 1).to(device)

In [12]:
def train_model(model, train_df, epochs, lr, verbose, patience):
    criterion = nn.MSELoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    train_history = np.zeros(epochs)
    
    for epoch in range(epochs):
        avg_cost = 0
        total_batch = len(train_df)
        
        for batch_idx, samples in enumerate(train_df):
            x_train, y_train = samples
            model.reset_hidden_state()
            outputs = model(x_train) # h(x)
            loss = criterion(outputs, y_train) # h(y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            avg_cost += loss/total_batch
        train_history[epoch] = avg_cost
        
        if epoch % verbose == 0:
            print('Epoch ', '%02d' % (epoch), 'Train Loss: ', '{:.4f}'.format(avg_cost))
        if (epoch % patience == 0) & (epoch != 0):
            if train_history[epoch-patience] < train_history[epoch]:
                print('\n Early Stopping')
                break
    return model.eval(), train_history

In [13]:
model, train_history = train_model(LSTM, dataloader, epochs=CFG['EPOCHS'], lr=CFG['LEARNING_RATE'], verbose=20, patience=10)

Epoch  00 Train Loss:  0.2316
Epoch  20 Train Loss:  0.2251
Epoch  40 Train Loss:  0.2188
Epoch  60 Train Loss:  0.2128
Epoch  80 Train Loss:  0.2069


  return F.mse_loss(input, target, reduction=self.reduction)
