In [2]:
import os

import numpy as np
#import matplotlib.pyplot as plt
import torch
import torch.nn as nn

# Configuration

### Inputs

In [9]:
# Directory (relative to the project root) that holds the processed inputs
process_out_dir = '01_process/out/'

# Training and validation archives expected inside that directory
train_data_fpath = f'{process_out_dir}train_data.npz'
valid_data_fpath = f'{process_out_dir}valid_data.npz'
# not doing any test set stuff until the very, very end

In [10]:
# Root of the project on the shared file system; the relative input paths
# are resolved against it.
extended_dir = '/caldera/projects/usgs/water/iidd/datasci/lake-temp/lake_ice_prediction/'

# os.path.join returns its second argument unchanged when that argument is
# already absolute, so re-running this cell does not prepend the root a
# second time (plain string concatenation did). On the first run the result
# is identical to `extended_dir + path` because extended_dir ends with '/'.
process_out_dir = os.path.join(extended_dir, process_out_dir)

train_data_fpath = os.path.join(extended_dir, train_data_fpath)
valid_data_fpath = os.path.join(extended_dir, valid_data_fpath)

### Values

In [11]:
# number of passes of the training loop
epochs = 10_000

# plotting parameters: y-axis limits for the zoomed-in loss curve
loss_curve_zoom_ymin, loss_curve_zoom_ymax = 0.055, 0.15

### Outputs

In [12]:
# Directory (relative to the project root) that receives this notebook's outputs
train_out_dir = '02_train/out/'

# scaling constants, trained weights, and predictions for each data split
data_scalars_fpath = f'{train_out_dir}limitted_lstm_min_max_scalars.pt'
model_weights_fpath = f'{train_out_dir}limitted_lstm_weights.pth'
train_predictions_fpath = f'{train_out_dir}limitted_lstm_train_preds.npy'
valid_predictions_fpath = f'{train_out_dir}limitted_lstm_valid_preds.npy'

In [31]:
# Resolve every output path against the project root. os.path.join leaves an
# already-absolute path untouched, so re-running this cell is harmless
# (string concatenation would prepend the root a second time). On the first
# run the result equals `extended_dir + path` because extended_dir ends
# with '/'.
data_scalars_fpath = os.path.join(extended_dir, data_scalars_fpath)
model_weights_fpath = os.path.join(extended_dir, model_weights_fpath)
train_predictions_fpath = os.path.join(extended_dir, train_predictions_fpath)
valid_predictions_fpath = os.path.join(extended_dir, valid_predictions_fpath)
# NOTE(review): loss_lists_fpath is not assigned in the visible configuration
# cells, so the line below stays disabled.
#loss_lists_fpath = extended_dir + loss_lists_fpath

# Import

In [13]:
# Open the training and validation archives.
# allow_pickle = True lets NumPy unpickle object arrays, which can execute
# arbitrary code -- only point these paths at trusted files.
train_data, valid_data = (
    np.load(archive_fpath, allow_pickle = True)
    for archive_fpath in (train_data_fpath, valid_data_fpath)
)

In [14]:
# show the names of the arrays stored in the training archive
train_data.files

['x', 'y', 'dates', 'DOW', 'features']

In [15]:
# Pull each array out of the training archive by its key
(train_x,
 train_y,
 train_dates,
 train_DOW,
 train_variables) = (train_data[key]
                     for key in ('x', 'y', 'dates', 'DOW', 'features'))

In [16]:
# Pull each array out of the validation archive by its key
(valid_x,
 valid_y,
 valid_dates,
 valid_DOW,
 valid_variables) = (valid_data[key]
                     for key in ('x', 'y', 'dates', 'DOW', 'features'))

### Quick view of all the target sequences

# Prepare data for `torch`

In [17]:
# Convert the numpy arrays to float32 tensors.
# The targets additionally get a feature axis inserted at position 2.
train_x = torch.from_numpy(train_x).to(torch.float32)
train_y = torch.from_numpy(train_y).to(torch.float32).unsqueeze(2)

valid_x = torch.from_numpy(valid_x).to(torch.float32)
valid_y = torch.from_numpy(valid_y).to(torch.float32).unsqueeze(2)

# min-max scale the data

In [18]:
# Per-feature extrema of the training inputs, one row per feature
# (features live on the last axis of train_x):
#   column 0 -> minimum, column 1 -> maximum
n_features = train_x.shape[2]
flat_train_x = train_x.reshape(-1, n_features)
min_max_scalars = torch.stack(
    (flat_train_x.min(dim = 0).values, flat_train_x.max(dim = 0).values),
    dim = 1,
)

In [19]:
# Min-max scale every feature with the *training* extrema only, so the
# validation set is transformed exactly like the training set (validation
# values may therefore fall slightly outside [0, 1]).
# Run this cell once per kernel session: it overwrites train_x / valid_x.
feature_min = min_max_scalars[:, 0]
feature_range = min_max_scalars[:, 1] - feature_min

# A feature that is constant over the training set has a zero range, and
# dividing by it would fill that feature with NaN/inf. Dividing by 1 instead
# maps such a feature to 0.
feature_range = torch.where(feature_range == 0,
                            torch.ones_like(feature_range),
                            feature_range)

# broadcasting applies the per-feature constants along the last axis
train_x = (train_x - feature_min) / feature_range
valid_x = (valid_x - feature_min) / feature_range

# Define a simple model

In [20]:
# recycled model code
class LSTMDA(nn.Module):
    """Hand-written single-layer LSTM followed by a linear read-out with a
    sigmoid activation, applied at every time step.

    Parameters
    ----------
    input_dim : number of input features per time step
    hidden_dim : number of LSTM hidden units
    recur_dropout : dropout probability applied to the candidate cell values
    dropout : dropout probability applied to the input sequence
    """

    def __init__(self, input_dim, hidden_dim, recur_dropout = 0, dropout = 0):
        super().__init__()

        self.input_dim = input_dim
        self.hidden_size = hidden_dim

        # the four gates are stored side by side in single matrices
        n_gate_units = 4 * hidden_dim
        self.weight_ih = nn.Parameter(torch.Tensor(input_dim, n_gate_units))
        self.weight_hh = nn.Parameter(torch.Tensor(hidden_dim, n_gate_units))
        self.bias = nn.Parameter(torch.Tensor(n_gate_units))
        # called while only the LSTM parameters are registered; the dense
        # layer created below keeps its default initialisation
        self.init_weights()

        self.dropout = nn.Dropout(dropout)
        self.recur_dropout = nn.Dropout(recur_dropout)

        self.dense = nn.Linear(hidden_dim, 1)
        self.dense_activation = nn.Sigmoid()

    def init_weights(self):
        """Xavier-uniform initialisation for matrices, zeros for vectors."""
        for param in self.parameters():
            is_matrix = param.data.ndimension() >= 2
            initializer = nn.init.xavier_uniform_ if is_matrix else nn.init.zeros_
            initializer(param.data)

    def forward(self, x, init_states = None):
        """Run the LSTM over x, assumed to be (batch, sequence, feature).

        Returns the per-step sigmoid outputs, shaped (batch, sequence, 1),
        together with the final hidden and cell states as a tuple (h, c).
        When init_states is None both states start at zero.
        """
        batch_size, _, _ = x.size()
        n_hidden = self.hidden_size

        if init_states is not None:
            h_t, c_t = init_states
        else:
            h_t = torch.zeros(batch_size, n_hidden, device = x.device)
            c_t = torch.zeros(batch_size, n_hidden, device = x.device)

        x = self.dropout(x)
        step_outputs = []
        for x_t in x.unbind(dim = 1):
            # all four gate pre-activations from one pair of matrix products
            gates = x_t @ self.weight_ih + h_t @ self.weight_hh + self.bias
            i_t = torch.sigmoid(gates[:, :n_hidden])               # input
            f_t = torch.sigmoid(gates[:, n_hidden:2 * n_hidden])   # forget
            g_t = torch.tanh(gates[:, 2 * n_hidden:3 * n_hidden])  # candidate
            o_t = torch.sigmoid(gates[:, 3 * n_hidden:])           # output

            c_t = f_t * c_t + i_t * self.recur_dropout(g_t)
            h_t = o_t * torch.tanh(c_t)
            step_outputs.append(h_t)

        # (batch, sequence, hidden)
        hidden_seq = torch.stack(step_outputs, dim = 1)
        out = self.dense_activation(self.dense(hidden_seq))

        return out, (h_t, c_t)

In [24]:
# initialize the model with a seed
torch.manual_seed(0)

# very small model
# maps every input variable (last axis of train_x) to a hidden dim of 1 via the LSTM layer
# transforms that LSTM out with a dense layer (scale and bias)
# then sigmoid activation for probability
#
# The input width is read from the data rather than hard-coded, so the model
# cannot drift out of sync with the feature set.
n_input_features = train_x.shape[2]
model = LSTMDA(n_input_features, 1).cuda()

# Training

### Train loop

In [25]:
# Full-batch training: each epoch is one optimizer step on the whole training
# set, followed by a validation pass. Per-epoch losses are collected in
# loss_ls (train) and valid_loss_ls (validation).
loss_fn = torch.nn.BCELoss()
loss_ls = []
valid_loss_ls = []

# Copy the data to the GPU once instead of on every epoch
train_x_gpu, train_y_gpu = train_x.cuda(), train_y.cuda()
valid_x_gpu, valid_y_gpu = valid_x.cuda(), valid_y.cuda()

# Report progress about ten times; max() avoids a modulo-by-zero when
# epochs < 10.
print_every = max(1, epochs // 10)

optimizer = torch.optim.Adam(model.parameters())
for i in range(epochs):
    model.train()
    train_y_hat, (h, c) = model(train_x_gpu)
    loss = loss_fn(train_y_hat, train_y_gpu)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    loss_ls.append(loss.item())

    if i % print_every == 0:
        print(i, loss.item())

    # evaluate with dropout disabled and without tracking gradients
    model.eval()
    with torch.no_grad():
        valid_y_hat, (h, c) = model(valid_x_gpu)
        valid_loss = loss_fn(valid_y_hat, valid_y_gpu)
        valid_loss_ls.append(valid_loss.item())

# training loss of the final epoch
print(epochs, loss.item())

0 0.6976397633552551
1000 0.2334420084953308
2000 0.14772184193134308
3000 0.10554295778274536
4000 0.08626815676689148
5000 0.07577812671661377
6000 0.06888356804847717
7000 0.06474336981773376
8000 0.06207174062728882
9000 0.06020260602235794
10000 0.059035032987594604


# Save predictions for evaluation

To save on file size, I'm not going to rebundle the other objects; they can be combined later with a simple concatenate.

In [28]:
# Final predictions for both splits. This is inference only: eval mode
# disables the dropout layers, and no_grad avoids building an autograd graph
# over the full datasets.
model.eval()
with torch.no_grad():
    train_y_hat, (h, c) = model(train_x.cuda())
    valid_y_hat, (h, c) = model(valid_x.cuda())

In [29]:
# Detach the predictions from the graph, copy them to host memory and
# convert them to numpy arrays
train_y_hat, valid_y_hat = (
    preds.detach().cpu().numpy()
    for preds in (train_y_hat, valid_y_hat)
)

In [32]:
# Write each split's predictions to its own .npy file
for preds_fpath, preds in ((train_predictions_fpath, train_y_hat),
                           (valid_predictions_fpath, valid_y_hat)):
    np.save(preds_fpath, preds)

# Save model weights and min-max scalars

In [33]:
# Persist the scaling constants first, then the trained parameters
for saved_obj, saved_fpath in ((min_max_scalars, data_scalars_fpath),
                               (model.state_dict(), model_weights_fpath)):
    torch.save(saved_obj, saved_fpath)