LSTM trained on gridded forcings for each station

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate
import torch.autograd as autograd



In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    print('CUDA Available')
device = torch.device('cuda' if USE_CUDA else 'cpu')

# Seed the PyTorch and NumPy random number generators with a fixed value.
torch.manual_seed(0)
np.random.seed(0)

# TensorBoard writer shared by the cells below (losses and evaluation output).
writer = SummaryWriter(log_dir='../runs/')

In [2]:
# Forcing data per station. The training cell below iterates this as a mapping
# from station id to a table that it slices by date range and reads `.shape[1]`
# from (one column per input feature).
station_data_dict = load_data.load_train_test_lstm()
# Runoff table. The training cell below reads its 'station', 'date' and
# 'runoff' columns.
data_runoff = load_data.load_discharge_gr4j_vic()

  data = pd.read_csv(os.path.join(dir, f), skiprows=2, skipfooter=1, index_col=False, header=None, names=['runoff'], na_values='-1.2345')
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


In [6]:
class LSTMRegression(nn.Module):
    """Stacked LSTM followed by a linear layer that emits one value per sample.

    The LSTM is built with PyTorch's default sequence-first layout, so
    ``forward`` expects input of shape ``(seq_len, batch, input_dim)`` and
    returns a tensor of shape ``(batch, 1)`` computed from the last time step.

    The recurrent state is kept in ``self.hidden`` and is *carried over*
    between ``forward`` calls. Callers that want independent batches must
    reset it first with ``model.hidden = model.init_hidden()``. Because the
    state has a fixed batch dimension, every batch passed to ``forward`` must
    contain exactly ``batch_size`` samples unless ``self.hidden`` is replaced.

    Args:
        input_dim: Number of input features per time step.
        hidden_dim: Size of the LSTM hidden and cell state.
        num_layers: Number of stacked LSTM layers.
        batch_size: Number of sequences per batch (fixes the state shape).
    """

    def __init__(self, input_dim, hidden_dim, num_layers, batch_size):
        super().__init__()
        self.batch_size = batch_size
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers)
        self.linear = nn.Linear(hidden_dim, 1)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh, randomly initialised ``(h_0, c_0)`` state tuple.

        Both tensors have shape ``(num_layers, batch_size, hidden_dim)`` and
        are created on the device that currently holds the model parameters,
        so the class does not depend on a module-level ``device`` variable.
        """
        target_device = next(self.parameters()).device
        state_shape = (self.num_layers, self.batch_size, self.hidden_dim)
        return (torch.randn(state_shape, device=target_device),
                torch.randn(state_shape, device=target_device))

    def forward(self, input):
        """Run the LSTM over ``input`` and regress from the last time step.

        Args:
            input: Tensor of shape ``(seq_len, batch_size, input_dim)``.

        Returns:
            Tensor of shape ``(batch_size, 1)``.
        """
        # The state is a plain attribute, not a registered buffer, so
        # ``model.to(...)`` does not move it; align it with the input here.
        state = tuple(part.to(input.device) for part in self.hidden)
        lstm_out, self.hidden = self.lstm(input, state)
        return self.linear(lstm_out[-1])

In [None]:
# Per-station results, keyed by station id and filled by the training loop.
predictions, actuals, models = {}, {}, {}

# Number of time steps fed to the LSTM for each sample.
seq_len = 24 * 7

# The first training day is pushed back by seq_len // 24 + 1 days from
# 2010-01-01; the training loop looks back seq_len - 1 hours from each day.
train_start = datetime(2010, 1, 1) + timedelta(days=seq_len // 24 + 1)

# Last training day of each cross-validation split. The matching test window
# ends at the next entry, so each split tests on the following month.
train_ends = [
    '2012-12-31', '2013-01-31', '2013-02-28', '2013-03-31',
    '2013-04-30', '2013-05-31', '2013-06-30', '2013-07-31',
    '2013-08-31', '2013-09-30', '2013-10-31', '2013-11-30',
]
test_ends = [*train_ends[1:], '2013-12-31']


plot_list = ['04159492']
# Median NSE over all stations, one entry per cross-validation split.
median_nse_list = []
# Expanding-window cross-validation: every split trains from `train_start` up
# to `train_end` and tests on the period that follows, up to `test_end`.
# For each split one LSTM per station is trained from scratch; the median NSE
# over all stations is appended to `median_nse_list`.
for train_end, test_end in zip(train_ends, test_ends):
    # The test window starts the day after the training window ends.
    test_start = datetime.strptime(train_end, '%Y-%m-%d') + timedelta(days=1)
    print('Train: {} - {}, Test: {} - {}'.format(train_start.strftime('%Y-%m-%d'), train_end, test_start.strftime('%Y-%m-%d'), test_end))

    nse_list = []
    for station, station_rdrs in station_data_dict.items():
        station_runoff = data_runoff[data_runoff['station'] == station].set_index('date')
        if station_runoff['runoff'].isna().any():
            print('Station', station, 'had NA runoff values. Skipping.')
            continue

        num_train_days = len(pd.date_range(train_start, train_end, freq='D'))
        num_days = len(pd.date_range(train_start, test_end, freq='D'))

        # x[:, d, :] holds the forcing rows of the look-back window that ends
        # at train_start + d days. The label-based slice is inclusive on both
        # ends, so it must yield exactly seq_len rows (assumes hourly rows).
        x = np.zeros((seq_len, num_days, station_rdrs.shape[1]))
        for day in range(num_days):
            window_end = train_start + timedelta(days=day)
            window_start = window_end - timedelta(hours=seq_len - 1)
            x[:, day, :] = station_rdrs[window_start:window_end]

        # Scale training data. x_train is a view into x, so this is in place.
        scalers = []  # save scalers to apply them to test data later
        x_train = x[:, :num_train_days, :]
        for i in range(x.shape[2]):
            scaler = preprocessing.StandardScaler()
            x_train[:, :, i] = scaler.fit_transform(x_train[:, :, i].reshape((-1, 1))).reshape(x_train[:, :, i].shape)
            scalers.append(scaler)
        x_train = torch.from_numpy(x_train).float().to(device)
        y_train = torch.from_numpy(station_runoff.loc[train_start:train_end, 'runoff'].to_numpy()).float().to(device)

        # Training / early-stopping settings
        learning_rate = 2e-3
        patience = 50
        min_improvement = 0.05
        # (epoch, loss, weights snapshot) of the best epoch seen so far
        best_loss_model = (-1, np.inf, None)

        # Prepare model
        H = 200
        batch_size = 3
        lstm_layers = 2
        model = LSTMRegression(station_rdrs.shape[1], H, lstm_layers, batch_size).to(device)
        loss_fn = torch.nn.MSELoss(reduction='mean')
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        for epoch in range(300):
            epoch_losses = []
            # Trailing days that do not fill a whole batch are not trained on.
            for i in range(num_train_days // batch_size):
                batch = slice(i * batch_size, (i + 1) * batch_size)
                # Fresh state per batch: batches are treated as independent.
                model.hidden = model.init_hidden()
                y_pred = model(x_train[:, batch, :])

                loss = loss_fn(y_pred, y_train[batch].reshape((batch_size, 1)))
                epoch_losses.append(loss.item())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            epoch_loss = np.array(epoch_losses).mean()
            writer.add_scalar('loss_' + station, epoch_loss, epoch)
            if epoch_loss < best_loss_model[1] - min_improvement:
                # state_dict() returns references to the live parameter
                # tensors; clone them, otherwise later optimizer steps would
                # silently overwrite this "best" snapshot.
                best_weights = {name: tensor.detach().clone()
                                for name, tensor in model.state_dict().items()}
                best_loss_model = (epoch, epoch_loss, best_weights)  # new best model
            elif epoch > best_loss_model[0] + patience:
                print('Patience exhausted in epoch {}. Best loss was {}'.format(epoch, best_loss_model[1]))
                break

        print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
        model.load_state_dict(best_loss_model[2])
        model.eval()

        # Scale test data with the scalers fitted on the training data.
        x_test = x[:, num_train_days:, :]
        for i in range(x.shape[2]):
            x_test[:, :, i] = scalers[i].transform(x_test[:, :, i].reshape((-1, 1))).reshape(x_test[:, :, i].shape)
        # if batch size doesn't align with number of samples, add dummies to the last batch
        if x_test.shape[1] % batch_size != 0:
            x_test = np.concatenate([x_test, np.zeros((x_test.shape[0], batch_size - (x_test.shape[1] % batch_size), x_test.shape[2]))], axis=1)

        x_test = torch.from_numpy(x_test).float().to(device)
        predict = station_runoff[test_start:test_end].copy()

        # NOTE(review): model.hidden is not reset here, so each test batch
        # starts from the state left by the previous batch (the first one from
        # the last training batch), unlike training. Confirm this is intended.
        batch_predictions = []
        # No gradients are needed for inference; this also keeps the state
        # carried in model.hidden from accumulating an autograd graph.
        with torch.no_grad():
            for i in range(x_test.shape[1] // batch_size):
                batch = slice(i * batch_size, (i + 1) * batch_size)
                batch_predictions.append(model(x_test[:, batch, :]).cpu().numpy().reshape(batch_size))
        # The leading empty float64 array keeps the result's dtype float64.
        pred_array = np.concatenate([np.array([]), *batch_predictions])
        predict['runoff'] = pred_array[:predict.shape[0]]  # ignore dummies
        predictions[station] = predict
        actuals[station] = station_runoff['runoff'].loc[test_start:test_end]
        models[station] = model

        nse = evaluate.evaluate_daily(station + '_CV_{}-{}'.format(test_start.strftime('%Y-%m-%d'), test_end), predict['runoff'], actuals[station], writer=writer)
        nse_list.append(nse)

    print('  NSEs: {}:'.format(nse_list))
    median_nse_list.append(np.median(nse_list))

In [None]:
# Display the median NSE of each cross-validation split.
median_nse_list

In [9]:
# Close the TensorBoard writer once all runs have been logged.
writer.close()