In [1]:
%load_ext autoreload
%autoreload 2
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate
import torch.autograd as autograd

In [2]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
device = torch.device('cuda' if USE_CUDA else 'cpu')

writer = SummaryWriter()

CUDA Available


In [3]:
data_runoff = load_data.load_discharge_gr4j_vic()

  data = pd.read_csv(os.path.join(dir, f), skiprows=2, skipfooter=1, index_col=False, header=None, names=['runoff'], na_values='-1.2345')
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


In [4]:
# For each station, read which grid cells belong to its subwatershed
station_cell_mapping = pd.read_csv('station_cell_mapping.csv', skiprows=1, names=['station', 'lat', 'lon', 'row', 'col', 'area'])

In [5]:
rdrs_data = load_data.load_rdrs_forcings()

  var_data = pd.DataFrame(rdrs_nc[var][:].reshape(43825,34*39))


In [6]:
predictions = {}
actuals = {}
seq_len = 7 * 24
train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len // 24 + 1)
train_end = '2013-12-31'
test_start = '2014-01-01'
test_end = '2014-12-31'

for station in data_runoff['station'].unique():
    print(station)
    station_runoff = data_runoff[data_runoff['station'] == station].set_index('date')
    station_cell_ids = 39 * station_cell_mapping[station_cell_mapping['station'] == station]['col'] \
        + station_cell_mapping[station_cell_mapping['station'] == station]['row']
    station_rdrs = rdrs_data.filter(regex='_(' + '|'.join(map(lambda x: str(x), station_cell_ids)) + ')$', axis=1)
    
    if any(station_runoff['runoff'].isna()):
        print('Station', station, 'had NA runoff values. Skipping.')
        continue
    
    station_train = station_rdrs.loc[train_start : train_end]
    station_test = station_rdrs.loc[test_start : test_end]
    num_train_days = len(pd.date_range(train_start, train_end, freq='D'))
    print('  Fitting model')
    
    H = 100
    batch_size = 3
    class LSTMRegression(nn.Module):
        def __init__(self, input_dim, hidden_dim, num_layers, batch_size):
            super(LSTMRegression, self).__init__()
            self.batch_size = batch_size
            self.hidden_dim = hidden_dim
            self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers)
            self.linear = nn.Linear(hidden_dim, 1)
            self.hidden = self.init_hidden()
        def init_hidden(self):
            return (torch.randn(2, self.batch_size, self.hidden_dim, device=device),
                    torch.randn(2, self.batch_size, self.hidden_dim, device=device))

        def forward(self, input):
            lstm_out, self.hidden = self.lstm(input, self.hidden)
            return self.linear(lstm_out[-1])
    model = LSTMRegression(station_rdrs.shape[1], H, 2, batch_size).to(device)
    
    loss_fn = torch.nn.MSELoss(reduction='mean')
    
    x = np.zeros((seq_len, len(pd.date_range(train_start, test_end, freq='D')), station_rdrs.shape[1]))
    for day in range(x.shape[1]):
        x[:,day,:] = station_rdrs[train_start - timedelta(hours = seq_len - 1) + timedelta(days=day) : train_start + timedelta(days=day)]
    
    scalers = []
    x_train = x[:,:num_train_days,:]
    for i in range(x.shape[2]):
        scalers.append(preprocessing.StandardScaler())
        x_train[:,:,i] = scalers[i].fit_transform(x_train[:,:,i].reshape((-1, 1))).reshape(x_train[:,:,i].shape)
    x_train = torch.from_numpy(x_train).float().to(device)
    y_train = torch.from_numpy(station_runoff.loc[train_start:train_end, 'runoff'].to_numpy()).float().to(device)
    
    learning_rate = 3e-3
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    for epoch in range(100):
        epoch_losses = []
        for i in range(num_train_days // batch_size):
            model.hidden = model.init_hidden()
            y_pred = model(x_train[:,i*batch_size : (i+1)*batch_size,:])

            loss = loss_fn(y_pred, y_train[i*batch_size : (i+1)*batch_size].reshape((batch_size,1))).to(device)
            epoch_losses.append(loss.item())
            if i % 50 == 0:
                print(epoch, i, loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        epoch_losses = np.array(epoch_losses)
        print('Epoch', epoch, 'mean loss:', epoch_losses.mean())
        writer.add_scalar('loss_' + station, epoch_losses.mean(), epoch)

    x_test = x[:,num_train_days:,:]
    for i in range(x.shape[2]):
        x_test[:,:,i] = scalers[i].transform(x_test[:,:,i].reshape((-1, 1))).reshape(x_test[:,:,i].shape)
    if x_test.shape[1] % batch_size != 0:  # if batch size doesn't align with number of samples, add dummies
        x_test = np.concatenate([x_test, np.zeros((x_test.shape[0], batch_size - (x_test.shape[1] % batch_size), x_test.shape[2]))], axis=1)
    
    x_test = torch.from_numpy(x_test).float().to(device)
    predict = station_runoff[test_start:test_end].copy()
    predict['runoff'] = np.nan
    pred_array = np.array([])
    print('  Predicting')
    iterations = x_test.shape[1] // batch_size
    for i in range(iterations):
        pred_array = np.concatenate([pred_array, model(x_test[:,i*batch_size : (i+1)*batch_size,:]).detach().cpu().numpy().reshape(batch_size)])
    predict['runoff'] = pred_array[:predict.shape[0]]  # ignore dummies
    predictions[station] = predict
    actuals[station] = station_runoff['runoff'].loc[test_start:test_end]

04159900
  Fitting model
0 0 0.43398356437683105
0 50 4.66759729385376
0 100 15.616595268249512
0 150 186.7809600830078
0 200 16.411176681518555
0 250 8.710270881652832
0 300 2.1504337787628174
0 350 0.0178106427192688
0 400 176.9302520751953
0 450 4.514988422393799
Epoch 0 mean loss: 40.91200969704669
1 0 11.73190689086914
1 50 4.092259883880615
1 100 1.1586664915084839
1 150 175.04443359375
1 200 4.623472690582275
1 250 6.333711624145508
1 300 1.192802906036377
1 350 2.9563357830047607
1 400 146.4366912841797
1 450 4.944770336151123
Epoch 1 mean loss: 39.783458892899674
2 0 8.267946243286133
2 50 12.495262145996094
2 100 3.949660301208496
2 150 242.92018127441406
2 200 18.00117301940918
2 250 2.7628612518310547
2 300 5.7670464515686035
2 350 3.337369203567505
2 400 158.7948760986328
2 450 11.305464744567871
Epoch 2 mean loss: 41.482837872133274
3 0 3.9310462474823
3 50 8.630685806274414
3 100 4.347559452056885
3 150 189.6335906982422
3 200 9.0425443649292
3 250 7.760447025299072
3 30

In [7]:
nse_list = []
plot_list = ['04159900', '04159492', '02GG013']
for station, predict in predictions.items():
    nse = evaluate.evaluate_daily(station, predict['runoff'], actuals[station], plot=station in plot_list, writer=writer)
    nse_list.append(nse)
    
    print(station, '\tNSE: (clipped to 0)', nse_list[-1])

print('Median NSE (clipped to 0)', np.median(nse_list), '/ Min', np.min(nse_list), '/ Max', np.max(nse_list))


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


04159900 	NSE: (clipped to 0) 0.18441701133265087
02GC010 	NSE: (clipped to 0) 0.3943169862015541
04215500 	NSE: (clipped to 0) 0.353594196029715
04174500 	NSE: (clipped to 0) 0.04541428245939927
04165500 	NSE: (clipped to 0) 0.31264624788014084
02GB001 	NSE: (clipped to 0) 0.29324799035085847
04200500 	NSE: (clipped to 0) 0.44027490378707
04199500 	NSE: (clipped to 0) 0.2625497326752958
04177000 	NSE: (clipped to 0) 0.08799249929035491
04208504 	NSE: (clipped to 0) 0.32458536319871567
04207200 	NSE: (clipped to 0) 0.428411080237837
04213000 	NSE: (clipped to 0) 0.2262173462749184
02GE007 	NSE: (clipped to 0) 0.14495132236254937
02GG009 	NSE: (clipped to 0) -0.03413619394202683
04195820 	NSE: (clipped to 0) 0.15393481496457462
04159492 	NSE: (clipped to 0) 0.009149339523299327
04161820 	NSE: (clipped to 0) 0.0470437344823087
02GG003 	NSE: (clipped to 0) 0.10696680278864579
04196800 	NSE: (clipped to 0) 0.0550422409031891
04215000 	NSE: (clipped to 0) 0.16339840306122466
04166500 	NSE: 

In [8]:
writer.close()