In [None]:
import os
import pandas as pd
import numpy as np
import pickle
from decimal import Decimal
import time
import argparse
from random import shuffle
import copy 

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchvision.utils import save_image
#from torchnet.meter import AverageValueMeter
import torch.backends.cudnn as cudnn

In [None]:
import torch.utils.data as data
# from sklearn import preprocessing
# from sklearn.preprocessing import MinMaxScaler

## Prepare data

In [None]:
# Run configuration shared by the cells below.
# 'test_size' and 'samples_per_epoch' are not referenced by the visible cells.
parser = {
    'nb_epoch': 20,
    'test_size': 0.1,
    'learning_rate': 0.001,
    'samples_per_epoch': 64,
    'batch_size': 64,
    'cuda': False,
    'seed': 7
}

# Expose the settings as attributes (args.batch_size, args.seed, ...).
args = argparse.Namespace(**parser)
# Use CUDA only when it was requested AND a device is actually available.
args.cuda = args.cuda and torch.cuda.is_available()

# Seed the CPU generators as well: the default configuration runs on CPU, and
# seeding only CUDA would leave weight initialisation unseeded there.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed_all(args.seed)

## Read CSV

In [None]:
# Training inputs: tab-separated text with no header row, so pandas labels the
# columns with integers (0, 1, ...).
X_train_df = pd.read_csv('../data/C1-6_CanTho/C1/Training_Input.txt', sep='\t', header=None)

In [None]:
X_train_df.head()

In [None]:
# Training targets: tab-separated text with no header row (integer column labels).
y_train_df = pd.read_csv('../data/C1-6_CanTho/C1/Training_Target.txt', sep='\t', header=None)

In [None]:
y_train_df.head()

In [None]:
# Validation inputs and targets, read the same way as the training files.
# NOTE(review): 'Validation_target.txt' uses a lowercase 't' while the other
# target files use 'Target' — confirm the file name on case-sensitive filesystems.
X_valid_df = pd.read_csv('../data/C1-6_CanTho/C1/Validation_Input.txt', sep='\t', header=None)
y_valid_df = pd.read_csv('../data/C1-6_CanTho/C1/Validation_target.txt', sep='\t', header=None)

In [None]:
# Test inputs and targets, read the same way as the training files.
X_test_df = pd.read_csv('../data/C1-6_CanTho/C1/Testing_Input.txt', sep='\t', header=None)
y_test_df = pd.read_csv('../data/C1-6_CanTho/C1/Testing_Target.txt', sep='\t', header=None)

## MinMax Normalize

In [None]:
class Normalizer:
    """Min-max scaler: maps values linearly so that the fitted range becomes [0, 1].

    Call ``fit`` once to record the minimum and maximum, then ``transform`` any
    array expressed in the same units. ``inverse_transform`` undoes the scaling.
    """

    def __init__(self):
        """Create an unfitted scaler; ``fit`` must be called before use."""
        pass

    def fit(self, X):
        """Record the minimum and maximum of ``X``.

        Args:
            X: non-empty array-like of numbers.

        Raises:
            ValueError: if ``X`` is empty.
        """
        values = np.asarray(X)
        # Kept as an attribute because earlier versions exposed the fitted data.
        self.X = X
        self.min = values.min()
        self.max = values.max()

    def _span(self):
        """Return ``max - min``, or 1 for constant data to avoid dividing by zero."""
        span = self.max - self.min
        return span if span != 0 else 1

    def transform(self, X):
        """Scale ``X`` (not the fitted data) with the fitted minimum and range.

        Args:
            X: array-like to scale; values outside the fitted range map outside [0, 1].

        Returns:
            The scaled values, ``(X - min) / (max - min)``.
        """
        return (X - self.min) / self._span()

    def inverse_transform(self, X_norm):
        """Map scaled values back to the original units."""
        return X_norm * self._span() + self.min

    def inverve_transform(self, X_norm):
        """Misspelled alias of ``inverse_transform``, kept for existing callers."""
        return self.inverse_transform(X_norm)

In [None]:
def getPreprapredData(X_df, y_df, rainfall_normalizer=None, current_normalizer=None):
    """Min-max scale the two input columns and the target column.

    Args:
        X_df: DataFrame whose columns ``0`` and ``1`` hold the two input
            features (called "rainfall" and "current" in this notebook).
        y_df: DataFrame whose column ``0`` holds the target.
        rainfall_normalizer: optional, already fitted scaler for column ``0``.
            When omitted, a new ``Normalizer`` is fitted on ``X_df[0]``.
        current_normalizer: optional, already fitted scaler for column ``1``
            and for the target. When omitted, a new ``Normalizer`` is fitted
            on ``X_df[1]``. Pass the scaler returned for the training split to
            scale other splits with the training statistics.

    Returns:
        tuple: ``(X_normalized, y_normalized, current_normalizer)`` where
        ``X_normalized`` has shape ``(n_rows, 2)``, ``y_normalized`` is the
        target scaled by ``current_normalizer``, and ``current_normalizer`` is
        the scaler needed to map predictions back to the original units.
    """
    rainfall = X_df[0].values
    current = X_df[1].values
    target = y_df[0].values

    if rainfall_normalizer is None:
        rainfall_normalizer = Normalizer()
        rainfall_normalizer.fit(rainfall)
    if current_normalizer is None:
        current_normalizer = Normalizer()
        current_normalizer.fit(current)

    # One row per sample, one column per feature.
    X_normalized = np.column_stack((
        rainfall_normalizer.transform(rainfall),
        current_normalizer.transform(current),
    ))
    # The target is deliberately scaled with the column-1 scaler, as before.
    y_normalized = current_normalizer.transform(target)
    return X_normalized, y_normalized, current_normalizer

In [None]:
# Scale the training split; the returned normalizer was fitted on its column 1.
(X_train_normalized,
 y_train_normalized,
 train_current_normalizer) = getPreprapredData(X_train_df, y_train_df)

In [None]:
# Scale the validation split; its normalizer is fitted on the validation data itself.
(X_valid_normalized,
 y_valid_normalized,
 valid_current_normalizer) = getPreprapredData(X_valid_df, y_valid_df)

In [None]:
# Scale the test split; its normalizer is fitted on the test data itself.
(X_test_normalized,
 y_test_normalized,
 test_current_normalizer) = getPreprapredData(X_test_df, y_test_df)

In [None]:
X_test_normalized.shape

In [None]:
# X_train_normalized = preprocessing.normalize(X_train_df.values, norm='l2')
# y_train_normalized = preprocessing.normalize(y_train_df.values, norm='l2')

## Define Dataset and Model

In [None]:
class WaterDataset(data.Dataset):
    """Map-style dataset pairing each row of ``X`` with the matching entry of ``y``."""

    def __init__(self, X, y , is_training=True, transform=None):
        """Keep references to the samples and targets.

        ``transform`` is accepted but is neither stored nor applied.
        """
        self.X, self.y = X, y
        self.is_training = is_training

    def __len__(self):
        """Number of samples, read from the first dimension of ``X``."""
        n_samples = self.X.shape[0]
        return n_samples

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        features = self.X[index]
        target = self.y[index]
        return features, target

In [None]:
# Here we define our model as a class
class LSTM(nn.Module):
    """Stacked LSTM whose final hidden states feed a linear layer squashed by tanh.

    Args:
        input_dim: number of features per time step.
        hidden_dim: hidden units per LSTM layer.
        batch_size: default batch size used by ``init_hidden``; ``forward``
            reads the batch size from its input instead.
        output_dim: size of the prediction for each sample.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, batch_size, output_dim=1,
                    num_layers=2):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers

        # Recurrent stack; inputs are (seq_len, batch, input_dim).
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)

        # Read-out over the final hidden state of every layer, concatenated.
        self.linear = nn.Linear(self.hidden_dim*self.num_layers, output_dim)

    def init_hidden(self, batch_size=None):
        """Return zero ``(h0, c0)`` tensors of shape (num_layers, batch, hidden_dim).

        Args:
            batch_size: batch dimension to use; defaults to ``self.batch_size``.
        """
        if batch_size is None:
            batch_size = self.batch_size
        shape = (self.num_layers, batch_size, self.hidden_dim)
        return (torch.zeros(*shape), torch.zeros(*shape))

    def forward(self, x, cuda=False):
        """Predict one value per sample from a (seq_len, batch, input_dim) tensor.

        Args:
            x: input tensor laid out as (seq_len, batch, input_dim).
            cuda: ignored; kept so existing calls that pass it keep working.
                The initial state is created on the device of ``x``.

        Returns:
            Tensor of shape (batch, output_dim) with values in (-1, 1).
        """
        batch = x.size(1)
        # new_zeros inherits dtype and device from x, so CPU and GPU both work.
        h0 = x.new_zeros((self.num_layers, batch, self.hidden_dim))
        c0 = x.new_zeros((self.num_layers, batch, self.hidden_dim))
        _, (h_t, _) = self.lstm(x, (h0, c0))
        # h_t is (num_layers, batch, hidden_dim). Move batch to the front before
        # flattening; a plain view(-1, ...) would mix states of different samples.
        features = h_t.permute(1, 0, 2).reshape(batch, self.num_layers * self.hidden_dim)
        return torch.tanh(self.linear(features))

In [None]:
# Wrap each normalized split so it can be indexed as (features, target) pairs.
train_dataset = WaterDataset(X_train_normalized, y_train_normalized)
valid_dataset = WaterDataset(X_valid_normalized, y_valid_normalized)
test_dataset = WaterDataset(X_test_normalized, y_test_normalized)

In [None]:
# All three loaders share the same settings: fixed batch size, original row
# order (no shuffling) and loading in the main process.
_loader_kwargs = dict(batch_size=args.batch_size, shuffle=False, num_workers=0)
train_loader = DataLoader(train_dataset, **_loader_kwargs)
valid_loader = DataLoader(valid_dataset, **_loader_kwargs)
test_loader = DataLoader(test_dataset, **_loader_kwargs)

## Train

In [None]:
# Training
def train(epoch, net, dataloader, optimizer, criterion, use_cuda):
    """Run one optimisation pass over ``dataloader``.

    Args:
        epoch: index of the current epoch (not used inside the function).
        net: model called with a (1, batch, features) tensor and returning
            one row per sample.
        dataloader: iterable yielding ``(X, y)`` batches, ``X`` shaped
            (batch, features) and ``y`` holding one target per sample.
        optimizer: optimizer holding the parameters of ``net``.
        criterion: loss called as ``criterion(prediction, target)``.
        use_cuda: move each batch to the GPU before the forward pass.

    Returns:
        float: mean of the per-batch losses, or ``nan`` when the loader is empty.
    """
    net.train()
    train_loss = 0.0
    n_batches = 0

    for batch_idx, (X, y) in enumerate(dataloader):
        # (batch, features) -> (seq_len=1, batch, features); targets as a column.
        X = X.float().view(1, X.size(0), -1)
        y = y.float().view(-1, 1)
        if use_cuda:
            X, y = X.cuda(), y.cuda()

        optimizer.zero_grad()
        out = net(X)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        n_batches = batch_idx + 1

        if batch_idx % 100 == 0:
            # Running mean over the batches seen so far.
            print('Train Loss: %.3f '
                % (train_loss / n_batches))

    return train_loss / n_batches if n_batches else float('nan')

In [None]:
# Valid
def valid(epoch, net, dataloader, criterion, use_cuda):
    """Evaluate ``net`` on ``dataloader`` without updating its parameters.

    Args:
        epoch: index of the current epoch (not used inside the function).
        net: model called with a (1, batch, features) tensor and returning
            one row per sample.
        dataloader: iterable yielding ``(X, y)`` batches, ``X`` shaped
            (batch, features) and ``y`` holding one target per sample.
        criterion: loss called as ``criterion(prediction, target)``.
        use_cuda: move each batch to the GPU before the forward pass.

    Returns:
        float: mean of the per-batch losses, or ``nan`` when the loader is empty.
    """
    net.eval()
    valid_loss = 0.0
    n_batches = 0

    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for batch_idx, (X, y) in enumerate(dataloader):
            # (batch, features) -> (seq_len=1, batch, features); targets as a column.
            X = X.float().view(1, X.size(0), -1)
            y = y.float().view(-1, 1)
            if use_cuda:
                X, y = X.cuda(), y.cuda()

            out = net(X)
            valid_loss += criterion(out, y).item()
            n_batches = batch_idx + 1

            if batch_idx % 100 == 0:
                # Running mean over the batches seen so far.
                print('Valid Loss: %.3f '
                    % (valid_loss / n_batches))

    return valid_loss / n_batches if n_batches else float('nan')

In [None]:
# Model: 2 input features, 10 hidden units per layer, 2 stacked layers, 1 output.
net = LSTM(2, 10, batch_size=args.batch_size, output_dim=1, num_layers=2)
optimizer = optim.Adam(net.parameters(), lr=args.learning_rate)

# On GPU: move the model, wrap it for multi-device execution and let cuDNN
# auto-tune its kernels.
# NOTE(review): DataParallel presumably splits inputs along dim 0, while the
# training code feeds tensors shaped (1, batch, features) — confirm that the
# wrapper actually distributes work across devices.
if args.cuda:
    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

# Mean squared error between predictions and the normalized targets.
criterion = nn.MSELoss()

In [None]:
# One training pass followed by one validation pass per epoch.
for epoch in range(args.nb_epoch):
    print('\nEpoch: %d' % epoch)
    train(epoch, net, train_loader, optimizer, criterion, args.cuda)
    valid(epoch, net, valid_loader, criterion, args.cuda)

## Testing

In [None]:
# Test
def test(net, dataloader, criterion, use_cuda):
    net.eval()
    test_loss = 0
    y_pred = []
    
    for batch_idx, (X, y) in enumerate(dataloader):
        X = X.view(1, X.size(0), 2)
        X, y = Variable(X.float()), Variable(y.float())
        if use_cuda:
            X, y = X.cuda(), y.cuda()
        out = net(X)
        loss = criterion(out, y.view(y.size(0), 1))
        test_loss += loss.data.item()
        y_pred.extend(out.view(-1).detach().numpy())
            
    print('Test Loss: %.3f '
                % (test_loss/((batch_idx+1)*3)))
    return np.array(y_pred)

In [None]:
# Predictions for the test split, still on the normalized scale.
y_pred = test(net, test_loader, criterion, args.cuda)

In [None]:
# Map predictions back to the original units with the normalizer that was
# fitted on column 1 of the test inputs.
y_pred_unnormalized = test_current_normalizer.inverve_transform(y_pred)

In [None]:
y_pred_unnormalized.shape

In [None]:
# Put the predictions in a frame and add an explicit 'index' column to both
# frames. Note that this adds the column to y_test_df in place.
y_pred_df = pd.DataFrame(y_pred_unnormalized)
y_pred_df['index'] = y_pred_df.index
y_test_df['index'] = y_test_df.index
y_pred_df.head()

## Visualization

In [None]:
plt.rcParams['figure.figsize'] = [12, 8]

In [None]:
# Overlay the de-normalized predictions and the raw test targets, both plotted
# against their position in the test set.
fig, ax = plt.subplots()
ax.plot(y_pred_unnormalized, label='y_pred', color='blue')
ax.plot(y_test_df[0].values, label='y_test', color='orange')
ax.set_xlabel('Sample index')
ax.set_ylabel('Value (original scale)')
ax.set_title('Test vs Prediction')
# The trailing semicolon suppresses the Legend repr in the cell output.
ax.legend(loc='lower left');