In [1]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader,Dataset
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.optim import lr_scheduler
import time
from sklearn.metrics import r2_score
import time
from IPython.display import HTML
import json
from sklearn.preprocessing import LabelEncoder

import torch.utils.data
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold, KFold, RepeatedKFold, train_test_split, GroupKFold, GroupShuffleSplit

batch_size = 1024


In [2]:
class RNN(nn.Module):

    def __init__(self, in_dim, hidden_dim, num_layers, dropout, bidirectional, num_classes, batch_size):
        super(RNN, self).__init__()
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.bidirectional = bidirectional
        self.num_dir = 2 if bidirectional else 1
        self.num_layers = num_layers
        self.dropout = dropout

        self.lstm = nn.LSTM(input_size=self.in_dim, hidden_size=self.hidden_dim, num_layers=self.num_layers, dropout=self.dropout/2, bidirectional=self.bidirectional,
                            batch_first=True)
        self.gru = nn.GRU(self.hidden_dim * 2, self.hidden_dim, bidirectional=self.bidirectional, batch_first=True)


        self.fc = nn.Sequential(
            nn.Linear(int(hidden_dim)*self.num_dir, 1),
#             nn.Softmax()
        )

    def forward(self, x):
        
        self.lstm.flatten_parameters()
        self.gru.flatten_parameters()
        
        lstm_out, _ = self.lstm(x)
        gru_out, _ = self.gru(lstm_out)
        
        x = torch.cat((lstm_out, gru_out), 1)
        y = self.fc(x)
        
        return y

In [3]:
def plot_training_process(df: pd.DataFrame(), epoch_col: str, value_columns: list, y_axis_name: str, title: str):
    
    # code mostly based on: https://altair-viz.github.io/gallery/multiline_tooltip.html
    plot_df = df.melt(id_vars=epoch_col, value_vars=value_columns, var_name='group', value_name=y_axis_name)
    plot_df[y_axis_name] = plot_df[y_axis_name].round(4)
    nearest = alt.selection(type='single', nearest=True, on='mouseover', fields=[epoch_col], empty='none')
    line = alt.Chart().mark_line(interpolate='basis').encode(
        x=f'{epoch_col}:Q',
        y=f'{y_axis_name}:Q',
        color='group:N',
    ).properties(
        title=title
    )

    # Transparent selectors across the chart. This is what tells us
    # the x-value of the cursor
    selectors = alt.Chart().mark_point().encode(
        x=f'{epoch_col}:Q',
        opacity=alt.value(0),
    ).add_selection(
        nearest
    )

    # Draw points on the line, and highlight based on selection
    points = line.mark_point().encode(
        opacity=alt.condition(nearest, alt.value(1), alt.value(0))
    )

    # Draw text labels near the points, and highlight based on selection
    text = line.mark_text(align='left', dx=5, dy=-5).encode(
        text=alt.condition(nearest, f'{y_axis_name}:Q', alt.value(' '))
    )

    # Draw a rule at the location of the selection
    rules = alt.Chart().mark_rule(color='gray').encode(
        x=f'{epoch_col}:Q',
    ).transform_filter(
        nearest
    )

    # Put the five layers into a chart and bind the data
    return alt.layer(line, selectors, points, rules, text,
              data=plot_df, width=600, height=300).interactive()

In [4]:
def train_net(train_loader, val_loader, patience, model, criterion, optimizer, scheduler, verbose, plot_training):
    valid_loss_min = np.Inf
    patience = patience
    # current number of epochs, where validation loss didn't increase
    p = 0
    # whether training should be stopped
    stop = False

    epochs = 20000
    training_logs = []

    for e in range(1, epochs + 1):
        # print(time.ctime(), 'Epoch:', e)
        train_loss = []
        train_acc = []
        model.train()
        for batch_i, (data, target) in enumerate(train_loader):
            data, target = data.cuda(), target.float().cuda()

            optimizer.zero_grad()
            output = model(data)
            output = output[:,-1,:]
            loss = criterion(output, target.unsqueeze(1))
            train_loss.append(loss.item())

            a = target.data.cpu().numpy()
            b = output.squeeze().detach().cpu().numpy()
            train_acc.append(r2_score(a, b))

            loss.backward()
            optimizer.step()

        val_loss = []
        val_acc = []
        for batch_i, (data, target) in enumerate(val_loader):
            data, target = data.cuda(), target.float().cuda()
            output = model(data)
            output = output[:,-1,:]
            loss = criterion(output, target.unsqueeze(1))
            val_loss.append(loss.item()) 
            a = target.data.cpu().numpy()
            b = output.squeeze().detach().cpu().numpy()
            val_acc.append(r2_score(a, b))

        if e % 1 == 0 and verbose:
            print(f'Epoch {e}, train loss: {np.mean(train_loss):.4f}, valid loss: {np.mean(val_loss):.4f}, train acc: {np.mean(train_acc):.4f}, valid acc: {np.mean(val_acc):.4f}')

        training_logs.append([e, np.mean(train_loss), np.mean(val_loss), np.mean(train_acc), np.mean(val_acc)])

        scheduler.step(np.mean(val_loss))

        valid_loss = np.mean(val_loss)
        if valid_loss <= valid_loss_min:
            # print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min, valid_loss))
            torch.save(model.state_dict(), 'model.pt')
            valid_loss_min = valid_loss
            p = 0

        # check if validation loss didn't improve
        if valid_loss > valid_loss_min:
            p += 1
            # print(f'{p} epochs of increasing val loss')
            if p > patience:
                print('Stopping training')
                stop = True
                break        

        if stop:
            break

    checkpoint = torch.load('model.pt')      
    model.load_state_dict(checkpoint)
    
    if plot_training:
        training_logs = pd.DataFrame(training_logs, columns=['Epoch', 'Train loss', 'Valid loss', 'Train accuracy', 'Validation accuracy'])
        loss_plot = plot_training_process(df=training_logs, epoch_col='Epoch', value_columns=['Train loss', 'Valid loss'], y_axis_name='loss', title='Loss progress')
        acc_plot = plot_training_process(df=training_logs, epoch_col='Epoch', value_columns=['Train accuracy', 'Validation accuracy'], y_axis_name='accuracy', title='Accuracy progress')
        render(loss_plot & acc_plot)
    
    return model

In [5]:
def initialize_model():
    torch.manual_seed(42)
    model = RNN(63, 256, 3, 0.1, True, 9, batch_size)
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=.5)
    model = nn.DataParallel(model.cuda())
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=8, factor=0.5, verbose=True)
    return model, criterion, optimizer, scheduler


In [6]:
def train_net_folds(X, X_test, y, folds, plot_training, batch_size, patience, verbose):

    oof = np.zeros((len(X), 9))
    prediction = np.zeros((len(X_test), 9))
    scores = []
    for fold_n, (train_index, valid_index) in enumerate(folds.split(X, y)):
        print('Fold', fold_n, 'started at', time.ctime())
        X_train, X_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]
        
        train_set = torch.utils.data.TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train))
        val_set = torch.utils.data.TensorDataset(torch.FloatTensor(X_valid), torch.LongTensor(y_valid))

        train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
        val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size)
                
        model, criterion, optimizer, scheduler = initialize_model()
        model = train_net(train_loader, val_loader, patience, model, criterion, optimizer, scheduler, verbose, plot_training)
        
        y_pred_valid = []
        for batch_i, (data, target) in enumerate(val_loader):
            data, target = data.cuda(), target.cuda()
            p = model(data)
            pred = p.cpu().detach().numpy()
            y_pred_valid.extend(pred)
            
        y_pred = []
        for i, data in enumerate(test):
            p = model(torch.FloatTensor(data).unsqueeze(0).cuda())
            y_pred.append(p.cpu().detach().numpy().flatten())
            
        oof[valid_index] = np.array(y_pred_valid)
        scores.append(r2_score(y_valid, np.array(y_pred_valid)))

        prediction += y_pred

    prediction /= n_fold
    
    prediction = np.array(prediction).argmax(1)
    
    print('CV mean score: {0:.4f}, std: {1:.4f}.'.format(np.mean(scores), np.std(scores)))
    print('--' * 50)
    
    return oof, prediction

In [7]:
data_x = np.load('data_x.npy')
data_y = np.load('data_y.npy')
print(data_x.shape)
print(data_y.shape)

(2007211, 20, 63)
(2007211,)


In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.1, random_state=42)

In [None]:
n_fold = 5
folds = KFold(n_splits=n_fold, shuffle=True, random_state=42)
oof, prediction = train_net_folds(X_train, X_test, y_train, folds, True, batch_size, 40, True)

Fold 0 started at Wed Aug 21 14:41:36 2019
Epoch 1, train loss: 0.0026, valid loss: 0.0026, train acc: -0.0333, valid acc: -0.0085
Epoch 2, train loss: 0.0026, valid loss: 0.0025, train acc: -0.0030, valid acc: 0.0239
Epoch 3, train loss: 0.0026, valid loss: 0.0025, train acc: -0.0105, valid acc: -0.0153
Epoch 4, train loss: 0.0025, valid loss: 0.0024, train acc: 0.0098, valid acc: 0.0450
Epoch 5, train loss: 0.0025, valid loss: 0.0024, train acc: 0.0126, valid acc: 0.0454
Epoch 6, train loss: 0.0025, valid loss: 0.0024, train acc: 0.0285, valid acc: 0.0435
Epoch 7, train loss: 0.0024, valid loss: 0.0024, train acc: 0.0335, valid acc: 0.0246
Epoch 8, train loss: 0.0024, valid loss: 0.0024, train acc: 0.0352, valid acc: 0.0465
Epoch 9, train loss: 0.0024, valid loss: 0.0024, train acc: 0.0371, valid acc: 0.0451
Epoch 10, train loss: 0.0024, valid loss: 0.0024, train acc: 0.0392, valid acc: 0.0421
Epoch 11, train loss: 0.0024, valid loss: 0.0024, train acc: 0.0382, valid acc: 0.0461
Epoc

Epoch 93, train loss: 0.0024, valid loss: 0.0023, train acc: 0.0607, valid acc: 0.0588
Epoch 94, train loss: 0.0024, valid loss: 0.0023, train acc: 0.0616, valid acc: 0.0588
Epoch 95, train loss: 0.0024, valid loss: 0.0023, train acc: 0.0590, valid acc: 0.0584
Epoch 96, train loss: 0.0024, valid loss: 0.0023, train acc: 0.0601, valid acc: 0.0589
Epoch 97, train loss: 0.0024, valid loss: 0.0023, train acc: 0.0606, valid acc: 0.0592
Epoch 98, train loss: 0.0024, valid loss: 0.0023, train acc: 0.0608, valid acc: 0.0591
Epoch 99, train loss: 0.0024, valid loss: 0.0023, train acc: 0.0607, valid acc: 0.0579
Epoch 100, train loss: 0.0024, valid loss: 0.0023, train acc: 0.0599, valid acc: 0.0590
Epoch 101, train loss: 0.0024, valid loss: 0.0023, train acc: 0.0612, valid acc: 0.0590
Epoch 102, train loss: 0.0024, valid loss: 0.0023, train acc: 0.0570, valid acc: 0.0592
Epoch 103, train loss: 0.0024, valid loss: 0.0023, train acc: 0.0610, valid acc: 0.0591
Epoch   102: reducing learning rate of 