In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import os
%matplotlib inline
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as torchdata
import fastai

# Fast AI Time Series Competition No1: Earthquakes.

Data is already downloaded in `../data/`.

## Data Loading

In [129]:
data_path = pathlib.Path('../data')
batch_size = 32

In [130]:
os.listdir(data_path)

['Earthquakes_TEST.txt',
 'Earthquakes.txt',
 'Earthquakes_TRAIN.arff',
 'GADF',
 'Earthquakes_TRAIN.txt',
 'Earthquakes_TEST.arff']

In [131]:
train = np.loadtxt(data_path/'Earthquakes_TRAIN.txt')
test = np.loadtxt(data_path/'Earthquakes_TEST.txt')

In [132]:
train, train_labels = train[:, 1:], train[:, 0].astype(np.int)
test, test_labels = test[:, 1:], test[:, 0].astype(np.int)

In [133]:
train_ds = torchdata.TensorDataset(
    torch.tensor(train, dtype=torch.float32),
    torch.tensor(train_labels, dtype=torch.float32).unsqueeze(1))
test_ds = torchdata.TensorDataset(
    torch.tensor(test, dtype=torch.float32),
    torch.tensor(test_labels, dtype=torch.float32).unsqueeze(1))

In [146]:
train_dl = torchdata.DataLoader(train_ds, batch_size=batch_size, shuffle=False)
test_dl = torchdata.DataLoader(test_ds, batch_size=batch_size, shuffle=False)

In [147]:
data = fastai.DataBunch(train_dl, test_dl)

## LSTM-FCNN
This is a reimplementation of the LSTM-FCNN from https://ieeexplore-ieee-org.ezproxy.cul.columbia.edu/ielx7/6287639/8274985/08141873.pdf?tp=&arnumber=8141873&isnumber=8274985&tag=1

The reported accuracy on the Earthquakes dataset in the paper is .8354.

In [148]:
class ShuffleLSTM(nn.Module):
    def __init__(self, seq_length, dropout=0.0, hidden_size=128, num_layers=1, bidirectional=False):
        super().__init__()
        self.lstm = nn.LSTM(input_size=seq_length,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           bidirectional=bidirectional)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        ## input size will be the batch, input_size, seq_len =  64, 1, 512
        ## we start with the shuffle layer, transforming this in a 1 step multivariate TS:
        ## output should be 1, 64, 512
        x = torch.transpose(x, 0, 1)
        # input_size, batch, seq_len
        out, (_, _) = self.lstm(x)
        # after the lstm, the output will be 1, bs, lstm_size
        return self.dropout(out)

In [149]:
class ConvLayer(nn.Module):
    def __init__(self, in_channels=1, n_channels=128, kernel_size=8, dropout=0.0):
        super().__init__()
        self.conv = nn.Conv1d(in_channels=in_channels,
                             out_channels=n_channels,
                             kernel_size=kernel_size)
        self.bn = nn.BatchNorm1d(num_features=n_channels)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        # input size should be batch, input_size, length = 64, 1, 312 at first
        x = self.conv(x)
        # now batch, n_channels, L
        x = self.bn(x)
        x = F.relu(x)
        # output will be bs, n_filters, L (not equal to inital L)
        return self.dropout(x)
    

In [150]:
class LSTM_FCNN(nn.Module):
    def __init__(self,
                 seq_length,
                 lstm_dropout=0.0,
                 lstm_hidden_size=128,
                 lstm_num_layers=1,
                 lstm_bidirectional=False,
                 conv_n_channels = [128, 256, 128],
                 conv_kernel_sizes = [8, 5, 3],
                 conv_dropout=0.0):
        
        super().__init__()
        
        self.lstm = ShuffleLSTM(seq_length,
                                lstm_dropout,
                                lstm_hidden_size,
                                lstm_num_layers,
                                lstm_bidirectional)
        self.conv1 = ConvLayer(1, conv_n_channels[0], conv_kernel_sizes[0])  
        self.conv2 = ConvLayer(conv_n_channels[0], conv_n_channels[1], conv_kernel_sizes[1])  
        self.conv3 = ConvLayer(conv_n_channels[1], conv_n_channels[2], conv_kernel_sizes[2])  
        
        # global pool is avg pool using the length of the resulting TS
        # we need to calculate this:
        out_len = seq_length - sum(conv_kernel_sizes) + len(conv_kernel_sizes)
        self.global_avg_pool = nn.AvgPool1d(kernel_size = out_len)
        
        
        self.linear = nn.Linear(conv_n_channels[-1] + lstm_hidden_size, 1)
        
        
        
    def forward(self, x):
        # our input is batch, seq_len
        x = x.unsqueeze(1)
        # batch, 1, seq_len
        
        
        lstm_out = self.lstm(x)
        lstm_out = torch.squeeze(lstm_out)
        # lstm_out is bs, lstm_size
        
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # x is now bs, dimension, L
        # avgpool1d averages over the last dimension so we transpose again
        x = self.global_avg_pool(x)
        # bs, dimension, 1
        x = torch.squeeze(x)
        # bs, dimension
        
        concat = torch.cat((lstm_out, x), 1)
        
        return self.linear(concat)


## Training

In [151]:
model = LSTM_FCNN(512, lstm_dropout=0.8, lstm_hidden_size=8)

In [152]:
pos_weight = len(train_labels)/ np.sum(train_labels) -1
pos_weight

4.551724137931035

In [153]:
loss = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([pos_weight], dtype=torch.float32))
#loss = nn.BCEWithLogitsLoss()
loss = loss.cuda()

In [154]:
def my_acc(pred, targ):
    ok = torch.tensor(torch.abs(torch.sign(pred)/2 + .5 - targ) < .01, dtype=torch.float32)
    return torch.mean(ok)

In [155]:
learner = fastai.Learner(data, model, loss_func=loss, metrics=my_acc)
learner.unfreeze()

In [156]:
#learner.lr_find(); learner.recorder.plot()

In [157]:
learner.fit(200, lr=1e-3)

Total time: 00:20
epoch  train_loss  valid_loss  my_acc  
1      1.027947    1.296351    0.460432  (00:00)
2      1.007556    1.223641    0.568345  (00:00)
3      0.995456    1.184080    0.561151  (00:00)
4      0.985683    1.150312    0.517986  (00:00)
5      0.978588    1.152022    0.525180  (00:00)
6      0.970651    1.144507    0.517986  (00:00)
7      0.965992    1.139562    0.517986  (00:00)
8      0.959701    1.170578    0.568345  (00:00)
9      0.951855    1.143247    0.525180  (00:00)
10     0.948136    1.157148    0.546763  (00:00)
11     0.940357    1.153579    0.546763  (00:00)
12     0.932276    1.677880    0.748201  (00:00)
13     0.920552    1.146090    0.482014  (00:00)
14     0.912557    1.394829    0.697842  (00:00)
15     0.903140    2.431006    0.748201  (00:00)
16     0.898813    1.919390    0.294964  (00:00)
17     0.894403    4.207318    0.748201  (00:00)
18     0.886037    1.219529    0.633094  (00:00)
19     0.870746    1.198161    0.575540  (00:00)
20     0.85

In [60]:
learner.model(torch.tensor(train).cuda())

RuntimeError: cuDNN error: CUDNN_STATUS_BAD_PARAM

In [93]:
inp = torch.tensor(test, dtype=torch.float32).cuda()

In [94]:
model.eval()

LSTM_FCNN(
  (lstm): ShuffleLSTM(
    (lstm): LSTM(512, 32)
    (dropout): Dropout(p=0.8)
  )
  (conv1): ConvLayer(
    (conv): Conv1d(1, 128, kernel_size=(8,), stride=(1,))
    (bn): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (dropout): Dropout(p=0.0)
  )
  (conv2): ConvLayer(
    (conv): Conv1d(128, 256, kernel_size=(5,), stride=(1,))
    (bn): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (dropout): Dropout(p=0.0)
  )
  (conv3): ConvLayer(
    (conv): Conv1d(256, 128, kernel_size=(3,), stride=(1,))
    (bn): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (dropout): Dropout(p=0.0)
  )
  (global_avg_pool): AvgPool1d(kernel_size=(499,), stride=(499,), padding=(0,))
  (linear): Linear(in_features=160, out_features=1, bias=True)
)

In [95]:
out = model(inp)

In [96]:
out = out.detach().cpu().numpy()

In [97]:
out = out.squeeze()

In [98]:
out = (np.sign(out)/2 + .5).astype(np.int)

In [99]:
np.sum(test_labels == out)

79

In [100]:
len(out)

139

In [123]:
for x, y in data.valid_dl:
    break

In [125]:
x.shape

torch.Size([128, 512])

In [126]:
y.shape

torch.Size([128, 1])

In [113]:
model.train()

LSTM_FCNN(
  (lstm): ShuffleLSTM(
    (lstm): LSTM(512, 32)
    (dropout): Dropout(p=0.8)
  )
  (conv1): ConvLayer(
    (conv): Conv1d(1, 128, kernel_size=(8,), stride=(1,))
    (bn): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (dropout): Dropout(p=0.0)
  )
  (conv2): ConvLayer(
    (conv): Conv1d(128, 256, kernel_size=(5,), stride=(1,))
    (bn): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (dropout): Dropout(p=0.0)
  )
  (conv3): ConvLayer(
    (conv): Conv1d(256, 128, kernel_size=(3,), stride=(1,))
    (bn): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (dropout): Dropout(p=0.0)
  )
  (global_avg_pool): AvgPool1d(kernel_size=(499,), stride=(499,), padding=(0,))
  (linear): Linear(in_features=160, out_features=1, bias=True)
)

In [114]:
for m in model.children():
    print(m.training)
    for j in m.children():
        print(j)
        print(j.training)

True
LSTM(512, 32)
True
Dropout(p=0.8)
True
True
Conv1d(1, 128, kernel_size=(8,), stride=(1,))
True
BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
True
Dropout(p=0.0)
True
True
Conv1d(128, 256, kernel_size=(5,), stride=(1,))
True
BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
True
Dropout(p=0.0)
True
True
Conv1d(256, 128, kernel_size=(3,), stride=(1,))
True
BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
True
Dropout(p=0.0)
True
True
True


In [159]:
sum(train_labels)

58

In [160]:
len(train_labels)

322

In [161]:
np.mean(train_labels == 0)

0.8198757763975155