# DeepLoc Mxnet Port

In [1]:
import mxnet as mx
import numpy
import time
import mxnet.ndarray as nd
import mxnet.initializer as init
from mxnet import np, npx, autograd, optimizer, gluon
from mxnet.gluon import nn, rnn


## Parameters

In [2]:
epoch = 200         #-- integer, epoch
batch_size = 32     #-- integer, minibatches size
max_seq_size = 1000 #-- integer, maximum sequence size
n_hid = 256         #-- integer, number of hidden neurons
n_feat = 20         #-- integer, number of features encoded  X_test.shape[2]
n_class = 10        #-- integer, number of classes to output
lr = 0.0005         #-- float, learning rate
drop_per  = 0.2     #-- float, input dropout
drop_hid = 0.5      #-- float, hidden neurons dropout
n_filt = 10         #-- integer, number of filter in the first convolutional layer
seed = 123456       #-- seed


## Initialization

In [3]:
ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()
mx.random.seed(seed)
npx.random.seed(seed)
numpy.random.seed(seed)

## Training Set and Test Set

In [4]:
train_file = 'subcellular_localization/data/train.npz'
test_file = 'subcellular_localization/data/test.npz'

train_npz = numpy.load(train_file)
test_npz = numpy.load(test_file)


In [5]:
mask_train = nd.from_numpy(train_npz['mask_train'])
partition = nd.from_numpy(train_npz['partition'])
X_train = nd.from_numpy(train_npz['X_train'])
y_train = nd.from_numpy(train_npz['y_train'])
X_test = nd.from_numpy(test_npz['X_test'])
mask_test = nd.from_numpy(test_npz['mask_test'])
y_test = nd.from_numpy(test_npz['y_test'])

In [6]:
train_npz.close()
test_npz.close()

## Network

### Convoluted Layers

In [7]:
class ConvLayer(nn.Block):
    def __init__(self, **kwargs):
        super(ConvLayer, self).__init__(**kwargs)
        f_size_a = 1
        f_size_b = 3
        f_size_c = 5
        f_size_d = 9
        f_size_e = 15
        f_size_f = 21

        with self.name_scope():
            self.l_conv_a = nn.Conv1D(channels=n_filt, kernel_size=f_size_a, padding=0, layout='NCW', activation='relu')
            self.l_conv_b = nn.Conv1D(channels=n_filt, kernel_size=f_size_b, padding=1, layout='NCW', activation='relu')
            self.l_conv_c = nn.Conv1D(channels=n_filt, kernel_size=f_size_c, padding=2, layout='NCW', activation='relu')
            self.l_conv_d = nn.Conv1D(channels=n_filt, kernel_size=f_size_d, padding=4, layout='NCW', activation='relu')
            self.l_conv_e = nn.Conv1D(channels=n_filt, kernel_size=f_size_e, padding=7, layout='NCW', activation='relu')
            self.l_conv_f = nn.Conv1D(channels=n_filt, kernel_size=f_size_f, padding=10, layout='NCW', activation='relu')
            
            self.l_conv_final = nn.Conv1D(channels=64, kernel_size=f_size_b, padding=1, layout='NCW', activation='relu')
        
    def forward(self, x):
        a = self.l_conv_a(x)
        b = self.l_conv_b(x)
        c = self.l_conv_c(x)
        d = self.l_conv_d(x)
        e = self.l_conv_e(x)
        f = self.l_conv_f(x)
        
        conc = nd.concat(a, b, c, d, e, f, dim=1)
        
        return self.l_conv_final(conc)   


### Bidirectional LSTM Layer

In [8]:
class BidirectionalLSTM(nn.Block):
    def __init__(self, **kwargs):
        super(BidirectionalLSTM, self).__init__(**kwargs)
        
        # self.params.get('mask', shape=(batch_size, max_seq_size))
        
        with self.name_scope():
            self.l_fwd = rnn.LSTM(hidden_size=n_hid, layout='TNC', prefix='LSTMFwd_')
            self.l_bck = rnn.LSTM(hidden_size=n_hid, layout='TNC', prefix='LSTMBck_')

    def _reverse(self, x, mask):
        d = [x, mask]
        dataiter = mx.io.NDArrayIter(d)
        x_1 = nd.empty(x.shape, ctx=ctx)
        nd.reset_arrays(x_1, num_arrays=1)
        for i in range(x.shape[0]):
            size = nd.sum(mask[i]).astype('int32').asscalar()
            seq = x[i]
            seq_1 = nd.reverse(nd.slice(seq, begin=(0,0), end=(size, 64)), axis=1)
            seq_1.copyto(x_1[i, :size]) 
        return x_1            
            
    def forward(self, x, mask):
        # print('x shape:', x.shape)
        # mask = self.params.get('mask').data()
        # print('mask:', mask)
        dimension = (1, 0, 2)
        x_fwd = nd.transpose(x, dimension)
        x_bck = nd.transpose(self._reverse(x, mask), dimension)
        fwd = self.l_fwd(x_fwd)
        bck = self.l_bck(x_bck)
        conc = nd.concat(fwd, bck, dim=2)
        return nd.transpose(conc, dimension)


In [9]:
class SlicerLayer(nn.Block):
    
    def __init__(self, **kwargs):
        super(SlicerLayer, self).__init__(**kwargs)
        # self.params.get('mask', shape=(128, 1000))
    
    def forward(self, x, mask):
        # mask = self.params.get('mask').data()
        d = [x, mask]
        dataiter = mx.io.NDArrayIter(d)
        x_shape = x.shape
        x_1_shape = (x.shape[0], x.shape[1])
        x_1 = nd.empty(x_1_shape, ctx=ctx)
        nd.reset_arrays(x_1, num_arrays=1)
        for i in range(x_1_shape[0]):
            size = nd.sum(mask[i]).astype('int32').asscalar()
            x[i][size - 1].copyto(x_1[i])
        return x_1
    

In [10]:
class LSTMAttentionDecodeFeedback(nn.Block):
    def __init__(self,
                 num_units,
                 aln_num_units,
                 n_decodesteps=10,
                 **kwargs):
        
        super(LSTMAttentionDecodeFeedback, self).__init__(prefix='LSTMAttentinDecode_Feedback_', **kwargs)
        
        self.num_units = num_units
        self.aln_num_units = aln_num_units
        self.n_decodesteps = n_decodesteps
        self.attention_softmax_function = nd.softmax
        self.peepholes = True

        self.num_inputs = 512
        
        self.nonlinearity_align=nd.tanh
        
        self.nonlinearity_ingate = nd.sigmoid
        self.nonlinearity_forgetgate = nd.sigmoid
        self.nonlinearity_cell = nd.tanh
        self.nonlinearity_outgate = nd.sigmoid
        
        self.nonlinearity_out = nd.tanh
        
        self.W_hid_to_ingate = self.params.get('W_hid_to_ingate', shape=(num_units, num_units),
                                               init=init.Normal(0.1),
                                               allow_deferred_init=True)
        
        self.W_hid_to_forgetgate = self.params.get('W_hid_to_forgetgate', shape=(num_units, num_units),
                                                   init=init.Normal(0.1),
                                                   allow_deferred_init=True)
        
        self.W_hid_to_cell = self.params.get('W_hid_to_cell', shape=(num_units, num_units),
                                             init=init.Normal(0.1),
                                             allow_deferred_init=True)
        
        self.W_hid_to_outgate = self.params.get('W_hid_to_outgate', shape=(num_units, num_units),
                                                init=init.Normal(0.1),
                                                allow_deferred_init=True)
        
        self.b_ingate = self.params.get('b_ingate', shape=(num_units),
                                        init=init.Constant(0),
                                        allow_deferred_init=True)

        self.b_forgetgate = self.params.get('b_forgetgate', shape=(num_units),
                                            init=init.Constant(0),
                                            allow_deferred_init=True)

        self.b_cell = self.params.get('b_cell', shape=(num_units),
                                      init=init.Constant(0),
                                      allow_deferred_init=True)
        
        self.b_outgate = self.params.get('b_outgate', shape=(num_units),
                                         init=init.Constant(0),
                                         allow_deferred_init=True)
        
        self.W_weightedhid_to_ingate = self.params.get('W_weightedhid_to_ingate',
                                                      shape=(self.num_inputs, num_units),
                                                      init=init.Normal(0.1),
                                                      allow_deferred_init=True)
        
        self.W_weightedhid_to_forgetgate = self.params.get('W_weightedhid_to_forgetgate',
                                                           shape=(self.num_inputs, num_units),
                                                           init=init.Normal(0.1),
                                                           allow_deferred_init=True)
        
        self.W_weightedhid_to_cell = self.params.get('W_weightedhid_to_cell',
                                                     shape=(self.num_inputs, num_units),
                                                     init=init.Normal(0.1),
                                                     allow_deferred_init=True)
        
        self.W_weightedhid_to_outgate = self.params.get('W_weightedhid_to_outgate',
                                                        shape=(self.num_inputs, num_units),
                                                        init=init.Normal(0.1),
                                                        allow_deferred_init=True)
        
        self.W_cell_to_ingate = self.params.get('W_cell_to_ingate',
                                                shape=(num_units),
                                                init=init.Normal(0.1),
                                                allow_deferred_init=True)
        
        self.W_cell_to_forgetgate = self.params.get('W_cell_to_forgetgate',
                                                    shape=(num_units),
                                                    init=init.Normal(0.1),
                                                    allow_deferred_init=True)
        
        self.W_cell_to_outgate = self.params.get('W_cell_to_outgate',
                                                 shape=(num_units),
                                                 init=init.Normal(0.1),
                                                 allow_deferred_init=True)
        
        self.W_align = self.params.get('W_align',
                                       shape=(num_units, self.aln_num_units),
                                       init=init.Normal(0.1))
        
        self.U_align = self.params.get('U_align', shape=(self.num_inputs,self.aln_num_units),
                                       init=init.Normal(0.1),
                                       allow_deferred_init=True)
        
        self.v_align = self.params.get('v_align', shape=(self.aln_num_units, 1),
                                       init=init.Normal(0.1))
        
        with self.name_scope():
            pass

    def slice_w(self, x, n):
        return x[:, n*self.num_units:(n+1)*self.num_units]
    
    def step(self, cell_previous, hid_previous, alpha_prev, weighted_hidden_prev,
            input, mask, hUa, W_align, v_align,
            W_hid_stacked, W_weightedhid_stacked, W_cell_to_ingate,
            W_cell_to_forgetgate, W_cell_to_outgate,
            b_stacked, *args):
        
        sWa = nd.dot(hid_previous, W_align)  # (BS, aln_num_units)
        sWa = nd.expand_dims(sWa, axis=1)    # (BS, 1 aln_num_units) 
        align_act = sWa + hUa
        tanh_sWahUa = nd.tanh(align_act)     # (BS, seqlen, num_units_aln)
        
        # CALCULATE WEIGHT FOR EACH HIDDEN STATE VECTOR
        a = nd.dot(tanh_sWahUa, v_align)  # (BS, Seqlen, 1)
        a = nd.reshape(a, (a.shape[0], a.shape[1]))
        #                                # (BS, Seqlen)
        # # ->(BS, seq_len)
        
        a = a*mask - (1-mask)*10000
        
        alpha = self.attention_softmax_function(a)
        
        # input: (BS, Seqlen, num_units)
        weighted_hidden = input * nd.expand_dims(alpha, axis=2)
        weighted_hidden = nd.sum(weighted_hidden, axis=1)  #sum seqlen out

        # (BS, dec_hid) x (dec_hid, dec_hid)
        gates = nd.dot(hid_previous, W_hid_stacked) + b_stacked
        # (BS, enc_hid) x (enc_hid, dec_hid)
        gates = gates + nd.dot(weighted_hidden, W_weightedhid_stacked)

        
        # Clip gradients
        # if self.grad_clipping is not False:
        #    gates = theano.gradient.grad_clip(
        #        gates, -self.grad_clipping, self.grad_clipping)

        # Extract the pre-activation gate values
        ingate = self.slice_w(gates, 0)
        forgetgate = self.slice_w(gates, 1)
        cell_input = self.slice_w(gates, 2)
        outgate = self.slice_w(gates, 3)

        if self.peepholes:
            # Compute peephole connections
            ingate = ingate + cell_previous*W_cell_to_ingate
            forgetgate = forgetgate + (cell_previous*W_cell_to_forgetgate)
            
        # Apply nonlinearities
        ingate = self.nonlinearity_ingate(ingate)
        forgetgate = self.nonlinearity_forgetgate(forgetgate)
        cell_input = self.nonlinearity_cell(cell_input)
        outgate = self.nonlinearity_outgate(outgate)
        
        # Compute new cell value
        cell = forgetgate*cell_previous + ingate*cell_input
        
        if self.peepholes:
            outgate = outgate + cell*W_cell_to_outgate

        # W_align:  (num_units, aln_num_units)
        # U_align:  (num_feats, aln_num_units)
        # v_align:  (aln_num_units, 1)
        # hUa:      (BS, Seqlen, aln_num_units)
        # hid:      (BS, num_units_dec)
        # input:    (BS, Seqlen, num_inputs)

        # Compute new hidden unit activation
        hid = outgate*self.nonlinearity_out(cell)

        return [cell, hid, alpha, weighted_hidden]            
            
        
    def forward(self, input, mask):
        
        num_batch = input.shape[0]
        encode_seqlen = input.shape[1]
        
        W_hid_stacked = nd.concat(
            self.W_hid_to_ingate.data(),
            self.W_hid_to_forgetgate.data(),
            self.W_hid_to_cell.data(),
            self.W_hid_to_outgate.data(),
            dim=1)
        
        W_weightedhid_stacked = nd.concat(
            self.W_weightedhid_to_ingate.data(),
            self.W_weightedhid_to_forgetgate.data(),
            self.W_weightedhid_to_cell.data(),
            self.W_weightedhid_to_outgate.data(),
            dim=1)
        
        b_stacked = nd.concat(
            self.b_ingate.data(),
            self.b_forgetgate.data(),
            self.b_cell.data(),
            self.b_outgate.data(),
            dim=0)
        
        cell = nd.zeros((num_batch, self.num_units), ctx=ctx)
        hid = nd.zeros((num_batch, self.num_units), ctx=ctx)
        alpha = nd.zeros((num_batch, encode_seqlen), ctx=ctx)
        weighted_hidden = nd.zeros((num_batch, self.num_units), ctx=ctx)
        
        hUa = nd.dot(input, self.U_align.data())
        W_align = self.W_align.data()
        v_align = self.v_align.data()
        
        W_cell_to_ingate = self.W_cell_to_ingate.data()
        W_cell_to_forgetgate = self.W_cell_to_forgetgate.data()
        W_cell_to_outgate = self.W_cell_to_outgate.data()
        
        for i in range(self.n_decodesteps):        
            cell, hid, alpha, weighted_hidden = self.step(cell, hid, alpha, weighted_hidden,
                input, mask, hUa, W_align, v_align,
                W_hid_stacked, W_weightedhid_stacked, W_cell_to_ingate,
                W_cell_to_forgetgate, W_cell_to_outgate,
                b_stacked)
        
        return weighted_hidden
    

In [11]:
class Model(nn.Block):
    def __init__(self, **kwargs):
        super(Model, self).__init__(**kwargs)
        
        with self.name_scope():
            self.l_dropout_1 = nn.Dropout(rate=drop_per)
            self.l_dropout_2 = nn.Dropout(rate=drop_hid)
            self.l_dropout_3 = nn.Dropout(rate=drop_hid)
            self.l_dropout_4 = nn.Dropout(rate=drop_hid)
            self.l_conv = ConvLayer()
            self.l_lstm = BidirectionalLSTM()
            self.l_dense = nn.Dense(units=n_class, activation='relu')
            self.l_decoder = LSTMAttentionDecodeFeedback(num_units=2*n_hid, aln_num_units=n_hid, n_decodesteps=10)
    
    def forward(self, input, mask):
        x = self.l_dropout_1.forward(input)
        x = nd.transpose(x, (0, 2, 1))
        x = self.l_conv.forward(x)
        x = nd.transpose(x, (0, 2, 1))
        x = self.l_dropout_2.forward(x)
        x = self.l_lstm.forward(x, mask)
        x = self.l_decoder(x, mask)
        x = self.l_dropout_3.forward(x)
        x = self.l_dense.forward(x)
        x = self.l_dropout_4.forward(x)
        
        return x
        

### Training loop

In [12]:
mask_train = mask_train.as_in_context(ctx)
X_train = X_train.as_in_context(ctx)
y_train = y_train.as_in_context(ctx)

data_iter = mx.io.NDArrayIter([X_train, mask_train], y_train, batch_size, shuffle=True)

loss = gluon.loss.SoftmaxCrossEntropyLoss()

model = Model()
model.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

params = model.collect_params()
params.reset_ctx([ctx])

trainer = gluon.Trainer(params=params,
                        optimizer='adam', optimizer_params={'learning_rate':lr})

for e in range(epoch):
    begin_time = time.perf_counter()
    train_loss = 0.
    acc = mx.metric.Accuracy()
    data_iter.reset()
    i = -1
    for batch in data_iter:
        input = batch.data[0]
        mask = batch.data[1]
        label = batch.label[0]
        
        with mx.autograd.record():
            output = model(input, mask)
            l = loss(output, label)
                
        l.backward()
        trainer.step(batch_size)
        
        train_loss += l.mean().asscalar()
        preds = output.argmax(axis=1)
        acc.update(label, preds)
        i += 1
    
    stop_time = time.perf_counter()
    total_time = stop_time - begin_time
    print("Epoch %d" % (e + 1))
    print("  Time %.5f seconds" % (total_time))
    print("  Train Accuracy: %.5f\t Train Loss: %.5f" % (acc.get()[1], train_loss/(i+1)))

model.save_parameters("net.params")
    

Epoch 1
  Time 38.59019 seconds
  Train Accuracy: 0.19295	 Train Loss: 2.24271
Epoch 2
  Time 39.44133 seconds
  Train Accuracy: 0.26468	 Train Loss: 2.10914
Epoch 3
  Time 41.44274 seconds
  Train Accuracy: 0.34794	 Train Loss: 1.91495
Epoch 4
  Time 42.32647 seconds
  Train Accuracy: 0.36409	 Train Loss: 1.85213
Epoch 5
  Time 42.10921 seconds
  Train Accuracy: 0.34291	 Train Loss: 1.90410
Epoch 6
  Time 41.94321 seconds
  Train Accuracy: 0.36451	 Train Loss: 1.88685
Epoch 7
  Time 41.13389 seconds
  Train Accuracy: 0.40793	 Train Loss: 1.75326
Epoch 8
  Time 42.25175 seconds
  Train Accuracy: 0.40289	 Train Loss: 1.75172
Epoch 9
  Time 41.45896 seconds
  Train Accuracy: 0.40520	 Train Loss: 1.75271
Epoch 10
  Time 42.25646 seconds
  Train Accuracy: 0.38968	 Train Loss: 1.78953
Epoch 11
  Time 41.79307 seconds
  Train Accuracy: 0.36095	 Train Loss: 1.85658
Epoch 12
  Time 42.19111 seconds
  Train Accuracy: 0.35529	 Train Loss: 1.89907
Epoch 13
  Time 42.30597 seconds
  Train Accuracy

Epoch 104
  Time 50.86430 seconds
  Train Accuracy: 0.45805	 Train Loss: 1.52226
Epoch 105
  Time 51.83350 seconds
  Train Accuracy: 0.45742	 Train Loss: 1.52197
Epoch 106
  Time 48.55796 seconds
  Train Accuracy: 0.46057	 Train Loss: 1.51827
Epoch 107
  Time 48.28434 seconds
  Train Accuracy: 0.45931	 Train Loss: 1.50955
Epoch 108
  Time 49.02268 seconds
  Train Accuracy: 0.46497	 Train Loss: 1.51534
Epoch 109
  Time 47.78355 seconds
  Train Accuracy: 0.45784	 Train Loss: 1.53680
Epoch 110
  Time 48.45295 seconds
  Train Accuracy: 0.46644	 Train Loss: 1.50516
Epoch 111
  Time 47.47860 seconds
  Train Accuracy: 0.46162	 Train Loss: 1.53554
Epoch 112
  Time 47.54410 seconds
  Train Accuracy: 0.43184	 Train Loss: 1.60476
Epoch 113
  Time 49.33185 seconds
  Train Accuracy: 0.43729	 Train Loss: 1.61509
Epoch 114
  Time 48.33796 seconds
  Train Accuracy: 0.46015	 Train Loss: 1.53342
Epoch 115
  Time 47.79952 seconds
  Train Accuracy: 0.42408	 Train Loss: 1.62822
Epoch 116
  Time 49.09722 se

## Prediction

In [14]:

mask_test = mask_test.as_in_context(ctx)
X_test = X_test.as_in_context(ctx)
y_test = y_test.as_in_context(ctx)

data_iter = mx.io.NDArrayIter([X_test, mask_test], y_test, batch_size, shuffle=False)

model = Model()

model.load_parameters("net.params", ctx=ctx)

acc = mx.metric.Accuracy()

for batch in data_iter:
    input = batch.data[0]
    mask = batch.data[1]
    label = batch.label[0]

    with mx.autograd.predict_mode():
        output = model(input, mask)
        
    preds = output.argmax(axis=1)
    acc.update(label, preds)

print("Test Accuracy: %.5f" % (acc.get()[1]))

Test Accuracy: 0.78372
