### Notebook to configure model

In [1]:
import time

from models.modules.multihead import *
from utils.prepare_data import *

import pandas as pd


import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context(context="talk")

import torch
import torch.nn as nn
import tensorflow as tf
import torch.nn.functional as F
from torchvision import datasets
import torchvision.transforms as transforms
from torch.autograd import Variable

import math, copy, time

from keras.utils import np_utils

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [72]:
# Hyperparameter

MAX_SEQ_LENGTH = 100
EMBEDDING_SIZE = 10
HIDDEN_SIZE = 512
ATTENTION_SIZE = 64
lr = 1e-3
BATCH_SIZE = 256
KEEP_PROB = 0.5
LAMBDA = 0.0001

VOCAB_SIZE = 5

MAX_LABEL = 2

GENOME_LENGTH = 20000
CONTEXT_SIZE = GENOME_LENGTH


In [None]:
def attention(query, key, value, dropout=None):
    "Compute 'Scaled Dot Product Attention'"
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) \
             / math.sqrt(d_k)
    p_attn = F.softmax(scores, dim = -1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn


In [None]:
class NGramDenseEmbedding(nn.Module):

    def __init__(self, vocab_size, embedding_dim, context_size):
        super(NGramDenseEmbedding, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)

    def forward(self, inputs):
        embeds = self.embeddings(inputs).view((1, -1))
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = F.log_softmax(out, dim=1)
        return log_probs


In [None]:
class AttentionLR(nn.Module):
    
    def __init__(self, input_size, num_classes, d_model, h, dropout=0.1):
        super(AttentionLR, self).__init__()
        
        assert d_model % h == 0
        
        # We assume d_v always equals d_k as defined in the paper
        
        self.d_k = d_model // h
        
        self.h = h
        self.dropout = nn.Dropout(p=dropout)
        self.KQ_attn = None
        self.KQV_attn = None
        self.linears = clones(nn.Linear(input_size, num_classes),1)
        
        self.K = NGramDenseEmbedding(VOCAB_SIZE, EMBEDDING_SIZE, CONTEXT_SIZE)
        self.V = NGramDenseEmbedding(VOCAB_SIZE, EMBEDDING_SIZE, CONTEXT_SIZE)
        
        self.linear = nn.Linear(input_size, num_classes)
        
    def forward(self, query_seq):
        
        Q_lookup = torch.tensor([word_to_ix[w] for w in query_seq], dtype=torch.long)
        
        K_lookup = self.K(Q_lookup)
        V_lookup = self.V(Q_lookup)       
        
        self.KQ_attn, self.KQV_attn = attention(Q_lookup, K_lookup, V_lookup, dropout=self.dropout)
                
        return F.log_softmax(self.linear(self.KQV_attn), dim=1)
    

In [92]:
# load data
x_train, y_train = load_data("../data/train-BRAF.csv", sample_ratio=1)
x_test, y_test = load_data("../data/test-BRAF.csv", sample_ratio=1)


(2440198, 2)
(155244, 2)


In [58]:
word_to_ix = {'N':0, 'A':1, 'C':2, 'T':3, 'G':4}
x_train_l = np.ndarray((len(x_train),MAX_SEQ_LENGTH))

for t in np.arange(len(x_train)):
    line = list(x_train[t])[1:MAX_SEQ_LENGTH+1]
    for k in np.arange(MAX_SEQ_LENGTH):
        x_train_l[t,k] = word_to_ix[line[k]]
    
x_test_l = np.ndarray((len(x_test),MAX_SEQ_LENGTH))

for t in np.arange(len(x_test)):    
    line = list(x_test[t])[1:MAX_SEQ_LENGTH+1]
    for k in np.arange(MAX_SEQ_LENGTH):
        x_test_l[t,k] = word_to_ix[line[k]]
    
print(x_train_l.shape)
print(x_test_l.shape)
  

(2440198, 100)
(155244, 100)


In [86]:
print(x_train_l.shape)
print(x_test_l.shape)

print(x_train_l[0])

(2440198, 100)
(155244, 100)
[3. 3. 1. 4. 3. 3. 1. 1. 1. 1. 1. 3. 4. 1. 4. 4. 3. 3. 4. 4. 1. 1. 1. 4.
 3. 1. 1. 1. 3. 2. 3. 4. 1. 2. 2. 3. 1. 4. 1. 3. 4. 1. 3. 3. 4. 4. 3. 3.
 3. 4. 1. 2. 1. 1. 3. 4. 1. 4. 4. 1. 3. 3. 1. 1. 2. 3. 3. 3. 1. 2. 3. 1.
 1. 3. 3. 1. 4. 1. 3. 3. 1. 3. 4. 3. 4. 4. 2. 3. 4. 1. 3. 4. 4. 4. 3. 4.
 3. 3. 3. 3.]


In [93]:
x_train = x_train_l[0:10000,:]
x_test = x_test_l[0:1000,:]

y_train = y_train[0:10000,:]
y_test = y_test[0:1000,:]

print(x_train.shape)
print(x_test.shape)

(10000, 100)
(1000, 100)


In [88]:
# # data preprocessing
# x_train, x_test, vocab, VOCAB_SIZE = \
#     data_preprocessing(x_train, x_test, MAX_SEQ_LENGTH)
# print(vocab_size)


In [94]:
# split dataset to test and dev
x_test, x_dev, y_test, y_dev, dev_size, test_size = \
    split_dataset(x_test, y_test, 0.1)
print("Validation size: ", dev_size)


1000
100
Validation size:  100


In [95]:

graph = tf.Graph()
with graph.as_default():

    batch_x = tf.placeholder(tf.int32, [None, MAX_SEQ_LENGTH])
    batch_y = tf.placeholder(tf.float32, [None, MAX_LABEL])
    keep_prob = tf.placeholder(tf.float32)

    embeddings_var = tf.Variable(tf.random_uniform([VOCAB_SIZE, EMBEDDING_SIZE], -1.0, 1.0), trainable=True)
    batch_embedded = tf.nn.embedding_lookup(embeddings_var, batch_x)
    # multihead attention
    outputs = multihead_attention(queries=batch_embedded, keys=batch_embedded)
    # FFN(x) = LN(x + point-wisely NN(x))
    outputs = feedforward(outputs, [HIDDEN_SIZE, EMBEDDING_SIZE])
    print(outputs.shape)
    outputs = tf.reshape(outputs, [-1, MAX_SEQ_LENGTH * EMBEDDING_SIZE])
    logits = tf.layers.dense(outputs, units=MAX_LABEL)
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=batch_y))
    optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)

    # Accuracy metric
    prediction = tf.argmax(tf.nn.softmax(logits), 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, tf.argmax(batch_y, 1)), tf.float32))



Conv ret: (?, 100, 128)
(?, 100, 128)


In [96]:

epochs = 5

with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    print("Initialized! ")

    print("Start trainning")
    start = time.time()
    for e in range(epochs):

        epoch_start = time.time()
        print("Epoch %d start !" % (e + 1))
        for x_batch, y_batch in fill_feed_dict(x_train, y_train, BATCH_SIZE):
            fd = {batch_x: x_batch, batch_y: y_batch, keep_prob: KEEP_PROB}
            l, _, acc = sess.run([loss, optimizer, accuracy], feed_dict=fd)

        epoch_finish = time.time()
        print("Validation accuracy and loss: ", sess.run([accuracy, loss], feed_dict={
            batch_x: x_dev,
            batch_y: y_dev,
            keep_prob: 1.0
        }))
        print("epoch time:", epoch_finish - epoch_start , " s")

    print("Training finished, time consumed : ", time.time() - start, " s")
    print("start predicting:  \n")
    test_accuracy = sess.run([accuracy], feed_dict={batch_x: x_test, batch_y: y_test, keep_prob: 1})
    print("Test accuracy : %f %%" % (test_accuracy[0] * 100))



Initialized! 
Start trainning
Epoch 1 start !
Validation accuracy and loss:  [0.47, 0.80978376]
epoch time: 172.9900119304657  s
Epoch 2 start !
Validation accuracy and loss:  [0.46, 0.72138095]
epoch time: 187.37515902519226  s
Epoch 3 start !
Validation accuracy and loss:  [0.5, 0.76346606]
epoch time: 152.2712278366089  s
Epoch 4 start !
Validation accuracy and loss:  [0.6, 0.69934523]
epoch time: 439.39326095581055  s
Epoch 5 start !
Validation accuracy and loss:  [0.38, 0.80945414]
epoch time: 121.4345600605011  s
Training finished, time consumed :  1077.1485340595245  s
start predicting:  

Test accuracy : 42.444444 %
