In [1]:
from __future__ import print_function, division
import numpy as np
import h5py
import scipy.io
import random
import sys,os
import itertools
import numbers
from collections import Counter
from warnings import warn
from abc import ABCMeta, abstractmethod
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.utils import shuffle
import matplotlib.pyplot

In [2]:
import tensorflow as tf

In [6]:
# Single seed shared by every random number generator seeded in this cell.
SEED = 1337

# Start NumPy generated random numbers in a well-defined initial state.
np.random.seed(SEED)

# Start core Python generated random numbers in a well-defined state.
# `random` is the standard-library module imported in the first cell; no
# `python_random` alias is imported in the visible cells, so the stdlib
# module is seeded directly.
random.seed(SEED)

# Give random number generation in the TensorFlow backend a well-defined
# initial state. For further details, see:
# https://www.tensorflow.org/api_docs/python/tf/random/set_seed
tf.random.set_seed(SEED)
# Older (1.x) versions of TensorFlow spell this: tf.set_random_seed(1337)

In [7]:
import os

In [8]:
# Restrict this process to physical GPUs 2, 3 and 4. The variable is read when
# the CUDA driver is first initialised, so it must be exported *before*
# TensorFlow enumerates the devices below; setting it afterwards has no effect.
os.environ['CUDA_VISIBLE_DEVICES'] = "2,3,4"

# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [5]:
# Single source of truth for every thread limit configured in this cell.
num_threads = 4
# Environment variables must be strings; derive them from `num_threads` so the
# environment and the tf.config calls below can never disagree.
_num_threads_env = str(num_threads)

# Maximum number of threads to use for OpenMP parallel regions.
os.environ["OMP_NUM_THREADS"] = _num_threads_env
# The original author reports (thanks to @cjw85) that the limits did not take
# effect without also setting the two environment variables below.
os.environ["TF_NUM_INTRAOP_THREADS"] = _num_threads_env
os.environ["TF_NUM_INTEROP_THREADS"] = _num_threads_env

# Thread pools used between independent ops and inside a single op.
tf.config.threading.set_inter_op_parallelism_threads(num_threads)
tf.config.threading.set_intra_op_parallelism_threads(num_threads)
# Allow ops to fall back to another device when the requested one is unavailable.
tf.config.set_soft_device_placement(True)

In [12]:
from tensorflow.keras.optimizers import *
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.constraints import MaxNorm
from tensorflow.keras.regularizers import (
    l2, 
    l1, 
    l1_l2
)
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras import (
    activations, 
    initializers, 
    regularizers, 
    constraints
)

In [14]:
import matplotlib
import matplotlib.pyplot as plt

In [15]:
from tensorflow.keras.callbacks import (
    ModelCheckpoint, 
    EarlyStopping
)
from sklearn.metrics import (
    roc_curve,
    auc,
    roc_auc_score,
    average_precision_score,
    precision_recall_curve,
)

In [17]:
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.utils import resample, shuffle
from sklearn.feature_selection import (
    SelectKBest,
    chi2
)
from collections import defaultdict

In [18]:
from attention_layer import Attention, attention_flatten

In [27]:
def build_model():
    """Build and compile the convolution + attention binary classifier.

    Architecture, as wired below:

    * input of shape ``(1000, 4)``
    * ``Conv1D`` (256 filters, kernel size 9, 'valid' padding, ReLU,
      max-norm 3 kernel constraint) followed by ``MaxPooling1D(3)`` and dropout
    * branch A: the project's ``Attention`` layer followed by
      ``attention_flatten`` (both imported from ``attention_layer``; their
      exact semantics are defined there, not in this notebook)
    * branch B: ``Flatten`` -> dropout -> ``Dense(1)``
    * both branches are concatenated and passed through ``Dense(1)`` and a
      sigmoid activation

    The model is compiled with binary cross-entropy loss, the ``nadam``
    optimizer and the accuracy metric, and its summary is printed.

    Returns:
        The compiled Keras ``Model``.
    """
    print('building model')

    seq_input_shape = (1000, 4)
    nb_filter = 256
    filter_length = 9
    attentionhidden = 256

    # Layers are created and applied in a fixed order on purpose: run_model()
    # loads pre-trained weights into this topology, so the graph must not change.
    seq_input = Input(shape=seq_input_shape, name='seq_input')
    convul1 = Convolution1D(
        filters=nb_filter,
        kernel_size=filter_length,
        padding='valid',
        activation='relu',
        kernel_constraint=MaxNorm(3),
    )

    pool_ma1 = MaxPooling1D(pool_size=3)
    dropout1 = Dropout(0.5977908689086315)
    dropout2 = Dropout(0.50131233477637737)
    decoder = Attention(hidden=attentionhidden, activation='linear')
    dense1 = Dense(1)
    dense2 = Dense(1)

    # Shared trunk: convolution -> max pooling -> dropout.
    output_1 = pool_ma1(convul1(seq_input))
    output_2 = dropout1(output_1)

    # Branch A: attention over the trunk output; attention_flatten is
    # parameterised with the trunk's channel dimension.
    att_decoder = decoder(output_2)
    output_3 = attention_flatten(output_2.shape[2])(att_decoder)

    # Branch B: flattened trunk output reduced to a single unit.
    output_4 = dense1(dropout2(Flatten()(output_2)))

    # Merge both branches and map to a single sigmoid probability.
    all_outp = concatenate([output_3, output_4])
    output_5 = dense2(all_outp)
    output_f = Activation('sigmoid')(output_5)

    model = Model(inputs=seq_input, outputs=output_f)
    model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['accuracy'])

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line to the output.
    model.summary()
    return model


In [20]:
def data_processing():
    """Load the VISDB arrays and return a training set and a balanced hold-out set.

    Steps:

    1. Load features and labels from ``data/x_VISDB_fulldata.npy`` and
       ``data/y_VISDB_fulldata.npy``.
    2. Make a stratified 9:1 split (``random_state=42``).
    3. Balance the 10% hold-out part 1:1 by keeping every positive sample
       (label 1) and a random subset of the negatives (label 0) of the same
       size; if there are fewer negatives than positives, all negatives are kept.
    4. Shuffle the balanced hold-out set (``random_state=42``).

    The negative subset is drawn from a private ``RandomState(42)``. This
    yields the same permutation as seeding the global NumPy generator with 42,
    but does not reset the process-wide random state as a hidden side effect.

    Returns:
        Tuple ``(trainx, trainy, valx2, valy2)``: the 90% training split
        followed by the balanced, shuffled hold-out features and labels.
    """
    x_visdb = np.load('data/x_VISDB_fulldata.npy')
    y_visdb = np.load('data/y_VISDB_fulldata.npy')

    # Stratified 9:1 split.
    trainx, valx, trainy, valy = train_test_split(
        x_visdb, y_visdb, test_size=0.1, stratify=y_visdb, random_state=42
    )

    # Separate the hold-out part by class so it can be balanced 1:1.
    # NOTE(review): the indexing below assumes labels are a 1-D array and
    # features are 3-D (samples first) -- confirm against the data files.
    neg_val = np.where(valy == 0)
    pos_val = np.where(valy == 1)
    xval_positive = valx[pos_val]
    yval_positive = valy[pos_val]
    xval_negative = valx[neg_val]
    yval_negative = valy[neg_val]

    # Draw as many negatives as there are positives, using a local generator
    # so the notebook-wide NumPy seed is left untouched.
    rng = np.random.RandomState(42)
    permutation = rng.permutation(xval_negative.shape[0])
    keep = permutation[:xval_positive.shape[0]]
    xval_negative_1 = xval_negative[keep, :, :]
    yval_negative_1 = yval_negative[keep]

    valx = np.concatenate((xval_positive, xval_negative_1), axis=0)
    valy = np.concatenate((yval_positive, yval_negative_1), axis=0)

    # Mix positives and negatives so they are not grouped by class.
    valx2, valy2 = shuffle(valx, valy, random_state=42)

    return trainx, trainy, valx2, valy2


In [21]:
def run_model():
    """Evaluate the pre-trained model on the balanced hold-out set.

    Loads the data via ``data_processing()``, builds the network with
    ``build_model()``, restores the weights from ``model/Final_model.h5`` and
    predicts on the hold-out samples. AUROC and average precision are
    computed and printed; the predictions and the matching labels are saved
    to ``data/y_pred.npy`` and ``data/valy.npy``.
    """
    # Only the hold-out part is needed here; the training arrays are discarded.
    _, _, x_eval, y_true = data_processing()

    classifier = build_model()
    classifier.load_weights('model/Final_model.h5')

    print('testing')

    y_score = classifier.predict(x_eval, verbose=1)

    auroc = roc_auc_score(y_true, y_score)
    aupr = average_precision_score(y_true, y_score)

    # Keep scores and labels on disk so curves can be produced later
    # without re-running the model.
    for target, array in (('data/y_pred.npy', y_score), ('data/valy.npy', y_true)):
        np.save(target, array)

    print('auroc = ', auroc)
    print('aupr = ', aupr)

In [28]:
if __name__ == '__main__':
    # run_model() builds the network itself, so a separate build_model() call
    # here would only construct a second, discarded model and print the
    # summary twice.
    run_model()

building model
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
seq_input (InputLayer)          [(None, 1000, 4)]    0                                            
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 992, 256)     9472        seq_input[0][0]                  
__________________________________________________________________________________________________
max_pooling1d_1 (MaxPooling1D)  (None, 330, 256)     0           conv1d_1[0][0]                   
__________________________________________________________________________________________________
dropout_2 (Dropout)             (None, 330, 256)     0           max_pooling1d_1[0][0]            
_______________________________________________________________________________