In [1]:
import os
import json
from pprint import pprint as pp
from glob import iglob
import pickle

import numpy as np

from wirusy.DataLoader import DataLoader

In [2]:
# Project data accessor; the folder names are handed to DataLoader as-is.
dataloader = DataLoader(data_folder='data', tests_folder='tests')

# Length constants (presumably the longest virus / bacteria sequences in the
# data set -- TODO confirm). MAX_LEN_VIRUS is the sequence length the model is
# built with further down.
MAX_LEN_VIRUS = 497513
MAX_LEN_BACTERIA = 16040666

# Identifiers of the five additional features used alongside the sequence.
FEATURES = [
    'blastn-rbo/eval10/rbo_unmerged_ranks_1000hits',
    'blastn/eval10/best_hsp_bitscore',
    'crispr/pilercr-default/max_mismatch2',
    'gc_content/difference',
    'kmer-canonical/k6/chebyshev',
]

In [3]:
# Load the pickled virus sequences and features from the tests folder.
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only point this at files produced by this project.
features_path = os.path.join(dataloader.tests_folder, "-225", "virus-seq-and-features.p")
with open(features_path, "rb") as fd:
    data = pickle.load(fd)


In [32]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.layers import concatenate, Input, Dense, Conv1D, MaxPool1D, Dropout, Flatten

In [49]:
def conv(filters: int, kernel_size: int, activation='relu', initializer='he_uniform') -> Conv1D:
    """
    Build a `Conv1D` layer preconfigured with this notebook's defaults.

    Args:
        filters: Number of output filters of the convolution.
        kernel_size: Length of the 1D convolution window.
        activation: Activation handed to the layer ('relu' unless overridden).
        initializer: Kernel initializer handed to the layer
            ('he_uniform' unless overridden).

    Returns:
        A new, not yet connected `Conv1D` layer that uses 'same' padding.
    """
    layer_options = dict(
        filters=filters,
        kernel_size=kernel_size,
        padding='same',
        activation=activation,
        kernel_initializer=initializer,
    )
    return Conv1D(**layer_options)

def dense(units: int, activation='relu', initializer='he_uniform') -> Dense:
    """
    Build a fully-connected `Dense` layer preconfigured with this notebook's defaults.

    Args:
        units: Number of output units of the layer.
        activation: Activation handed to the layer ('relu' unless overridden).
        initializer: Kernel initializer handed to the layer
            ('he_uniform' unless overridden).

    Returns:
        A new, not yet connected `Dense` layer.
    """
    layer_options = dict(
        units=units,
        activation=activation,
        kernel_initializer=initializer,
    )
    return Dense(**layer_options)

def build_model(sequence_length: int, num_features: int = 5, show_summary: bool = True) -> Model:
    """
    Builds neural network classifier model based on 1D convolutional layers.

    The model has two inputs: a sequence tensor of shape
    ``(sequence_length, 4)`` that runs through a stack of convolution and
    pooling stages with skip connections, and a flat vector of
    ``num_features`` additional features that is concatenated with the
    10-unit softmax output of the convolutional branch before the final
    dense head.

    Args:
        sequence_length: Number of positions in the sequence input. Must be
            at least 48, because the pooling layers on the main path shrink
            the length by a total factor of 2 * 2 * 2 * 2 * 3 = 48 before
            flattening.
        num_features: Width of the additional-features input. Defaults to 5,
            the value that used to be hard-coded; it has to match the number
            of feature values fed to the model per sample.
        show_summary: If True (default, same as the previous behaviour),
            ``model.summary()`` is printed before the model is returned.

    Returns:
        A compiled Keras ``Model`` with inputs ``[sequence, features]`` and a
        2-unit sigmoid output.

    Raises:
        ValueError: If ``sequence_length`` is below 48 or ``num_features`` is
            below 1.
    """
    # Fail early with a clear message instead of an obscure shape error deep
    # inside Keras when pooling would leave nothing to flatten.
    if sequence_length < 48:
        raise ValueError(
            "sequence_length must be at least 48, got {}".format(sequence_length))
    if num_features < 1:
        raise ValueError(
            "num_features must be at least 1, got {}".format(num_features))

    # 4 channels per position (presumably one-hot encoded nucleotides -- TODO confirm).
    input_layer = Input(shape=(sequence_length, 4))

    # Kernel of width 3 directly on the raw sequence.
    codon = conv(filters=64, kernel_size=3)(input_layer)
    codon = Dropout(0.5)(codon)

    # Stage 1: length / 2.
    conv1 = MaxPool1D(pool_size=2)(codon)
    conv1 = conv(filters=16, kernel_size=12)(conv1)
    conv1 = conv(filters=16, kernel_size=12)(conv1)

    # Stage 2: length / 4.
    conv2 = MaxPool1D(pool_size=2)(conv1)
    conv2 = conv(filters=16, kernel_size=24)(conv2)
    conv2 = conv(filters=16, kernel_size=24)(conv2)

    # Stage 3: length / 8. Stage 1 is pooled by 4 so that it lines up with the
    # pooled stage 2 output and can be concatenated as a skip connection.
    conv1 = MaxPool1D(pool_size=4)(conv1)
    conv3 = MaxPool1D(pool_size=2)(conv2)
    conv3 = Dropout(0.5)(conv3)
    conv3 = concatenate([conv1, conv3])
    conv3 = conv(filters=16, kernel_size=32)(conv3)
    conv3 = conv(filters=16, kernel_size=32)(conv3)

    # Stage 4: length / 16, with stage 2 (pooled by 4) as skip connection,
    # followed by a final pooling by 3.
    conv2 = MaxPool1D(pool_size=4)(conv2)
    conv4 = MaxPool1D(pool_size=2)(conv3)
    conv4 = concatenate([conv2, conv4])
    conv4 = conv(filters=16, kernel_size=24)(conv4)
    conv4 = MaxPool1D(pool_size=3)(conv4)

    # Dense head of the convolutional branch.
    combined = Flatten()(conv4)
    combined = Dropout(0.5)(combined)
    combined = dense(units=256)(combined)
    combined = dense(units=256)(combined)
    output_1 = Dense(units=10, activation='softmax')(combined)

    # End of CNN, beginning of adding additional bacterial features

    input_2 = Input(shape=(num_features,))
    concat = concatenate([output_1, input_2])

    # The concatenated tensor is already flat; Flatten is kept so the layer
    # graph stays the same as before.
    flat = Flatten()(concat)
    den = dense(units=256)(flat)
    den = dense(units=256)(den)
    output_all = Dense(units=2, activation='sigmoid')(den)

    model = Model(inputs=[input_layer, input_2], outputs=output_all)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', 'mae'])
    if show_summary:
        model.summary()

    return model

In [50]:
# Build the network with the sequence input sized by MAX_LEN_VIRUS.
model = build_model(sequence_length=MAX_LEN_VIRUS)

Model: "model_10"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_23 (InputLayer)           [(None, 497513, 4)]  0                                            
__________________________________________________________________________________________________
conv1d_93 (Conv1D)              (None, 497513, 64)   832         input_23[0][0]                   
__________________________________________________________________________________________________
dropout_38 (Dropout)            (None, 497513, 64)   0           conv1d_93[0][0]                  
__________________________________________________________________________________________________
max_pooling1d_81 (MaxPooling1D) (None, 248756, 64)   0           dropout_38[0][0]                 
___________________________________________________________________________________________