### Inference (Index-based Encoding)

In [1]:
# Class labels, looked up by the index returned from np.argmax on the model
# output. The order presumably matches the label order used when the model was
# trained -- confirm before reordering or adding entries.
target_names = [
    'Human Adenovirus',
    'MERS-CoV',
    'Parainfluenza virus',
    'Rhinovirus-HRV',
    'SARS-CoV-2',
    'Zaire Ebolavirus',
    'Zika virus',
]

In [2]:
import os
import numpy as np
import pandas as pd
from Bio import SeqIO
import tensorflow as tf
import tensorflow.keras.backend as K

In [3]:
def dna_index_encoding(Text, n):
    """Encode a DNA string as a list of integer k-mer indices.

    A window of length ``n`` slides over ``Text`` one position at a time and
    each window is looked up in a fixed table that maps the 64 three-letter
    combinations of A/C/T/G to the integers 1..64.

    Args:
        Text: The nucleotide sequence to encode.
        n: Window (k-mer) length. The table only contains 3-letter keys, so
            any other value makes every window an unknown one.

    Returns:
        A list with the table index of every window found in the table, in
        sequence order. Windows that are not in the table (for example ones
        containing ``N`` or lowercase letters) are reported on stdout and
        skipped, so the list can be shorter than ``len(Text) - n + 1``.
    """
    # Codons listed in index order (first entry -> 1, last entry -> 64).
    # NOTE: the order is not strictly lexicographic ("TTT" comes last, after
    # "TGG"). Keep it exactly as is -- presumably the saved model was trained
    # with this numbering.
    codons = (
        "AAA AAC AAT AAG ACA ACC ACT ACG ATA ATC ATT ATG AGA AGC AGT AGG "
        "CAA CAC CAT CAG CCA CCC CCT CCG CTA CTC CTT CTG CGA CGC CGT CGG "
        "GAA GAC GAT GAG GCA GCC GCT GCG GTA GTC GTT GTG GGA GGC GGT GGG "
        "TAA TAC TAT TAG TCA TCC TCT TCG TTA TTC TTG TGA TGC TGT TGG TTT"
    ).split()
    dna_token = {codon: position for position, codon in enumerate(codons, start=1)}

    encoded = []
    window_count = len(Text) - n + 1
    for start in range(window_count):
        kmer = Text[start:start + n]
        if kmer in dna_token:
            encoded.append(dna_token[kmer])
        else:
            # Same text the KeyError-based report produced: the quoted key.
            print("Unknown sequence:", repr(kmer))

    return encoded

### Load model

In [4]:
# Load the saved Keras model from "model1.h5"; the path is relative, so the
# file must be in the notebook's working directory.
model = tf.keras.models.load_model("model1.h5")

In [53]:
input_data = "J01917.1.fasta"

# Parse every record once, then split into sequences and their identifiers.
records = list(SeqIO.parse(input_data, "fasta"))

# A missing header or an empty file yields zero records without an error;
# fail here with a clear message instead of an IndexError further down.
if not records:
    raise ValueError("No FASTA records found in {!r}".format(input_data))

sequences = [record.seq for record in records]
seq_IDs = [record.id for record in records]

# Only the first record is classified; encode it as overlapping 3-mers.
result = dna_index_encoding(str(sequences[0]), 3)

In [54]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Bring the encoded sequence to a fixed length of 35999 tokens: shorter inputs
# get trailing zeros, longer ones lose their tail. The length is presumably
# the input size the model expects -- confirm against the training setup.
x = pad_sequences(
    [result],
    maxlen=35999,
    padding='post',
    truncating='post',
)
x

array([[19, 10, 53, ...,  0,  0,  0]])

In [55]:
# Sanity check: expect (1, 35999) -- one sequence, padded/truncated to maxlen.
x.shape

(1, 35999)

In [None]:
# Keep the raw model scores in new_pred. Rounding them before taking the
# argmax is lossy: whenever no class scores above 0.5 the rounded vector is
# all zeros and np.argmax silently returns index 0 (the first label).
new_pred = model.predict(x)

# Round only for display so the scores stay readable.
np.round(new_pred, 3)

In [57]:
# Translate the position of the largest entry in new_pred into its class
# label. np.argmax is called without an axis, so it indexes the flattened
# array -- this assumes new_pred holds a single row (one input sequence).
target_names[np.argmax(new_pred)]

'Human Adenovirus'