In [1]:
from keras.models import load_model
from pkg_resources import resource_filename
import numpy as np
import pandas as pd
from Bio.Seq import Seq
import keras.backend as kb

def one_hot_encode(seq):
    """One-hot encode a nucleotide string.

    The sequence is upper-cased, then each base is turned into a row of a
    ``(len(seq), 4)`` integer array:

    * ``A`` -> ``[1, 0, 0, 0]``
    * ``C`` -> ``[0, 1, 0, 0]``
    * ``G`` -> ``[0, 0, 1, 0]``
    * ``T`` -> ``[0, 0, 0, 1]``
    * ``N`` -> ``[0, 0, 0, 0]``

    Parameters
    ----------
    seq : str
        Nucleotide sequence; case-insensitive.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(seq), 4)`` with one row per input character.

    Notes
    -----
    Characters other than A/C/G/T/N are not rejected: they are indexed by
    their byte value modulo 5, so they silently receive one of the five rows
    above. Validate the input beforehand if that matters.
    """
    # Row 0 is the all-zero encoding used for 'N'; rows 1-4 are A, C, G, T.
    # (Named `lookup` rather than `map` to avoid shadowing the builtin.)
    lookup = np.asarray([[0, 0, 0, 0],
                         [1, 0, 0, 0],
                         [0, 1, 0, 0],
                         [0, 0, 1, 0],
                         [0, 0, 0, 1]])

    # Rewrite each base as the control character whose byte value is its row
    # index in `lookup`.
    seq = seq.upper().replace('A', '\x01').replace('C', '\x02')
    seq = seq.replace('G', '\x03').replace('T', '\x04').replace('N', '\x00')

    # np.fromstring in binary mode is deprecated; np.frombuffer over the
    # encoded bytes yields the same int8 codes without the warning.
    codes = np.frombuffer(seq.encode('latin-1'), dtype=np.int8)
    return lookup[codes % 5]

def categorical_crossentropy_2d(y_true, y_pred):
    """Negated mean of the label-weighted log-predictions for channels 0 and 1.

    Both arguments are indexed as ``[:, :, k]``, so they must have at least
    three axes with channels 0 and 1 present on the last one indexed.
    A constant ``1e-10`` is added to each prediction before taking the log.
    """
    eps = 1e-10
    term_0 = y_true[:, :, 0] * kb.log(y_pred[:, :, 0] + eps)
    term_1 = y_true[:, :, 1] * kb.log(y_pred[:, :, 1] + eps)
    return -kb.mean(term_0 + term_1)

# Checkpoint files c1 .. c5; `paths` is a generator and is consumed by the
# comprehension that builds `models`.
paths = ('Model/mouseRRACH10000_c{}.h5'.format(fold) for fold in range(1, 6))
models = [
    load_model(
        checkpoint,
        custom_objects={'categorical_crossentropy_2d': categorical_crossentropy_2d},
    )
    for checkpoint in paths
]
# Total number of 'N' padding characters added around the input
# (context // 2 on each side).
context = 10000

## Replace the input with your custom sequence
# Nucleotide string to score. It is padded with context//2 'N' characters on
# each side and one-hot encoded before being passed to the models.
input_sequence = "CCAGCAATGAGCAGTCTTCTTTCTCCTGTCTCCTAGGCCTACTTAGGAGCCTACGTTTTTATCATCTTCGCTGCCTTCCTCATCTTCTTCCTAATCTTCACCTTCTTCAAAGTCCCGGAGACCAAAGGCAGGACTTTCGAGGACATTGCCCGGGCCTTCGAGGGGCAGGCGCACTCTGGAAAAGGCCCTGCCGGTGTGGAGTTGAACAGCATGCAGCCGGTCAAGGAGACCCCTGGCAACGCCTGAGCCGGGAGCACCTCCTTCACCTCCCTCCACTGTGGAAAGCCACCTCCCCTTAAGTGGGGAGACCTCATCAGGATGAACCAGGACTGCTTCTGAGTGCTGCTATTCTCACCCCACACACTCCATGAAAACTCAGCTGCACCCAATGCTGGGGTTGACCAGATCGCCAATGACTTTTAAGTGTTTGATTTGGGGGATACTTCCCTTGTAATCAGGAAAGACCAAGGAAGCCTACCTTTATATTGGGAGGGAAGGGGCCGCAGCTCCCCTTAGGTTCTAAAACCCGCTAACTAGGACAACTAGGGAGTAGGAGTAGGGTGCAGCACCGCCCCCACCACCACCTTAGTTTGCCAGGAAACAGATCTTCATACCCAGTGTGGAGGGCCCTGGGGGATTGAACAAAAGACCCCCTCCTCACTTGATACAGCTCTGCACAGCAAAGTAACTTGAGTTTTATTTATTTTATCTTCAGGTTGAATTGCATAAATATTTATTTTTTTAATTGTAATTTTACCAAATAATGAGACAGTAACGAAATTGAGGTTGGAGGGAGGTGTT"

# Pad both ends with context//2 'N' characters (encoded as all-zero rows) and
# add a leading batch axis of size 1.
x = one_hot_encode('N'*(context//2) + input_sequence + 'N'*(context//2))[None, :]
# Average the predictions of every loaded model; iterating over `models`
# directly avoids assuming the ensemble always has exactly five members.
y = np.mean([member.predict(x) for member in models], axis=0)
# Channel 1 of the averaged output for the single sequence in the batch.
Probability = y[0, :, 1]

  return map[np.fromstring(seq, np.int8) % 5]




In [15]:
# Show only the entries of Probability that exceed 0.5.
Probability[Probability>0.5]

array([0.5534225, 0.7042742], dtype=float32)

In [9]:
# Print the layer-by-layer summary of the first model in the ensemble.
models[0].summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, None, 4)]    0           []                               
                                                                                                  
 conv1d_40 (Conv1D)             (None, None, 32)     160         ['input_2[0][0]']                
                                                                                                  
 batch_normalization_33 (BatchN  (None, None, 32)    128         ['conv1d_40[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 activation_33 (Activation)     (None, None, 32)     0           ['batch_normalization_33[0]

In [7]:
# Shape of the ensemble-averaged prediction array.
y.shape

(1, 801, 2)

In [3]:
# Display the whole Probability vector (channel 1 of the averaged prediction).
# NOTE(review): this dumps every element; a slice such as Probability[:10]
# would keep the output short.
Probability

array([2.39197095e-12, 5.03653976e-13, 2.43502245e-12, 4.39970109e-12,
       6.33702596e-12, 7.26805118e-13, 2.75068466e-12, 1.39214525e-11,
       8.72496953e-13, 1.99020230e-13, 8.92018555e-13, 7.52533290e-13,
       1.12425054e-12, 1.80888171e-12, 5.49833996e-13, 1.83132350e-12,
       1.27788470e-12, 6.01147441e-13, 1.03988970e-11, 7.08475986e-12,
       5.76699868e-11, 7.42357217e-12, 1.06791919e-11, 8.82055887e-12,
       6.48274750e-12, 2.89576371e-12, 3.07737061e-12, 1.31466157e-11,
       2.56061513e-12, 2.36886205e-12, 6.77526232e-13, 1.55742541e-12,
       2.02635673e-12, 1.91920075e-13, 5.58974091e-13, 3.04471760e-11,
       1.53885394e-11, 3.69861178e-13, 5.93705098e-13, 8.27691214e-12,
       2.12705851e-12, 1.15277091e-12, 7.48258931e-13, 1.62248513e-12,
       6.00135774e-11, 2.04626767e-12, 5.63718794e-11, 9.79789361e-10,
       2.03237382e-09, 6.64220632e-12, 1.42008833e-12, 5.42737253e-12,
       1.06469746e-11, 5.49489165e-13, 3.76655434e-12, 9.29349393e-12,
      

In [None]:
# Load the c1 checkpoint on its own, passing the custom loss function through
# custom_objects.
model = load_model(
    'Model/mouseRRACH10000_c1.h5',
    custom_objects={'categorical_crossentropy_2d': categorical_crossentropy_2d},
)

In [11]:
# Print the layer-by-layer summary of the separately loaded `model`.
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, None, 4)]    0           []                               
                                                                                                  
 conv1d_40 (Conv1D)             (None, None, 32)     160         ['input_2[0][0]']                
                                                                                                  
 batch_normalization_33 (BatchN  (None, None, 32)    128         ['conv1d_40[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 activation_33 (Activation)     (None, None, 32)     0           ['batch_normalization_33[0]

In [3]:
# Print the base of input_sequence at every index where Probability > 1e-7,
# one base per line.
for position in np.nonzero(Probability > 1e-7)[0]:
    print(input_sequence[position])


A
G
G
A
C
A
G
T
A
G
G
G
A
G
G
G
A
G
A
C
G
A
A
C
A
G
G
G
C
G
A
C
C
C
A
A
G
G
G
G
T
A
A
G
A
A
G
C
A
A
A
C
G
G
G
G
G
T
C
A
G
A
A
G
T
G
C
G
A
G
A
A
A
A
G
A
A
T
A
A
G


In [13]:
# Shape of the Probability vector.
Probability.shape

(801,)

In [16]:
# Number of characters in the unpadded input sequence.
len(input_sequence)

801