In [1]:
# Case Studies

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

import numpy as np
from numpy import genfromtxt
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from feature import get_motif_feature

In [3]:
# RNA motifs prepared for the four case studies.
# Each case is a list of motif strings that is later passed to get_motif_feature.

# Case 1: OC-3 targeting TRAF3
case1 = [
    '(C,G) AGACA (G,C) CCA',
]

# Case 2: 6'-fluorosisomicin targeting the rRNA A-site
case2 = [
    '(G,C) UCG (C,G) AAGU',
]

# Case 3: Targapremir-210 targeting the miR-210 precursor (three motifs)
case3 = [
    '(A,U) C (U,A) C',
    '(G,C) C (G,C) C',
    '(G,C) A (C,G)',
]

# Case 4: Isis-11 targeting HCV RNA
case4 = [
    '(G,C) AACUA (C,G)',
]

In [4]:
# print result scores
def print_results(results, num_motif, num_sm, threshold=0.85):
    """Print every motif / small-molecule pair whose score exceeds ``threshold``.

    Parameters
    ----------
    results : sequence of sequences
        One row per (motif, SM) pair, laid out motif-major: the row for
        motif ``i`` and molecule ``j`` is ``results[i * num_sm + j]``.
        Only the first element of each row is read as the score.
    num_motif : int
        Number of motifs covered by ``results``.
    num_sm : int
        Number of small molecules scored against each motif.
    threshold : float, optional
        A pair is reported only when its score is strictly greater than
        this value. Defaults to 0.85, the cutoff previously hard-coded here.

    Raises
    ------
    IndexError
        If ``results`` has fewer than ``num_motif * num_sm`` rows.
    """
    for i in range(num_motif):
        for j in range(num_sm):
            score = results[i * num_sm + j][0]
            if score > threshold:
                # Label and score are emitted on one line: "motif i, SM j: score".
                print('motif ' + str(i) + ', SM ' + str(j) + ': ', end='')
                print(score)

In [5]:
# making prediction
def predict(DNN_model, motif_feature, SM_feature):
    """Score every motif / small-molecule pair and print the confident hits.

    Each motif feature vector is concatenated with each small-molecule
    feature vector (motif-major order), the pairs are fed to the model as a
    batch of shape ``(num_pairs, feature_len, 1)``, and the scores are passed
    to ``print_results``.

    Parameters
    ----------
    DNN_model : object
        Model exposing ``predict(array, verbose=0)``; one output row is
        expected per input pair.
    motif_feature : sequence of 1-D arrays
        One feature vector per motif (a case may contain several motifs).
    SM_feature : sequence of 1-D arrays
        One feature vector per small molecule.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    num_motif, num_sm = len(motif_feature), len(SM_feature)

    # Nothing to pair up: skip the model call instead of sending it an
    # empty batch.
    if num_motif == 0 or num_sm == 0:
        return

    # Build one concatenated row per (motif, SM) pair. The motif-major order
    # matches the index arithmetic (i * num_sm + j) used by print_results.
    motif_SM = np.array([
        np.concatenate((motif, sm))
        for motif in motif_feature
        for sm in SM_feature
    ])

    # Add the trailing channel axis expected by the CNN. The row count is
    # fixed explicitly and the feature width is inferred, so a feature-width
    # mismatch can never silently regroup values into a different number of
    # rows (the previous hard-coded width was 1620).
    motif_SM = motif_SM.reshape(num_motif * num_sm, -1, 1)

    # Predict motif-SM associations and report those above the threshold.
    results = DNN_model.predict(motif_SM, verbose=0)
    print_results(results, num_motif, num_sm)

In [6]:
# Restore the trained network from its saved HDF5 file.
DNN_model = tf.keras.models.load_model(
    './model/best_DNN_model.h5'
)

# Read the small-molecule feature table (first CSV line skipped as a header).
# Per the original notes it holds 50 test molecules, and the expected hits are
# case 1: SM11; case 2: SM5; case 3: SM8; case 4: SM10.
SM_feature = np.genfromtxt(
    './data/SM_feature.csv',
    delimiter=',',
    skip_header=1,
)

In [7]:
# Case 1: turn the motif strings into feature vectors
motif_feature = get_motif_feature(case1)

# Score the case-1 motif against every small molecule and print the hits
predict(
    DNN_model=DNN_model,
    motif_feature=motif_feature,
    SM_feature=SM_feature,
)

motif 0, SM 10: 0.99227387
motif 0, SM 11: 0.9964098
motif 0, SM 46: 0.93303007


In [8]:
# Case 2: turn the motif strings into feature vectors
motif_feature = get_motif_feature(case2)

# Score the case-2 motif against every small molecule and print the hits
predict(
    DNN_model=DNN_model,
    motif_feature=motif_feature,
    SM_feature=SM_feature,
)

motif 0, SM 5: 0.85699654
motif 0, SM 9: 0.9648426
motif 0, SM 10: 0.9968967
motif 0, SM 11: 0.9997902
motif 0, SM 40: 0.8636249
motif 0, SM 41: 0.96957284
motif 0, SM 46: 0.99774563


In [9]:
# Case 3: turn the motif strings into feature vectors (this case has three motifs)
motif_feature = get_motif_feature(case3)

# Score each case-3 motif against every small molecule and print the hits
predict(
    DNN_model=DNN_model,
    motif_feature=motif_feature,
    SM_feature=SM_feature,
)

motif 0, SM 7: 0.9877862
motif 0, SM 8: 0.9704423
motif 2, SM 6: 0.87052536
motif 2, SM 8: 0.9973473


In [10]:
# Case 4: turn the motif strings into feature vectors
motif_feature = get_motif_feature(case4)

# Score the case-4 motif against every small molecule and print the hits
predict(
    DNN_model=DNN_model,
    motif_feature=motif_feature,
    SM_feature=SM_feature,
)

motif 0, SM 9: 0.9781861
motif 0, SM 10: 0.9994508
motif 0, SM 11: 0.9978571
motif 0, SM 12: 0.9693369
motif 0, SM 37: 0.93156445
motif 0, SM 38: 0.9450922
motif 0, SM 39: 0.98003674
motif 0, SM 40: 0.9871629
motif 0, SM 41: 0.9873412
motif 0, SM 46: 0.99314255
