In [1]:
"""
Example usage of C3PO. 
Author: Angela M Yu, Copyright 2023
License: GPL-3.0 license
"""

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

"""
Adapted from https://github.com/pjsample/human_5utr_modeling 
"""
def one_hot_encode(sequences, max_seq_len=25, mask_val=-1, padding='left'):
    """One-hot encode nucleotide sequences into a fixed-size float array.

    Each sequence is truncated to ``max_seq_len`` characters and lower-cased,
    then every character is mapped to a 4-element vector:
    ``a``/``c``/``g``/``t`` to their one-hot rows, ``n`` to all zeros, and
    ``m`` to ``mask_val`` in every channel. Positions not covered by the
    sequence keep ``mask_val``.

    Args:
        sequences: Sized iterable of nucleotide strings.
        max_seq_len: Length of the second axis; longer sequences are truncated.
        mask_val: Fill value for positions that hold no nucleotide.
        padding: Where the encoded sequence is written inside its row.
            ``'left'`` writes it at the start of the row (mask values fill the
            tail); ``'right'`` writes it at the end (mask values fill the head).

    Returns:
        ``numpy.ndarray`` of shape ``(len(sequences), max_seq_len, 4)``.

    Raises:
        ValueError: If ``padding`` is neither ``'left'`` nor ``'right'``.
        KeyError: If a sequence contains a character other than
            ``a, c, g, t, n, m`` (case-insensitive).
    """
    # Validate once, up front: the original built the exception without
    # raising it, so a bad value silently produced an all-mask array.
    if padding not in ('left', 'right'):
        raise ValueError(f'padding {padding} not recognized')

    # Dictionary returning one-hot encoding of nucleotides.
    nuc_d = {'a': [1, 0, 0, 0],
             'c': [0, 1, 0, 0],
             'g': [0, 0, 1, 0],
             't': [0, 0, 0, 1],
             'n': [0, 0, 0, 0],
             'm': [mask_val, mask_val, mask_val, mask_val]}

    # Create the output matrix pre-filled with the mask value.
    one_hot_seqs = np.full([len(sequences), max_seq_len, 4], mask_val, dtype=np.float64)

    # Iterate through sequences and one-hot encode
    for i, seq in enumerate(sequences):
        # Truncate and normalise case
        seq = seq[:max_seq_len].lower()
        if not seq:
            # Nothing to write; the row stays fully masked. Also avoids the
            # `-0:` slice, which would select the whole row for 'right'.
            continue
        # Convert to array of shape (len(seq), 4)
        one_hot_seq = np.array([nuc_d[x] for x in seq])
        # Write into the matrix at the requested end of the row
        if padding == 'left':
            one_hot_seqs[i, :len(seq), :] = one_hot_seq
        else:
            one_hot_seqs[i, -len(seq):, :] = one_hot_seq

    return one_hot_seqs


In [2]:
"""
Define example sequences and convert into one-hot representation
"""

# Three 25-nt example sequences; each becomes one row of the encoded array.
example_sequences = [
    "GGCAAATGTTTTTATTTGTACACTC",
    "CAAGTTAACAACAACAATTGCATTC",
    "CAATGCCTACTAAATAAAAGATTTA",
]

# Encode with the function's default length, mask value and padding.
onehot_seqs = one_hot_encode(sequences=example_sequences)


In [3]:
"""
Load C3PO model
"""

# Load the saved Keras model from the relative path "C3PO.hdf5", so the file
# is expected to be in the notebook's working directory.
# NOTE: the variable name is spelled with the digit zero ("C3P0_model") while
# the file name uses the letter O ("C3PO"); the prediction cell refers to the
# digit-zero spelling, so keep the two in sync if this is ever renamed.
C3P0_model = load_model("C3PO.hdf5")


[2023-01-06 22:20:24.078 ip-172-31-11-19.us-west-1.compute.internal:26359 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2023-01-06 22:20:24.100 ip-172-31-11-19.us-west-1.compute.internal:26359 INFO profiler_config_parser.py:111] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.


In [4]:
"""
Predict using C3PO
"""

# `workers` / `use_multiprocessing` are intentionally not passed: Keras
# documents them as applying only to generator / keras.utils.Sequence inputs,
# so they have no effect on a NumPy array, and Keras 3 removed them from
# Model.predict (passing them raises TypeError there).
ratio_predictions = C3P0_model.predict(onehot_seqs)
print("Columns are predictions for Cmpd2 doses 0.5 μM, 2.5 μM, 12.5 μM, respectively")
print("Rows are each sequence, in the same order as input")
print("ratio_predictions = \n", ratio_predictions)


Columns are predictions for Cmpd2 doses 0.5 μM, 2.5 μM, 12.5 μM, respectively
Rows are each sequence, in the same order as input
ratio_predictions = 
 [[-0.02325627 -0.4374298  -0.722089  ]
 [ 0.06323956  0.16961205  0.34404048]
 [-0.18593498 -0.18253124 -0.15191878]]
