In [1]:
from src.models import hmm, decoders
import numpy as np

In [2]:
# Label vocabularies for the HMM; a label's position in its list is the index
# used for it in the probability arrays (observation_states=[i,j], hidden_states=[i,j]).
# Observed regulatory activity in the TBX5 TAD of cardiomyocytes.
observation_states = ['regulatory', 'regulatory-potential']
# Listed in the order of the two cCRE selection strategies (encode_atac, atac).
hidden_states = ['encode-atac', 'atac']

# HMM input data for progenitor cardiomyocytes (prefix: prog_cm).
prog_cm_data = np.load('./data/ProjectDeliverable-ProgenitorCMs.npz')

# Build the progenitor-CM hmm.HiddenMarkovModel from the state labels followed by
# the prior, transition, and emission probability arrays, passed positionally in
# exactly that order.
prog_cm_hmm_object = hmm.HiddenMarkovModel(
    observation_states,
    hidden_states,
    *(
        prog_cm_data[array_name]
        for array_name in (
            'prior_probabilities',       # priors of the hidden states, in hidden_states order
            'transition_probabilities',  # indexed as transition_probabilities[:,hidden_states[i]]
            'emission_probabilities',    # indexed as emission_probabilities[hidden_states[i],:][:,observation_states[j]]
        )
    ),
)

In [3]:
# Instantiate decoders.ViterbiAlgorithm with the progenitor cardiomyocyte's HMM
prog_cm_viterbi_instance = decoders.ViterbiAlgorithm(prog_cm_hmm_object)

# Decode the hidden states (i.e., CRE selection strategy) for the progenitor CMs
# so the model performance can be evaluated against the stored ground truth.
evaluate_viterbi_decoder_using_observation_states_of_prog_cm = prog_cm_viterbi_instance.best_hidden_state_sequence(prog_cm_data['observation_states'])

# Accuracy = number of positions where the decoded label equals the stored
# hidden-state label, divided by the number of observations.
prog_cm_accuracy = np.sum(prog_cm_data['hidden_states'] == evaluate_viterbi_decoder_using_observation_states_of_prog_cm)/len(prog_cm_data['observation_states'])

# NOTE: Model is expected to perform with 80% accuracy.
# The accuracy is a computed float, so compare with a tolerance instead of exact
# equality, and report the observed value so a failure is diagnosable.
assert np.isclose(prog_cm_accuracy, 0.8), \
    "Expected 0.8 Viterbi decoding accuracy for progenitor CMs, got {:.3f}".format(prog_cm_accuracy)


[1. 0. 0. 0. 1. 0. 1. 1. 1. 1.]
[[3.00000000e-01 8.00000000e-02 1.28000000e-02 2.04800000e-03
  4.91520000e-04 1.96608000e-04 4.71859200e-05 2.83115520e-05
  1.13246208e-05 3.01989888e-06]
 [4.00000000e-01 4.00000000e-02 9.60000000e-03 1.53600000e-03
  9.83040000e-04 9.83040000e-05 9.43718400e-05 3.77487360e-05
  1.50994944e-05 1.50994944e-06]]


AssertionError: 

In [7]:
# Show the decoded sequence, the ground-truth sequence, and the resulting
# accuracy for the progenitor CMs; each label is followed by its value on the next line.
print("prog cm decoded states:",
      evaluate_viterbi_decoder_using_observation_states_of_prog_cm,
      sep="\n")
print("prog cm hidden states:",
      prog_cm_data['hidden_states'],
      sep="\n")
print("accuracy: ",
      np.sum(prog_cm_data['hidden_states'] == evaluate_viterbi_decoder_using_observation_states_of_prog_cm)/len(prog_cm_data['observation_states']),
      sep="\n")

prog cm decoded states:
['atac' 'encode-atac' 'encode-atac' 'encode-atac' 'atac' 'encode-atac'
 'atac' 'atac' 'atac' 'atac']
prog cm hidden states:
['atac' 'encode-atac' 'encode-atac' 'atac' 'atac' 'encode-atac'
 'encode-atac' 'encode-atac' 'encode-atac' 'encode-atac']
accuracy: 
0.5


In [None]:
# HMM input data for primitive cardiomyocytes (prefix: prim_cm)
prim_cm_data = np.load('./data/ProjectDeliverable-PrimitiveCMs.npz')

# Instantiate decoders.ViterbiAlgorithm with the progenitor cardiomyocyte's HMM;
# the progenitor model is reused here to decode the primitive CM observations.
prim_cm_viterbi_instance = decoders.ViterbiAlgorithm(prog_cm_hmm_object)

# Decode the hidden states of the primitive cardiomyocyte's regulatory observation states
decoded_hidden_states_for_observed_states_of_prim_cm = prim_cm_viterbi_instance.best_hidden_state_sequence(prim_cm_data['observation_states'])

# Accuracy = number of positions where the decoded label equals the stored
# hidden-state label, divided by the number of observations.
prim_cm_accuracy = np.sum(prim_cm_data['hidden_states'] == decoded_hidden_states_for_observed_states_of_prim_cm)/len(prim_cm_data['observation_states'])

# The accuracy is a computed float, so compare with a tolerance instead of exact
# equality, and report the observed value so a failure is diagnosable.
assert np.isclose(prim_cm_accuracy, 0.8), \
    "Expected 0.8 Viterbi decoding accuracy for primitive CMs, got {:.3f}".format(prim_cm_accuracy)

In [None]:
# Show the decoded sequence, the ground-truth sequence, and the resulting
# accuracy for the primitive CMs; each label is followed by its value on the next line.
print("prim cm decoded states:",
      decoded_hidden_states_for_observed_states_of_prim_cm,
      sep="\n")
print("prim cm hidden states:",
      prim_cm_data['hidden_states'],
      sep="\n")
print("accuracy: ",
      np.sum(prim_cm_data['hidden_states'] == decoded_hidden_states_for_observed_states_of_prim_cm)/len(prim_cm_data['observation_states']),
      sep="\n")

In [None]:
# Sanity check: compare the stored ground-truth hidden-state labels of the
# primitive and progenitor CM data sets with `==`; the comparison result is
# displayed as this cell's output.
prim_cm_data['hidden_states'] == prog_cm_data['hidden_states']

In [None]:
# Sanity check: compare the stored observation sequences of the primitive and
# progenitor CM data sets with `==`; the comparison result is displayed as this
# cell's output.
prim_cm_data['observation_states'] == prog_cm_data['observation_states']