In [1]:
import itertools
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Read the covid contig at data/covid_contig_1.txt; it's one line, so strip the newline characters

def read_contig(file):
    """Read a text file and return its contents with every newline removed.

    Args:
        file: Path of the file to open in text mode.

    Returns:
        The whole file as a single string in which each ``'\\n'`` has been
        dropped; all other characters (including spaces) are left as they are.
    """
    with open(file, 'r') as handle:
        contents = handle.read()
    # Join the lines back together by removing the line terminators.
    return contents.replace('\n', '')

# Load the contig as one newline-free string. The path is relative, so it is
# resolved against the directory the kernel is running in.
covid_contig = read_contig('data/covid_contig_1.txt')

In [5]:
import math

def calculate_hidden_path_probability(path, transition_matrix, initial_probability=0.5):
    """Return the natural-log probability of a hidden state path.

    The path probability is ``initial_probability`` (for the first state)
    times the transition probability of every consecutive pair of states.
    The product is accumulated in log space to avoid floating-point underflow
    on long paths.

    Args:
        path: Sliceable sequence of state labels, e.g. the string ``"ABBA"``.
        transition_matrix: Nested mapping where
            ``transition_matrix[prev][cur]`` is the probability of moving
            from state ``prev`` to state ``cur``.
        initial_probability: Probability assigned to the first state.
            Defaults to 0.5, i.e. two equally likely starting states.

    Returns:
        The log probability as a float. ``-math.inf`` is returned when the
        initial probability or any transition along the path is 0 (an
        impossible path), so that ``math.exp`` of the result is 0.0.
        The initial term is included even when ``path`` is empty.

    Raises:
        KeyError: If a state on the path is missing from ``transition_matrix``.
        ValueError: If a probability is negative (raised by ``math.log``).
    """
    # math.log(0) raises "math domain error"; an impossible path is log(0) = -inf.
    if initial_probability == 0:
        return -math.inf
    log_probability = math.log(initial_probability)

    # Walk consecutive (previous, current) state pairs along the path.
    for prev_state, current_state in zip(path, path[1:]):
        transition_prob = transition_matrix[prev_state][current_state]
        if transition_prob == 0:
            return -math.inf
        log_probability += math.log(transition_prob)

    return log_probability

# Problem input: the hidden path to score, the state labels, and the
# state-to-state transition probabilities (outer key = from, inner key = to).
path = "BBABAAAABBBABBBAAAAAABABABBBBBBAABAABBABABAAABBBBA"
states = ["A", "B"]
transition_matrix = {
    "A": {
        "A": 0.911,
        "B": 0.089,
    },
    "B": {
        "A": 0.45,
        "B": 0.55,
    },
}

# Score the path in log space, then map the result back to a plain probability.
log_result = calculate_hidden_path_probability(path, transition_matrix)
result = math.exp(log_result)

# Report both forms, one per line.
print(
    f"Log probability: {log_result:.12f}",
    f"Actual probability: {result:.25f}",
    sep="\n",
)

Log probability: -46.373814128168
Actual probability: 0.0000000000000000000072462


In [8]:
import math

def create_hidden_path_permutations(states, length):
    """Enumerate every hidden path of a given length over ``states``.

    Paths are produced in the order of ``states`` with the first position
    varying slowest, e.g. ``["F", "B"]`` and length 2 gives
    ``["FF", "FB", "BF", "BB"]``.

    Args:
        states: Iterable of state labels (strings).
        length: Number of positions in each path. 0 yields the single empty
            path ``[""]``.

    Returns:
        A new list holding ``len(states) ** length`` path strings. The list
        is always freshly built, so mutating it never touches ``states``.

    Raises:
        ValueError: If ``length`` is negative (raised by ``itertools.product``).
    """
    # itertools.product iterates instead of recursing, so length <= 0 can no
    # longer recurse without bound; it advances the rightmost position first.
    return ["".join(combo) for combo in itertools.product(states, repeat=length)]


def calculate_outcome_probability(emission_sequence, hidden_path, emission_matrix):
    """Return the natural-log probability of an emission sequence given a hidden path.

    Each emitted symbol is paired with the hidden state at the same position
    and the per-position emission probabilities are multiplied, accumulated
    in log space.

    Args:
        emission_sequence: Sequence of emitted symbols, e.g. ``"THTH"``.
        hidden_path: Sequence of hidden states, same length as
            ``emission_sequence``.
        emission_matrix: Nested mapping where
            ``emission_matrix[state][symbol]`` is the probability that
            ``state`` emits ``symbol``.

    Returns:
        The log probability as a float: 0.0 for empty input, and
        ``-math.inf`` when any emission on the path has probability 0.

    Raises:
        ValueError: If the two sequences differ in length, or if a
            probability is negative (raised by ``math.log``).
        KeyError: If a state or symbol is missing from ``emission_matrix``.
    """
    # zip() would silently drop the tail of the longer sequence and return a
    # probability for only part of the data, so reject mismatched inputs.
    if len(emission_sequence) != len(hidden_path):
        raise ValueError(
            "emission_sequence and hidden_path must have the same length "
            f"({len(emission_sequence)} != {len(hidden_path)})"
        )

    log_probability = 0.0

    for emission, state in zip(emission_sequence, hidden_path):
        emission_prob = emission_matrix[state][emission]
        # math.log(0) raises "math domain error"; an impossible emission is -inf.
        if emission_prob == 0:
            return -math.inf
        log_probability += math.log(emission_prob)

    return log_probability

# Parse input
emission_sequence = "THTHHHTHTTH"
alphabet = ["T", "H"]

# Define this cell's hidden states and emission probabilities BEFORE the paths
# are enumerated. Otherwise, on a fresh kernel, the enumeration would use the
# ["A", "B"] states left over from the previous cell and the lookup in
# emission_matrix would fail with a KeyError.
states = ["F", "B"]
emission_matrix = {
    "F": {"T": 0.50, "H": 0.50},
    "B": {"T": 0.25, "H": 0.75}
}

# Enumerate every possible hidden path (e.g. FFFFFFFFFFF, FFFBBBBBFFF, ...),
# then calculate the prob of the emission sequence given each hidden path.
# This yields len(states) ** len(emission_sequence) paths.
hidden_path_permutations = create_hidden_path_permutations(states, len(emission_sequence))
probabilities = {}  # key is path, value is prob of emission sequence given path

for hidden_path in hidden_path_permutations:

    # Calculate log probability
    log_result = calculate_outcome_probability(emission_sequence, hidden_path, emission_matrix)

    # Convert log probability to actual probability
    result = math.exp(log_result)
    probabilities[hidden_path] = result

    print(f"Path {hidden_path}")
    print(f"Log probability: {log_result:.12f}")
    print(f"Actual probability: {result:.35f}")

# Print the hidden path with the highest prob (first one wins on ties)
max_prob = max(probabilities, key=probabilities.get)
print(f"Max prob path: {max_prob} + {probabilities[max_prob]}")

Path FFFFFFFFFFF
Log probability: -7.624618986159
Actual probability: 0.00048828124999999994578989137572478
Path FFFFFFFFFFB
Log probability: -7.219153878051
Actual probability: 0.00073242187500000010842021724855044
Path FFFFFFFFFBF
Log probability: -8.317766166719
Actual probability: 0.00024414062500000016263032587282567
Path FFFFFFFFFBB
Log probability: -7.912301058611
Actual probability: 0.00036621093750000000000000000000000
Path FFFFFFFFBFF
Log probability: -8.317766166719
Actual probability: 0.00024414062500000016263032587282567
Path FFFFFFFFBFB
Log probability: -7.912301058611
Actual probability: 0.00036621093750000000000000000000000
Path FFFFFFFFBBF
Log probability: -9.010913347279
Actual probability: 0.00012207031250000008131516293641283
Path FFFFFFFFBBB
Log probability: -8.605448239171
Actual probability: 0.00018310546875000000000000000000000
Path FFFFFFFBFFF
Log probability: -7.219153878051
Actual probability: 0.00073242187500000010842021724855044
Path FFFFFFFBFFB
Log probabi