In [1]:
#FINAL SCRIPT
# !/usr/bin/env python3
import sys
import time

# Wall-clock reference taken at load time; the final module-level print reports the time elapsed since this point.
start_time = time.time()
########################################################################
# File:problem22.py
#  executable: problem22.py
#  purpose: To compute the Probability of a String Emitted by an HMM.
#  stderr: errors and status
#  stdout:
#
# Author: Arushi Mithal
#
#
# Notes:  1. To run the program from command line terminals:
#          Unix/Windows: python  problem22.py < input.txt > output.out
#
#
#
# Specs of the laptop on which the tests were run:
#        Windows 10-64bit. Processor i-5 5200U CPU @2.20GHz 2.20 GHz
#        Internal RAM  4.00 GB
########################################################################

class OutcomeLikelihood:
    """
        Compute the probability that an HMM emits a given observed string.

        Every hidden state is given the same starting probability (1 / number of hidden states). The observed
        string is then processed one symbol at a time: for each hidden state, the probabilities of all paths
        ending in that state are summed over every possible previous state. The result is the sum of these
        path probabilities over all hidden states after the last symbol.

        Both matrices are dictionaries keyed by concatenated names: transitionMatrix[fromState + toState] and
        emissionMatrix[symbol + state].
    """

    def __init__(self, string, E, states, transitionDict, emissionDict):
        """ Store the observed string and the HMM parameters.

            string: the observed string, read one symbol at a time.
            E: the observable symbols (stored only; the computation does not read it).
            states: the hidden-state names, in the order used for the summation.
            transitionDict: transition probabilities keyed by fromState + toState.
            emissionDict: emission probabilities keyed by symbol + state.
        """
        self.observedString = string
        self.possibleObservableStates = E
        self.possibleHiddenStates = states
        self.transitionMatrix = transitionDict
        self.emissionMatrix = emissionDict
        self.initialProbabilities = {}

    def observedStringProbability(self):
        """ Calculate and return the probability of the observed string being emitted by the HMM.

            Returns 0 when there are no hidden states and 1.0 for an empty observed string (with at least
            one hidden state). Raises KeyError when a required transition or emission entry is missing.
            As a side effect, self.initialProbabilities is replaced by the probabilities after the first symbol.
        """
        observed = self.observedString
        hiddenStates = self.possibleHiddenStates
        transitionMatrix = self.transitionMatrix
        emissionMatrix = self.emissionMatrix

        if not hiddenStates:
            # Without hidden states there is no path through the model, so nothing is accumulated.
            return 0
        if not observed:
            # Nothing has to be emitted: the starting probabilities alone sum to 1.
            return 1.0

        # Uniform start: each hidden state is equally likely before the first symbol is emitted.
        uniformStart = 1 / len(hiddenStates)
        firstSymbol = observed[0]
        # Rebuilt on every call so that no entries from an earlier computation linger.
        self.initialProbabilities = {
            state: uniformStart * emissionMatrix[firstSymbol + state] for state in hiddenStates
        }

        # Only the previous time step is needed to compute the next one, so no history is kept.
        currentProbabilities = self.initialProbabilities
        for symbol in observed[1:]:
            currentProbabilities = {
                state: sum(
                    probability * transitionMatrix[previousState + state] * emissionMatrix[symbol + state]
                    for previousState, probability in currentProbabilities.items()
                )
                for state in hiddenStates
            }

        # The string can end in any hidden state, so add up the probabilities of all of them.
        return sum(currentProbabilities.values())

def parse(filename="rosalind_ba10d_762_2_dataset.txt"):
    """ Parse an HMM dataset and return (string, E, states, transitionDict, emissionDict).

        Layout read from the input, with sections separated by lines starting with "---":
        line 0 holds the observed string, line 2 the observable symbols, line 4 the hidden states;
        the next section is the transition table and the one after it the emission table. Each table
        has a header row of column labels followed by one row per hidden state (row label first).

        filename: path of the dataset file; pass None to read the dataset from standard input.

        transitionDict is keyed by fromState + toState and emissionDict by symbol + state.
        Raises IndexError when expected lines or tables are missing and ValueError when a table
        entry is not a number.
    """
    if filename is None:
        data = sys.stdin.readlines()
    else:
        with open(filename) as file:
            data = file.readlines()

    def delimitedRows(lines):
        """ Return the whitespace-split lines found between alternating "---" separator lines. """
        rows = []
        inside = False
        for ln in lines:
            if ln.startswith("---"):
                inside = not inside
                continue
            if inside:
                rows.append(ln.split())
        return rows

    string = data[0].rstrip()
    E = data[2].split()
    states = data[4].split()

    # Scanning from line 4, the first separator-enclosed section is the transition table.
    transition = delimitedRows(data[4:])
    # Scanning from line 6 skips the separator that opens the transition table, so the
    # next separator opens the emission table, which runs to the end of the input.
    emission = delimitedRows(data[6:])

    # Name each entry by its own row label and column label so that the key always
    # matches the cell it was read from.
    transitionDict = {
        row[0] + target: float(value)
        for row in transition[1:len(states) + 1]
        for target, value in zip(transition[0], row[1:])
    }
    emissionDict = {
        symbol + row[0]: float(value)
        for row in emission[1:len(states) + 1]
        for symbol, value in zip(emission[0], row[1:])
    }
    return string, E, states, transitionDict, emissionDict

def main():
    """ Parse the dataset, compute the outcome likelihood and print the timing followed by the result """
    parsedInput = parse()
    # Local timer: measures only the likelihood computation, not the parsing above.
    start_time = time.time()
    likelihood = OutcomeLikelihood(*parsedInput).observedStringProbability()
    elapsed = time.time() - start_time
    print("--- %s seconds ---" % elapsed)
    print(likelihood)

# Run the computation only when this code is executed as the main module.
if __name__ == '__main__':
    main()
# Total time elapsed since the module-level start_time was taken; this line sits outside
# the guard above, so it is printed whenever the code is loaded.
print("--- %s seconds ---" % (time.time() - start_time))


--- 0.0 seconds ---
2.8667029519969803e-49
--- 1.3261427879333496 seconds ---


In [51]:
#100 ITERATIONS LOOP 
# !/usr/bin/env python3
import sys
import time
# Wall-clock reference taken at load time; the final module-level print reports the time elapsed since this point.
start_time = time.time()


########################################################################
# File:problem22.py
#  executable: problem22.py
#  purpose: To compute the Probability of a String Emitted by an HMM.
#  stderr: errors and status
#  stdout:
#
# Author: Arushi Mithal
#
#
# Notes:  1. To run the program from command line terminals:
#          Unix/Windows: python  problem22.py < input.txt > output.out
#
#
#
# Specs of the laptop on which the tests were run:
#        Windows 10-64bit. Processor i-5 5200U CPU @2.20GHz 2.20 GHz
#        Internal RAM  4.00 GB
########################################################################

class OutcomeLikelihood:
    """
        Compute the probability that an HMM emits a given observed string.

        Every hidden state is given the same starting probability (1 / number of hidden states). The observed
        string is then processed one symbol at a time: for each hidden state, the probabilities of all paths
        ending in that state are summed over every possible previous state. The result is the sum of these
        path probabilities over all hidden states after the last symbol.

        Both matrices are dictionaries keyed by concatenated names: transitionMatrix[fromState + toState] and
        emissionMatrix[symbol + state].
    """

    def __init__(self, string, E, states, transitionDict, emissionDict):
        """ Store the observed string and the HMM parameters.

            string: the observed string, read one symbol at a time.
            E: the observable symbols (stored only; the computation does not read it).
            states: the hidden-state names, in the order used for the summation.
            transitionDict: transition probabilities keyed by fromState + toState.
            emissionDict: emission probabilities keyed by symbol + state.
        """
        self.observedString = string
        self.possibleObservableStates = E
        self.possibleHiddenStates = states
        self.transitionMatrix = transitionDict
        self.emissionMatrix = emissionDict
        self.initialProbabilities = {}

    def observedStringProbability(self):
        """ Calculate and return the probability of the observed string being emitted by the HMM.

            Returns 0 when there are no hidden states and 1.0 for an empty observed string (with at least
            one hidden state). Raises KeyError when a required transition or emission entry is missing.
            As a side effect, self.initialProbabilities is replaced by the probabilities after the first symbol.
        """
        observed = self.observedString
        hiddenStates = self.possibleHiddenStates
        transitionMatrix = self.transitionMatrix
        emissionMatrix = self.emissionMatrix

        if not hiddenStates:
            # Without hidden states there is no path through the model, so nothing is accumulated.
            return 0
        if not observed:
            # Nothing has to be emitted: the starting probabilities alone sum to 1.
            return 1.0

        # Uniform start: each hidden state is equally likely before the first symbol is emitted.
        uniformStart = 1 / len(hiddenStates)
        firstSymbol = observed[0]
        # Rebuilt on every call so that no entries from an earlier computation linger.
        self.initialProbabilities = {
            state: uniformStart * emissionMatrix[firstSymbol + state] for state in hiddenStates
        }

        # Only the previous time step is needed to compute the next one, so no history is kept.
        currentProbabilities = self.initialProbabilities
        for symbol in observed[1:]:
            currentProbabilities = {
                state: sum(
                    probability * transitionMatrix[previousState + state] * emissionMatrix[symbol + state]
                    for previousState, probability in currentProbabilities.items()
                )
                for state in hiddenStates
            }

        # The string can end in any hidden state, so add up the probabilities of all of them.
        return sum(currentProbabilities.values())

def parse(filename="rosalind_ba10d_762_8_dataset.txt"):
    """ Parse an HMM dataset and return (string, E, states, transitionDict, emissionDict).

        Layout read from the input, with sections separated by lines starting with "---":
        line 0 holds the observed string, line 2 the observable symbols, line 4 the hidden states;
        the next section is the transition table and the one after it the emission table. Each table
        has a header row of column labels followed by one row per hidden state (row label first).

        filename: path of the dataset file; pass None to read the dataset from standard input.

        transitionDict is keyed by fromState + toState and emissionDict by symbol + state.
        Raises IndexError when expected lines or tables are missing and ValueError when a table
        entry is not a number.
    """
    if filename is None:
        data = sys.stdin.readlines()
    else:
        with open(filename) as file:
            data = file.readlines()

    def delimitedRows(lines):
        """ Return the whitespace-split lines found between alternating "---" separator lines. """
        rows = []
        inside = False
        for ln in lines:
            if ln.startswith("---"):
                inside = not inside
                continue
            if inside:
                rows.append(ln.split())
        return rows

    string = data[0].rstrip()
    E = data[2].split()
    states = data[4].split()

    # Scanning from line 4, the first separator-enclosed section is the transition table.
    transition = delimitedRows(data[4:])
    # Scanning from line 6 skips the separator that opens the transition table, so the
    # next separator opens the emission table, which runs to the end of the input.
    emission = delimitedRows(data[6:])

    # Name each entry by its own row label and column label so that the key always
    # matches the cell it was read from.
    transitionDict = {
        row[0] + target: float(value)
        for row in transition[1:len(states) + 1]
        for target, value in zip(transition[0], row[1:])
    }
    emissionDict = {
        symbol + row[0]: float(value)
        for row in emission[1:len(states) + 1]
        for symbol, value in zip(emission[0], row[1:])
    }
    return string, E, states, transitionDict, emissionDict

def main():
    """ Parse the dataset, run the likelihood computation 100 times and print the last result """
    parsedInput = parse()
    repetitions = 100
    # The computation is repeated on the same parsed input; only the final result is printed.
    for _ in range(repetitions):
        likelihood = OutcomeLikelihood(*parsedInput).observedStringProbability()
    print(likelihood)

# Run the computation only when this code is executed as the main module.
if __name__ == '__main__':
    main()
# Total time elapsed since the module-level start_time was taken; this line sits outside
# the guard above, so it is printed whenever the code is loaded.
print("--- %s seconds ---" % (time.time() - start_time))

7.8112685005557595e-50
--- 0.14893507957458496 seconds ---
