Goal:
- arbitrary number of states 
- corresponding transitions/emission probabilities 
- arbitrarily long output 

In [1]:
import numpy as np
import math

In [21]:
"""
    Generate a sequence of outputs and corresponding states based on provided probability matrices

    Parameters:
    states (list): list of state names as strings, same order as probability matrices
                   (ex: ["A", "B", "C"])
    probability matrices (list): list of numpy arrays with probabilities
    n (int): length of sequence to output

    Returns:
    two lists, one with the outputs and the other with the corresponding states
"""
def e_machine(states, transition_matrices, n):
    output_states = []
    outputs = []

    # concatenate matrices for easy column access
    probability_matrix = np.concatenate(transition_matrices, axis=0)

    # pick a random state to start with
    state_index = np.random.choice(len(states))
    output_states.append(states[state_index])

    for i in range(n):
        # get column of probabilities corresponding to state
        state_prob = probability_matrix[:, state_index].tolist()

        # select transition
        result = np.random.choice(len(probability_matrix), p=state_prob)

        # figure out output and state corresponding to transition
        output = math.floor(result/len(states))
        state_index = result % len(states)

        output_states.append(states[state_index])
        outputs.append(output)
    
    return output_states, outputs

In [148]:
test_array_1 = [np.array([[0,1], [0,0]]),np.array([[0,0],[1,0]])]
e_machine(["A", "B"], test_array_1, 5)

(['B', 'A', 'B', 'A', 'B'], [0, 1, 0, 1])

In [4]:
# even process
test_array_2 = [np.array([[0.5,0], [0,0]]),np.array([[0,1],[0.5,0]])]
states, outputs = e_machine(["A", "B"], test_array_2, 5)
print("states:", states)
print("outputs:", outputs)

states: ['B', 'A', 'B', 'A', 'B']
outputs: [1, 1, 1, 1]


In [22]:
states, outputs = e_machine(["A", "B"], test_array_2, 50)
print("states:", states)
print("outputs:", outputs)

states: ['A', 'B', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A']
outputs: [1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1]


- frequencies -> transition matrix
- np.random.direchlet
- implement simple reservoir


Generating a long sequence of outputs for the even process

In [29]:
np.random.seed(5)
states, outputs = e_machine(["A", "B"], test_array_2, 10000)
print("states:", states)
print("outputs:", outputs)

states: ['B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', '

Calculating frequency of each transition/emission

In [32]:
transitions = {"(A,0 | A)" : 0,
               "(B,0 | A)" : 0,
               "(A,0 | B)" : 0,
               "(B,0 | B)" : 0,
               "(A,1 | A)" : 0,
               "(B,1 | A)" : 0,
               "(A,1 | B)" : 0,
               "(B,1 | B)" : 0}
for i, output in enumerate(outputs):
    key = f"({states[i+1]},{output} | {states[i]})"
    transitions[key] += 1

transitions

{'(A,0 | A)': 3349,
 '(B,0 | A)': 0,
 '(A,0 | B)': 0,
 '(B,0 | B)': 0,
 '(A,1 | A)': 0,
 '(B,1 | A)': 3325,
 '(A,1 | B)': 3326,
 '(B,1 | B)': 0}

Calculating probabilities

In [34]:
# P(A,0 | A)
transitions["(A,0 | A)"] /(transitions["(A,0 | A)"] + transitions['(B,1 | A)'])

0.5017980221756069

In [35]:
# P(B,1 | A)
transitions["(B,1 | A)"] /(transitions["(A,0 | A)"] + transitions['(B,1 | A)'])

0.4982019778243932

In [109]:
np.random.dirichlet([1, 1, 1],5)

array([[0.24721804, 0.72800121, 0.02478075],
       [0.27221445, 0.70079945, 0.02698609],
       [0.17172626, 0.27886062, 0.54941312],
       [0.22181502, 0.60248433, 0.17570065],
       [0.67398978, 0.07129478, 0.25471544]])

In [112]:
alpha = [1]*2*2
alpha


[1, 1, 1, 1]

In [113]:
def distribution_generator(num_states, num_outputs):
    alpha = [1] * num_states * num_outputs
    return np.random.dirichlet(alpha, num_states)

In [116]:
distribution_generator(3,3)

array([[0.04023888, 0.04653782, 0.0034982 , 0.38633497, 0.10748449,
        0.00806991, 0.02341769, 0.14739771, 0.23702032],
       [0.10246714, 0.10138075, 0.00139134, 0.0636696 , 0.02182758,
        0.13271405, 0.04330758, 0.29457759, 0.23866436],
       [0.03557912, 0.08490082, 0.09771067, 0.33663521, 0.04722911,
        0.01221941, 0.17302139, 0.19235922, 0.02034505]])

In [104]:
"""
    Generate a sequence of outputs and corresponding states based on provided probability matrices

    Parameters:
    states (list): list of state names as strings, same order as probability matrices
                   (ex: ["A", "B", "C"])
    probability matrices (list): list of numpy arrays with probabilities
    n (int): length of sequence to output

    Returns:
    two lists, one with the outputs and the other with the corresponding states
"""
def e_machine_2(states, transition_matrix, n):
    output_states = []
    outputs = []

    # pick a random state to start with
    state_index = np.random.choice(len(states))
    output_states.append(states[state_index])

    for i in range(n):
        # get column of probabilities corresponding to state
        state_prob = transition_matrix[state_index]

        # select transition
        result = np.random.choice(len(state_prob), p=state_prob)

        # figure out output and state corresponding to transition
        output = math.floor(result/len(states))
        state_index = result % len(states)

        output_states.append(states[state_index])
        outputs.append(output)
    
    return output_states, outputs

In [105]:
test_array_1 = [[0,0,0,1], [1,0,0,0]]
#[np.array([[0,1], [0,0]]),np.array([[0,0],[1,0]])]
e_machine_2(["A", "B"], test_array_1, 5)

(['A', 'B', 'A', 'B', 'A', 'B'], [1, 0, 1, 0, 1])

In [108]:
# even process
test_array_2 = [[0.5,0,0,0.5], [0,0,1,0]]
#[np.array([[0.5,0], [0,0]]),np.array([[0,1],[0.5,0]])]
states, outputs = e_machine_2(["A", "B"], test_array_2, 100)
print("states:", states)
print("outputs:", outputs)

states: ['B', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'A']
outputs: [1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0]


In [117]:
test_array_3 = distribution_generator(2,2)
print(test_array_3)
states, outputs = e_machine_2(["A", "B"], test_array_3, 10000)
print("states:", states)
print("outputs:", outputs)

[[0.07455702 0.44297418 0.40369131 0.07877748]
 [0.07645361 0.00369782 0.61336136 0.30648721]]
states: ['B', 'A', 'A', 'A', 'B', 'B', 'A', 'B', 'B', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'B', 'B', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'B', 'A', 'B', 'B', 'A', 'A', 'B', 'A', 'B', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'B', 'A', 'B', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'B', 'A', 'B', 'A', 'A', 'B', 'B', 'B', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'A', 'B', 'A', 'B', 'B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'A', '

In [118]:
transitions = {"(A,0 | A)" : 0,
               "(B,0 | A)" : 0,
               "(A,0 | B)" : 0,
               "(B,0 | B)" : 0,
               "(A,1 | A)" : 0,
               "(B,1 | A)" : 0,
               "(A,1 | B)" : 0,
               "(B,1 | B)" : 0}
for i, output in enumerate(outputs):
    key = f"({states[i+1]},{output} | {states[i]})"
    transitions[key] += 1

transitions

{'(A,0 | A)': 415,
 '(B,0 | A)': 2479,
 '(A,0 | B)': 270,
 '(B,0 | B)': 22,
 '(A,1 | A)': 2336,
 '(B,1 | A)': 477,
 '(A,1 | B)': 2686,
 '(B,1 | B)': 1315}

In [121]:
print(415/(415+2479+2336+477))
print(2479/(415+2479+2336+477))
print(2336/(415+2479+2336+477))
print(477/(415+2479+2336+477))


0.0727177150867356
0.4343788330120904
0.40932188540389
0.08358156649728404


In [None]:
def probability_rederivation(states, possible_outputs, output_states, outputs):
    pass