In [None]:
import numpy as np
import math

def e_machine(states, transition_matrix, n: int, start_index: int = 1):
    """
        Generate a sequence of hidden states and their corresponding emissions

        Each row of transition_matrix is a probability distribution over flattened
        (emission, next state) pairs: entry k of a row means "emit k // len(states)
        and move to states[k % len(states)]".

        Parameters:
            states (list): list of state names as strings, same order as probability matrices
                        (ex: ["A", "B", "C"])
            transition_matrix: one row of probabilities per state, see transition_encoding.jpg
            n (int): number of transitions to simulate (= number of emissions produced)
            start_index (int): index into states of the starting hidden state; defaults to 1
                        to keep the previous hard-coded behaviour

        Returns:
            two lists, one with the hidden states sequence (n + 1 entries, the starting state
            comes first) and the other with the corresponding emissions (n entries)
    """
    num_states = len(states)

    # start from the requested state; pass start_index=np.random.choice(len(states))
    # to start from a random state instead
    state_index = start_index
    hidden_states = [states[state_index]]
    emissions = []
    print("starting state =", hidden_states)

    for _ in range(n):
        # see transition_encoding.jpg to see how this 3-D matrix is stored in 2-D

        # get the row of probabilities corresponding to the current state
        state_prob_list = transition_matrix[state_index]

        # select transition index - this will be chosen with probability state_prob_list
        result_index = int(np.random.choice(len(state_prob_list), p=state_prob_list))

        # per the encoding, the first len(states) entries of a row correspond to emission 0,
        # the next len(states) entries to emission 1, and so on; within each group the
        # position selects the next state. The divisor must be len(states) for BOTH parts
        # (a hard-coded 2 only works for two-state machines).
        emission, state_index = divmod(result_index, num_states)

        emissions.append(emission)
        hidden_states.append(states[state_index])

    return hidden_states, emissions



In [None]:
def reservoir(h_t, x_t, W, v):
    """
    One update step of the reservoir: h_{t+1} = tanh(W*h_t + v*x_t)

    Parameters:
        h_t: current reservoir state
        x_t: current input
        W: weight applied to the current state
        v: weight applied to the input

    Returns:
        the next reservoir state h_{t+1}

    Note: both products use the `*` operator, so array arguments are combined
    element-wise (with NumPy broadcasting), not by matrix multiplication.
    """
    recurrent_term = W * h_t
    input_term = v * x_t
    pre_activation = recurrent_term + input_term
    return np.tanh(pre_activation)

In [None]:
# Two-state machine: one row per state ('a', then 'b'), four entries per row.
# With e_machine's decoding (emission = index // 2, next state = index % 2):
#   state 'a': prob 1/2 -> emit 0 and stay in 'a'; prob 1/2 -> emit 1 and go to 'b'
#   state 'b': prob 1   -> emit 1 and go to 'a'
my_matrix = [[1/2, 0, 0, 1/2], [0, 0, 1, 0]]
# NOTE: the name `emissisons` (sic) is read by later cells, so the spelling is kept here.
hidden_states, emissisons = e_machine(['a', 'b'], my_matrix, 500)

# Prints the full sequences: 501 hidden states (starting state included) and 500 emissions.
print("hidden_states = ", hidden_states)
print("emissions = ", emissisons)

starting state = ['b']
hidden_states =  ['b', 'a', 'b', 'a', 'a', 'b', 'a', 'a', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'a', 'b', 'a', 'a', 'a', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'a', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'a', 'a', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'a', 'b', 'a', 'a', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'b', 'a', 'a', 'a', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'a', 'a', 'a', 'b', 'a', 'a', 'a', 'a', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'a', 'a', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'a', 'a', 'b', 'a', 'a', 'a', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'a', 'a', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a',

In [None]:
# Scalar reservoir parameters: W weights the previous state, v weights the input.
W = 1
v = 1
# Initial reservoir state.
h_t = 0

# Drive the reservoir with the emission sequence, one emission per step.
reservoir_states = []
for x_t in emissisons:
    reservoir_states.append(reservoir(h_t, x_t, W, v))
    # Feed the state just computed back in as the current state for the next step.
    h_t = reservoir_states[-1]

# Prints every reservoir state (one per emission).
print("reservoir_states = ", reservoir_states)

reservoir_states =  [0.7615941559557649, 0.9426807890983486, 0.9597460317134175, 0.7441635628990307, 0.9407076192094486, 0.9595900780324168, 0.74409396507861, 0.6316122700463155, 0.9262908422001276, 0.9584325247873527, 0.9609700422101399, 0.9611637822634994, 0.7447955252204334, 0.9407802953774465, 0.9595958326302346, 0.7440965334837134, 0.9406999057035522, 0.9595894672194134, 0.961058492247736, 0.9611705184157167, 0.7447985246767223, 0.9407806401112911, 0.9595958599248167, 0.7440965456658405, 0.9406999071055163, 0.9595894673304323, 0.7440936925074291, 0.6316121062130341, 0.559161277271348, 0.9152844743418295, 0.9575270127929931, 0.9609006770802873, 0.9611584987727609, 0.9611781333040645, 0.9611796281822311, 0.744802581001921, 0.9407811063102166, 0.9595958968364663, 0.7440965621402598, 0.6316138310490574, 0.9262910638392117, 0.958432542830343, 0.9609700435910886, 0.9611637823686783, 0.9611785355770296, 0.9611796588087576, 0.744802594638967, 0.9407811078775347, 0.9595958969605599, 0.7440

In [None]:
from sklearn.linear_model import LinearRegression

# Create a LinearRegression object
model = LinearRegression()

# Reshape emissions to a 2D array for sklearn: one row per time step, a single feature column
emissions_reshaped = np.array(emissisons).reshape(-1, 1)

# Train the model: the emission at each step is the feature, the reservoir state
# computed from that step is the target
model.fit(emissions_reshaped, reservoir_states)
# Score on the same data the model was fitted on (in-sample R-squared)
r_squared = model.score(emissions_reshaped, reservoir_states)
print(f"R-squared: {r_squared}")


# Note: R-squared can become negative when the model is scored
# on data that it was not trained on.



R-squared: 0.8637968688907105
