In [2]:
import numpy as np
import math

In [320]:
"""
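    Generate a sequence of outputs and corresponding states based on provided probability matrices

    Parameters:
    states (list): list of state names as strings, same order as probability matrices
                   (ex: ["A", "B", "C"])
    transition_matrix: one row of probabilities per current state (see above image);
                       entry k of a row is the probability of emitting output
                       k // len(states) and moving to states[k % len(states)]
    n (int): number of transitions (outputs) to generate

    Returns:
    two lists: first the states visited (n + 1 entries, including the random start state),
    then the n outputs emitted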
"""
def e_machine(states, transition_matrix, n):
    """
    Sample a path through an epsilon-machine, recording visited states and emitted outputs.

    Parameters:
    states (list): list of state names, in the same order as the rows of
                   transition_matrix (ex: ["A", "B", "C"])
    transition_matrix: indexable with one row of probabilities per current state.
                       Entry k of a row is the probability of emitting output
                       k // len(states) while moving to states[k % len(states)].
                       Each row is passed to np.random.choice as `p`, so it must
                       sum to 1.
    n (int): number of transitions (and therefore outputs) to generate

    Returns:
    (output_states, outputs): output_states holds n + 1 state names (the randomly
    chosen start state followed by the state reached after every transition);
    outputs holds the n emitted output indices as Python ints.
    """
    num_states = len(states)

    # pick a random state to start with
    state_index = np.random.choice(num_states)
    output_states = [states[state_index]]
    outputs = []

    for _ in range(n):
        # row of probabilities for leaving the current state
        state_prob = transition_matrix[state_index]

        # select transition
        result = np.random.choice(len(state_prob), p=state_prob)

        # decode the flat index: quotient is the output, remainder the next state
        # (integer divmod avoids the float division + floor round trip)
        output, state_index = divmod(int(result), num_states)

        output_states.append(states[state_index])
        outputs.append(output)

    return output_states, outputs

In [321]:
# even process
# Row 0 is the distribution for leaving "A", row 1 for leaving "B". With the
# index decoding used by e_machine (output = k // 2, next state = k % 2):
#   from A: emit 0 and stay in A (0.5) or emit 1 and move to B (0.5)
#   from B: always emit 1 and return to A
test_array_2 = [[0.5,0,0,0.5], [0,0,1,0]]
# NOTE(review): leftover alternative representation, not used below -- looks like
# one matrix per output symbol; confirm before relying on it
#[np.array([[0.5,0], [0,0]]),np.array([[0,1],[0.5,0]])]
# short 100-step run; no seed is set here, so the printed sequence varies per run
states, outputs = e_machine(["A", "B"], test_array_2, 100)
print("states:", states)
print("outputs:", outputs)

states: ['B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'B']
outputs: [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1]


- frequencies -> transition matrix
- np.random.dirichlet
- implement simple reservoir


#### Calculating the transition matrices from the outputs and their corresponding states

Generating a long sequence of outputs for the even process

In [322]:
# Seed NumPy's global RNG so this 10000-step run of the even process is repeatable
np.random.seed(5)
# rebinds the kernel-global `states` / `outputs` used by the counting cell below
states, outputs = e_machine(["A", "B"], test_array_2, 10000)
# NOTE(review): these print the full 10001- and 10000-element lists
print("states:", states)
print("outputs:", outputs)

states: ['B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', '

Calculating frequency of each transition/emission

In [323]:
# One counter per "(next_state,output | current_state)" combination, all starting at zero
transitions = {
    "(A,0 | A)": 0,
    "(B,0 | A)": 0,
    "(A,0 | B)": 0,
    "(B,0 | B)": 0,
    "(A,1 | A)": 0,
    "(B,1 | A)": 0,
    "(A,1 | B)": 0,
    "(B,1 | B)": 0,
}

# Step t emits outputs[t] while moving from states[t] to states[t+1]
for step in range(len(outputs)):
    transitions[f"({states[step+1]},{outputs[step]} | {states[step]})"] += 1

transitions

{'(A,0 | A)': 3349,
 '(B,0 | A)': 0,
 '(A,0 | B)': 0,
 '(B,0 | B)': 0,
 '(A,1 | A)': 0,
 '(B,1 | A)': 3325,
 '(A,1 | B)': 3326,
 '(B,1 | B)': 0}

Calculating probabilities

In [324]:
# P(A,0 | A): count of "emit 0, stay in A" divided by the combined count of the
# two transitions used here as the departures from A
transitions["(A,0 | A)"] / sum(transitions[k] for k in ("(A,0 | A)", "(B,1 | A)"))

0.5017980221756069

In [325]:
# P(B,1 | A): count of "emit 1, move to B" divided by the combined count of the
# two transitions used here as the departures from A
transitions["(B,1 | A)"] / sum(transitions[k] for k in ("(A,0 | A)", "(B,1 | A)"))

0.4982019778243932

Generalizing the above into a function

In [326]:
def probability_rederivation(output_states, outputs):
    """
    Estimate transition/emission probabilities from an observed state and output sequence.

    Parameters:
    output_states (list): visited states; must hold at least len(outputs) + 1
                          entries, with output_states[i] the state before step i
                          and output_states[i + 1] the state after it. States
                          must be hashable (e.g. strings).
    outputs (list): output emitted at each step

    Returns:
    dict mapping "P(next_state, output | current_state)" to the observed relative
    frequency of that transition among all transitions leaving current_state.
    Entries are grouped by current state in order of first appearance; an empty
    `outputs` yields an empty dict.
    """
    # source state -> {formatted transition: count}. Grouping on the actual state
    # object (instead of slicing a character out of the formatted key) keeps
    # multi-character state names such as "A1" and "B1" apart.
    grouped = {}
    for i, output in enumerate(outputs):
        source = output_states[i]
        label = f"({output_states[i+1]}, {output} | {source})"
        bucket = grouped.setdefault(source, {})
        bucket[label] = bucket.get(label, 0) + 1

    probabilities = {}
    for bucket in grouped.values():
        # normalise by the number of departures from this source state
        total = sum(bucket.values())
        for label, frequency in bucket.items():
            probabilities[f"P{label}"] = frequency / total

    return probabilities

In [327]:
# Fresh 10000-step run of the even process (no seed set here), then
# re-estimate its transition probabilities from the sampled sequence
states, outputs = e_machine(["A", "B"], test_array_2, 10000)
# NOTE(review): these print the full sequences
print("states:", states)
print("outputs:", outputs)
# last expression of the cell, so the resulting dict is displayed
probability_rederivation(states, outputs)

states: ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'B', 'A', '

{'P(A, 0 | A)': 0.49329516347747476,
 'P(B, 1 | A)': 0.5067048365225253,
 'P(A, 1 | B)': 1.0}

Running with randomly generated distributions

In [328]:
def distribution_generator(num_states, num_outputs):
    """
    Draw a random transition matrix from a flat Dirichlet distribution.

    Parameters:
    num_states (int): number of states (one sampled row per state)
    num_outputs (int): number of distinct outputs

    Returns:
    array with num_states rows and num_states * num_outputs columns; every row is
    one Dirichlet sample drawn with all concentration parameters equal to 1
    """
    # one unit concentration parameter per entry of a row
    concentration = [1] * num_states * num_outputs
    sampled_rows = np.random.dirichlet(concentration, size=num_states)
    return sampled_rows

In [329]:
# Random 2-state, 2-output machine: sample its transition matrix, run it for
# 10000 steps, and check that the re-derived probabilities match the printed matrix
test_array_3 = distribution_generator(2,2)
print(test_array_3)
states, outputs = e_machine(["A", "B"], test_array_3, 10000)
# NOTE(review): these print the full sequences
print("states:", states)
print("outputs:", outputs)
# last expression of the cell, so the resulting dict is displayed
probability_rederivation(states, outputs)

[[0.16350922 0.18253106 0.14223141 0.5117283 ]
 [0.06748213 0.11881787 0.11067507 0.70302494]]
states: ['B', 'B', 'A', 'B', 'B', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'B', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'A', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'B', 'A', 'A', 'B', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'A', 'A', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', '

{'P(B, 1 | B)': 0.6992004061429116,
 'P(A, 1 | B)': 0.11295849727122731,
 'P(B, 0 | B)': 0.1161314887676101,
 'P(A, 0 | B)': 0.07170960781825104,
 'P(B, 1 | A)': 0.5011786892975012,
 'P(B, 0 | A)': 0.18434700612918434,
 'P(A, 0 | A)': 0.1617161716171617,
 'P(A, 1 | A)': 0.15275813295615276}

#### Reservoir

$h_{t+1} = \tanh(Wh_t + vx_t)$

In [330]:
def reservoir(h_t, x_t, W, v):
    """
    Advance the reservoir by one step: h_{t+1} = tanh(W h_t + v x_t).

    Parameters:
    h_t: current hidden state
    x_t: current input
    W: recurrent weight matrix, applied to h_t with the @ operator
    v: input weights, multiplied by x_t

    Returns:
    the next hidden state
    """
    recurrent_drive = W @ h_t
    input_drive = v * x_t
    return np.tanh(recurrent_drive + input_drive)

In [6]:
def weight_initialization(n, epsilon=0.1):
    """
    Build a random n x n recurrent weight matrix with zero mean and spectral radius <= 1.

    Parameters:
    n (int): side length of the square matrix
    epsilon (float): margin added to the largest eigenvalue magnitude before
                     dividing, so a rescaled matrix ends up strictly below 1

    Returns:
    n x n array whose entries average to zero. If the centred matrix has an
    eigenvalue of magnitude above 1 it is divided by (that magnitude + epsilon);
    otherwise it is returned as centred.
    """
    # generate random matrix
    W = np.random.uniform(low=-1, high=1, size=[n, n])

    # adjust mean to 0 *before* measuring the spectrum: shifting every entry
    # changes the eigenvalues, so centring after the rescale could push the
    # spectral radius back above 1
    W = W - np.mean(W)

    # find largest magnitude eigenvalue
    eig = max(abs(np.linalg.eigvals(W)))
    if eig > 1:
        # uniform scaling shrinks the spectrum and keeps the mean at zero
        W = W / (eig + epsilon)

    return W

In [8]:
# NOTE(review): the second positional argument is epsilon, so this call uses
# epsilon=5 rather than the default 0.1 -- confirm that is intended
mat = weight_initialization(5,5)
print(mat)
# summary statistics of the generated matrix
print("mean:", np.mean(mat))
print("std:", np.std(mat))
# largest eigenvalue magnitude of the returned matrix
print("eig:", max(abs(np.linalg.eigvals(mat))))

[[ 0.06879373  0.30231701 -0.65443795  0.07961189  0.45490139]
 [-0.64874508 -0.56807345  0.24116376  0.18847555 -0.10936755]
 [-0.18562733  0.24893578  0.39976283  0.54402269  0.2045292 ]
 [ 0.29167887  0.47804723  0.10970744  0.23412058 -0.52520889]
 [-0.40418292 -0.35043924  0.00769113 -0.45084233  0.04316564]]
mean: 2.3869795029440865e-17
std: 0.36488864618823363
eig: 0.8381302311612964


Testing the reservoir 

In [336]:
# size of weight matrix
n = 5

# generate input: outputs of a random 2-state, 2-output machine (states discarded)
# NOTE(review): no seed is set in this cell, and the result depends on the order
# of the global-RNG calls below
test_array = distribution_generator(2,2)
_, x = e_machine(["A", "B"], test_array, 10000)

# initial weights: recurrent matrix W (n x n) and input weights v (n x 1)
W = weight_initialization(n)
v = np.random.rand(n,1)

# initialize hidden state to zeros with the same shape as v
h_t = np.zeros_like(v)

# pass data through reservoir, one input at a time; only the final state is kept
for x_t in x:
    h_t = reservoir(h_t, x_t, W, v)

print(h_t)

[[-0.348211  ]
 [ 0.22042223]
 [ 0.26123346]
 [-0.30347971]
 [ 0.46099441]]


dimensionality reduction -> graph 

In [1]:
# https://scikit-learn.org/dev/modules/generated/sklearn.cross_decomposition.CCA.html
from sklearn.cross_decomposition import CCA
# Toy data: 4 samples with 3 features in X and 2 targets in y
# (presumably copied from the scikit-learn CCA page linked above -- verify)
X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]
y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
# keep a single component
cca = CCA(n_components=1)
cca.fit(X, y)
# transformed versions of X and y under the fitted model
X_c, Y_c = cca.transform(X, y)