In [37]:
from keras import backend as K
# Accuracy ne prenant pas en compte les charactères complétés

# Remove this when the cosineSimilarity will be added
def cosineSimilarity(h1, h2):
    return 2.3

def ignore_class_accuracy(to_ignore=2):
    def ignore_accuracy(y_true, y_pred):
        y_true_class = K.argmax(y_true, axis=-1)
        y_pred_class = K.argmax(y_pred, axis=-1)
 
        ignore_mask = K.cast(K.not_equal(y_pred_class, to_ignore), 'int32')
        matches = K.cast(K.equal(y_true_class, y_pred_class), 'int32') * ignore_mask
        accuracy = K.sum(matches) / K.maximum(K.sum(ignore_mask), 1)
        return accuracy
    return ignore_accuracy

# Fonction qui imitte le comportement du réseau de neurone
def get_hidden_state (word):
    prec = 0.005
    output = []
    
    for i, char in enumerate(word):
        if char == "a":
            prec = 0.02*(i+1) + prec
            output.append(prec)
        elif char == "b":
            prec = 0.03*(i+1) + prec
            output.append(prec)
        elif char == "e":
            prec = 0.005*(i+1) + prec
            output.append(prec)
        else:
            prec = 0.05*(i+1) + prec
            output.append(prec)
    return output



def get_data(filepath):

    inputs = []
    outputs = []
    with open(filepath, 'r') as f:
        lines = f.readlines()
    print(lines[:3])


    max_length = 0

    for line in lines:
        res = ""
        isInput = True
        for symbol in line:
            if symbol in [',', '\n']:
                if isInput:
                    inputs.append(res)
                    max_length = len(res) if len(res) > max_length else max_length
                    res = ""
                    isInput = not isInput
                    continue
                else:
                    outputs.append(res)
            res += symbol
        #print(line)
    return inputs, outputs, max_length

def merging_checking(st1, st2, k):
    similarity = False
    consistency = False

    # for the similarity, we will merge state1 to state 2 if
    # If every input of state1 are in the input set of state2 and
    # If for each input state1 input set, we get the same output
    # from state1 and state2

    if set(st1._outTr.keys()) in set(st2._outTr.keys()):
        for char in list(st1._outTr.keys()):
            if set(st1._outTr[char].keys()) == set(st1._outTr[char].keys()):
                similarity = True

    # compute the cosine similarity of the two hidden state value
    # If it's greater than k, the consistency constraint in respected
    if cosineSimilarity(st1.hidden_state, st2.hidden_state) > k:
        consistency = True

    return similarity and consistency

def class_mapping(label, numb_class = 3):
    y_train = []
    for x in label:
        assert int(x) < numb_class
        y_train.append([int(i==int(x)) for i in range(numb_class)])
        
    return y_train

def tokenization(word, num_token = 4):
    x_train = []
    for x in word:
        if x == 'a':
            x_train.append(1)
        elif x == 'b':
            x_train.append(2)
        elif x == 'e':
            x_train.append(3)
        else:
            x_train.append(0)
    
    return x_train

def masking(word, pad_char = 'z'):
    return [x!=pad_char for x in word]

In [32]:
import tensorflow as tf
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import SimpleRNN
from keras.layers import Embedding
from keras.losses import CategoricalCrossentropy

class Tagger(tf.keras.Model):

  def __init__(self, n_tokens = 3, embed_dim = 10, max_length = 99, rnn_dim = 10, n_labels=3):
    super().__init__()
    self.embedding = Embedding(n_tokens, embed_dim, input_length=max_length, mask_zero=True)
    self.rnn = SimpleRNN(rnn_dim, return_sequences=True)
    self.outputs = Dense(n_labels, activation='softmax')

  def call(self, token_ids, labels, mask, training = True):
    embeddings = self.embedding(token_ids)
    states = self.rnn(embeddings)
    logits = self.outputs(states)
    loss = CategoricalCrossentropy()(labels, logits)
    #predictions = tf.math.argmax(logits, axis=-1)
    bool_acc = tf.equal(tf.argmax(logits, -1), tf.argmax(labels, -1))
    accuracy = tf.reduce_mean(tf.cast(bool_acc, tf.float32))
    #acc = ((predictions == labels) * mask).sum().float() / mask.sum()
    return {
            "states": states,
            "predictions": logits,
            "accuracy": accuracy.numpy(),
            "loss": loss,
          }



In [40]:
from model import Tagger 
import os
import numpy as np
from tqdm import trange
import tensorflow as tf
from tensorflow import keras

if __name__ == "__main__":
  
    #max_length = 4
    #corpus = ['ba', 'b', 'a', 'baa', 'a', 'baaa', 'aa', 'b', 'abaa', 'abb', 'bb']
    #labels = ['11', '1', '1', '110', '1', '1100', '10', '1', '1010', '101', '11']
    corpus, labels, max_length = get_data('dataset2.txt')
    corpus_ = ["e"+x+"z"*(max_length-len(x)) for x in corpus]
    labels_ = ["0"+x+"2"*(max_length - len(x)) for x in labels]
    states = []

    n_epochs = 10
    batch_size = 10

    x_train = np.array([tokenization(x) for x in corpus_])
    y_train = np.array([class_mapping(x) for x in labels_])
    mask = np.array([masking(x) for x in corpus_])

    version_name = '01'
    model_dir = os.path.join("weigths", version_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    filepath = "weigths/model_weights.h5"

    optimizer = keras.optimizers.Adam(learning_rate=0.01)

    trained_model = Tagger(4, 10, max_length+1, 10)
    print(x_train.shape)
    
    trained_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    #trained_model.fit(x_train, y_train, epochs=n_epochs, batch_size=batch_size, validation_split=0.2, verbose=1)
    
    train_results = trained_model(x_train, y_train, mask)

    """for epoch in range(5):
        for batch_idx in trange(0, len(x_train) - 2, 2):
            with tf.GradientTape() as tape:
                batch_tokens = x_train[batch_idx:batch_idx + 2]
                batch_labels = y_train[batch_idx:batch_idx + 2]
                batch_mask = mask[batch_idx:batch_idx + 2]
                train_results = trained_model(batch_tokens, batch_labels, batch_mask)
                
                loss = train_results['loss']
            grads = tape.gradient(loss, trained_model.trainable_variables)
            optimizer.apply_gradients(zip(grads, trained_model.trainable_variables))
        #trained_model.save_weights(filepath)
        train_results = trained_model(x_train, y_train, mask)
    
        train_preds = train_results["predictions"]
        #print('\n\n\n Les prédictions sont: \n\n')
        #print(train_preds)
        

        print(f'\n\n The accuracy: {train_results["accuracy"]}')

    #trained_model.save_weights("weights.")

    representations = train_results["states"][:5]
    print('\n\n\n Les étatss sont: \n\n')
    print(representations)"""

['abaabbabbbbbbbbbaaabbbbbabbaabbbaa,0101110111111111011111110110111101\n', 'bbaaaabaaabbaabababbaabbabbaabbbababaababaaaaabbababbbab,11011110111101101011011101101111010101101011111101011101\n', 'bbbabbabababaaaabbbbbaabaababaababbbbbbabbbbaaabbaaabababaaaab,11101101010101111111101101101011011111101111011110111010101111\n']
(10000, 100)
[3 1 2 1 1 2 2 1 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 2 2 1 2 2 1 1 2 2 2 1 1 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [8]:
import tensorflow as tf

labels = tf.convert_to_tensor([[[1,0,0],[1,0,0],[0,0,1]],
                                [[0,1,0],[1,0,0],[0,1,0]],
                                [[0,0,1],[0,0,1],[0,0,1]],
                                [[0,1,0],[1,0,0],[0,0,1]],
                                [[0,0,1],[0,1,0],[0,0,1]]])

logits = tf.convert_to_tensor([[[0.1,0.9,0],[0.1,9,0,0],[0,0.1,0.9]],
                                [[0.1,0.9,0],[0.1,9,0,0],[0.1,0.9,0]],
                                [[0,0.1,0.9],[0,0.1,0.9],[0,0.1,0.9]],
                                [[0.1,0.9,0],[0.1,9,0,0],[0,0.1,0.9]],
                                [[0,0.1,0.9],[0.1,0.9,0],[0,0.1,0.9]]])

bool_acc = tf.equal(tf.argmax(logits, -1), tf.argmax(labels, -1))
accuracy = tf.reduce_mean(tf.cast(bool_acc, tf.float32))

print(accuracy.numpy())

ValueError: Can't convert non-rectangular Python sequence to Tensor.

In [None]:
print("Baby come over")

In [None]:
import tensorflow as tf

# Create an optimizer.
opt = tf.keras.optimizers.experimental.SGD(learning_rate=0.1)
var1, var2 = tf.Variable(1.0), tf.Variable(2.0)

# Compute the gradients for a list of variables.
with tf.GradientTape() as tape:
  loss = 3 * var1 * var1 + 2 * var2 * var2
grads = tape.gradient(loss, [var1, var2])

# Process the gradients.
grads[0] = grads[0] + 1

# Ask the optimizer to apply the gradients on variables.
opt.apply_gradients(zip(grads, [var1, var2]))
print(var1)

In [15]:
def score_all_prefix(mealy, dataset, labels):
    # A bunch of code on how to determine if a label correctly corresponds
    # to the output of the mealy machine
    scores = 0
    total = 0
    for i in range(len(dataset)):
        output = mealy.return_output(dataset[i])
        print(output)
        score = [labels[i][j] == output[j] for j in range(len(output))]
        scores += score.count(True)
        total += len(output)

    return scores/total * 100

In [18]:
from copy import deepcopy
a = ['eaaab', 'ebabb', 'eaac', 'eaab']
b = ['00001', '01011', '0001', '0010']
arcs = [[2,'a', '0', 3],[1,'b', '0', 3],[2,'a', '0', 3],[0,'b', '0', 3],[1,'b', '0', 3],]

class Mealy(object):

    def __init__(self, id, arcs):
        self.id = id
        self.transitions = arcs

    def return_output(self, word):
        # word must always start with 'e' representing the bos
        output = ''
        for i in range(len(word)):
            if word[i] in {'e', 'a'}:
                output += f'{0}'
            else:
                output += f'{1}'
        return output
    
    def removeDuplicateTransitions(self):
        add = True
        transitions = [self.transitions[0]]
        for x in self.transitions:
            for y in transitions:
                if x == y:
                    add = False
            if add:
                transitions.append(x)
            add = True
        self.transitions = deepcopy(transitions)

m = Mealy(0, arcs)
m.removeDuplicateTransitions()
print(m.transitions)

[[2, 'a', '0', 3], [1, 'b', '0', 3], [0, 'b', '0', 3]]


In [21]:

from copy import deepcopy
class Mealy(object):

    def __init__(self, id, root, nodes, arcs):
        # nodes = [0,1,2,...]
        # arcs = [(0,a,1,1), ...]
        self.id = id
        self.root =  root
        self.nodes = nodes
        self.transitions = [list(x) for x in arcs]
    
    def output(self, input_state, input_char):
        for x in self.transitions:
            if x[0] == input_state and x[1] ==  input_char:
                return x[2], x[3]
        return None

    def getInpOut(self, node):
        inp_out = []
        for x in self.transitions:
            if x[0] == node:
                inp_out.append([x[1],x[2]])
        
        return inp_out

    # get the output of the machine given a word
    def return_output(self, word):
        # we consider that the word comes without the bos sign
        output = ''
        idx = self.root
        for i in range(len(word)):
           if self.output(idx, word[i]) == None:
            print(f'There\'s no transitions from {idx} with {word[i]}')
            break
           output += self.output(idx, word[i])[0]
           idx = self.output(idx, word[i])[1]
        return output
    
    # get the trace of the machine given a word
    def return_states(self, word):
        
        # we consider that the word comes without the bos sign
        # for a word abba we have [0,1,2,3,4] for example
        idx = [self.root]
        for i in range(len(word)):
           if self.output(idx[i], word[i]) == None:
            print(f'There\'s no transitions from {idx[i]} with {word[i]}')
            break
           idx.append(self.output(idx[i], word[i])[1])
        return idx
    
    
    def print(self):
        print(f'The amount of states is {len(self.nodes)}')
        print(self.nodes)
        #print("Different states of the Tree: ")
        #for i in self.nodes:
        #    print(f'ID: {i}\tHidden value: {0}')

        print(f'\nThe amount of arcs is {len(self.transitions)}\n')
        print("\nDifferent transitions of the Tree: ")
        for transition in self.transitions:
            print(f'{transition[0]} --> {transition[1]}/{transition[2]} --> {transition[3]}')

    def removeDuplicate(self):
        add = True
        states = []
        for x in self.nodes:
            if x not in states:
                states.append(x)
        
        self.nodes = deepcopy(states)

        transitions = [self.transitions[0]]
        for x in self.transitions:
            for y in transitions:
                if x == y:
                    add = False
            if add:
                transitions.append(x)
            add = True
        self.transitions = deepcopy(transitions)

        nodes = []
        for x in self.transitions:
            if x[0] not in nodes:
                nodes.append(x[0])
            if x[3] not in nodes:
                nodes.append(x[3])

        self.nodes = deepcopy(nodes)
        
        
    def merge_states(self, state1, state2):
        

        self.merging(state1, state2)
        
        #self.removeDuplicate()
        self.print()


    def merging(self, state1, state2):
        print(f'\n The two states are {state1} and {state2}\n')
        submerged = False
        if state1 == state2:
            return 0
        if (state1 not in self.nodes or state2 not in self.nodes):
            return 1
        
        for i in range(len(self.transitions)):
            for j in range(len(self.transitions)):
                if(i == j):
                    continue

                # merge the children of the two mergable states
                if self.transitions[i][0] == state1 and self.transitions[j][0] == state2:
                    if self.transitions[i][1:3] == self.transitions[j][1:3]:
                        submerged = True
                        print(f'\n The two SUB states are {self.transitions[i][3]} and {self.transitions[j][3]}\n')
                        self.merging(self.transitions[i][3], self.transitions[j][3])

        for i in range(len(self.transitions)):
            if self.transitions[i][0] == state2:
                self.transitions[i][0] = state1
            if self.transitions[i][3] == state2:
                self.transitions[i][3] = state1
        
        # If the merged is the root
        if self.root == state2:
            self.root = state1

        # Delete the merged state
        """if state2 in self.nodes:
            self.nodes.remove(state2)"""

        # Remove doble transaction
        transitions = []

        """for x in self.transitions:
            add = True
            for y in transitions:
                if x == y:
                    add = False
            if add:
                transitions.append(x)
            
        self.transitions = deepcopy(transitions)"""
        #print(self.transitions)
        
        
        #self.print()

        return 0

nodes = [0,1,2,3,4,5,6,7,8,9,10]
arcs = [(0,'b','1',1),
        (1,'a','1',2),
        (2,'a','0',4),
        (4,'a','0',5),
        (0,'a','1',3),
        (3,'b','0',7),
        (7,'b','1',10),
        (7,'a','1',8),
        (8,'a','0',9),
        (3,'a','0',6)]

fsm = Mealy(0, 0, nodes, arcs)
fsm.print()

The amount of states is 11
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

The amount of arcs is 10


Different transitions of the Tree: 
0 --> b/1 --> 1
1 --> a/1 --> 2
2 --> a/0 --> 4
4 --> a/0 --> 5
0 --> a/1 --> 3
3 --> b/0 --> 7
7 --> b/1 --> 10
7 --> a/1 --> 8
8 --> a/0 --> 9
3 --> a/0 --> 6


In [22]:
print(fsm.return_states('abb'))
print(fsm.return_output('abb'))
print(fsm.return_states('bbb'))

[0, 3, 7, 10]
101
There's no transitions from 1 with b
[0, 1]


In [65]:
#fsm.merge_states(2,4)
#fsm.merge_states(0,1)
#fsm.merge_states(3,6)
#fsm.merge_states(0,7)


 The two states are 0 and 1

The amount of states is 11


The amount of arcs is 10


Different transitions of the Tree: 
0 --> b/1 --> 0
0 --> a/1 --> 3
3 --> a/0 --> 3
3 --> a/0 --> 3
0 --> a/1 --> 3
3 --> b/0 --> 7
7 --> b/1 --> 10
7 --> a/1 --> 8
8 --> a/0 --> 9
3 --> a/0 --> 3

 The two states are 3 and 6

The amount of states is 11


The amount of arcs is 10


Different transitions of the Tree: 
0 --> b/1 --> 0
0 --> a/1 --> 3
3 --> a/0 --> 3
3 --> a/0 --> 3
0 --> a/1 --> 3
3 --> b/0 --> 7
7 --> b/1 --> 10
7 --> a/1 --> 8
8 --> a/0 --> 9
3 --> a/0 --> 3

 The two states are 0 and 7


 The two SUB states are 0 and 10


 The two states are 0 and 10


 The two SUB states are 3 and 8


 The two states are 3 and 8


 The two SUB states are 3 and 9


 The two states are 3 and 9


 The two SUB states are 3 and 3


 The two states are 3 and 3


 The two SUB states are 3 and 3


 The two states are 3 and 3


 The two SUB states are 3 and 3


 The two states are 3 and 3

The amount of stat

In [46]:
import numpy as np

def cosine(h1, h2):
    cos = 0
    s1 = 0
    s2 = 0
    assert len(h1) == len(h2)
    for i in range(len(h1)):
        cos += h1[i]*h2[i]
        s1 += h1[i]**2
        s2 += h2[i]**2
    s1 = s1**(1/2)
    s2 = s2**(1/2)
    return cos/(s1*s2)


mask = [[True, True, True, False, False],
        [True, True, False, False, False],
        [True, True, False, False, False],
        [True, True, True, True, False],
        [True, True, True, True, True],
        [True, True, True, False, False],
        [True, True, True, True, True],
        [True, True, True, True, False]]
labels  = np.array([[0,1,1,2,2],
        [0,1,2,2,2],
        [0,1,2,2,2],
        [0,1,1,0,2],
        [0,1,1,0,0],
        [0,1,0,2,2],
        [0,1,0,1,0],
        [0,1,0,1,2]])
representations = np.array([[[2.3, 4.5], [2.3, 4.5], [2.1, 3.01], [2.67, 1.01], [2.67, 1.01]],
                   [[2.3, 4.5], [2.3, 4.5], [2.67, 1.01], [2.67, 1.01], [2.67, 1.01]],
                   [[2.3, 4.5], [2.1, 3.01], [2.67, 1.01], [2.67, 1.01], [2.67, 1.01]],
                   [[2.3, 4.5], [2.3, 4.5], [2.1, 3.01], [2.1, 3.01], [2.67, 1.01]],
                   [[2.3, 4.5], [2.3, 4.5], [2.1, 3.01], [2.1, 3.01], [2.1, 3.01]],
                   [[2.3, 4.5], [2.1, 3.01], [2.1, 3.01], [2.67, 1.01], [2.67, 1.01]],
                   [[2.3, 4.5], [2.1, 3.01], [2.3, 4.5], [2.1, 3.01], [2.1, 3.01]],
                   [[2.3, 4.5], [2.1, 3.01], [2.3, 4.5], [2.3, 4.5], [2.67, 1.01]]])
idx = [[0,1,2], 
    [0,1],
    [0,3],
    [0,1,2,4],
    [0,1,2,4,5],
    [0,3,6],
    [0,3,7,8,9],
    [0,3,7,10]] # maps strings to states
n_states = 11
states = np.zeros((n_states, 2))
states_mask = np.zeros(n_states)

for i, _r in enumerate(representations):
    #print(_r)
    states[idx[i]] = _r[mask[i]]
    states_mask[idx[i]] = labels[i][mask[i]]

sim1 = []
for i in range(len(states)):
    sim1.append([])
    for j in range(len(states)):
        sim1[i].append(cosine(states[i], states[j]))

sim1 = tf.convert_to_tensor(sim1)
#print(sim1)

fusionable = True
res, pruned = 0, 0
threshold = 1
total = 0
ter = 0

"""while fusionable and ter < 10:
    fusionable = False
    ter += 1"""

#fsm.print()

"""for i in range(states.shape[0]):
    for j in range(i):
        pass_ = False
        if(i == j):
            continue
        for x in fsm.getInpOut(i):
            for y in fsm.getInpOut(j):
                if(x[0] == y[0] and x[1] != y[1]):
                    pass_ = True
        #print(f'--we have {i} and {j} and the similarity {sim[i][j]}')
        if pass_:
            continue
        
        if(sim1[i][j] >= threshold):
            print(f'The states to merge {i} and {j}')
            fusionable = True
            total += 1
            res = fsm.merge_states(i, j)
            #pruned += 1 - res"""
    

#print(states)
#print(states_mask)
#print(sim[0])
#fsm.print()
#fsm.removeDuplicate()
#print('After Duplicate deletion')
#fsm.print()

tf.Tensor(
[[1.         1.         0.99067566 0.99067566 0.99067566 0.99067566
  0.99067566 1.         0.99067566 0.99067566 1.        ]
 [1.         1.         0.99067566 0.99067566 0.99067566 0.99067566
  0.99067566 1.         0.99067566 0.99067566 1.        ]
 [0.99067566 0.99067566 1.         1.         1.         1.
  1.         0.99067566 1.         1.         0.99067566]
 [0.99067566 0.99067566 1.         1.         1.         1.
  1.         0.99067566 1.         1.         0.99067566]
 [0.99067566 0.99067566 1.         1.         1.         1.
  1.         0.99067566 1.         1.         0.99067566]
 [0.99067566 0.99067566 1.         1.         1.         1.
  1.         0.99067566 1.         1.         0.99067566]
 [0.99067566 0.99067566 1.         1.         1.         1.
  1.         0.99067566 1.         1.         0.99067566]
 [1.         1.         0.99067566 0.99067566 0.99067566 0.99067566
  0.99067566 1.         0.99067566 0.99067566 1.        ]
 [0.99067566 0.990675

"for i in range(states.shape[0]):\n    for j in range(i):\n        pass_ = False\n        if(i == j):\n            continue\n        for x in fsm.getInpOut(i):\n            for y in fsm.getInpOut(j):\n                if(x[0] == y[0] and x[1] != y[1]):\n                    pass_ = True\n        #print(f'--we have {i} and {j} and the similarity {sim[i][j]}')\n        if pass_:\n            continue\n        \n        if(sim1[i][j] >= threshold):\n            print(f'The states to merge {i} and {j}')\n            fusionable = True\n            total += 1\n            res = fsm.merge_states(i, j)\n            #pruned += 1 - res"

In [39]:
def cosine_merging(fsm, states, states_mask, threshold = 1.0):

    cos = tf.keras.losses.CosineSimilarity(axis=-1)
    sim = -cos(states[None, :, :], states[:, None, :])
     
    total, pruned = 0, 0
    fsm_ = deepcopy(fsm)

    for i in range(states.shape[0]):
        for j in range(i):
            pass_ = False
            if(i == j):
                continue
            for x in fsm_.getInpOut(i):
                for y in fsm_.getInpOut(j):
                    if(x[0] == y[0] and x[1] != y[1]):
                        pass_ = True
            #print(f'--we have {i} and {j} and the similarity {sim[i][j]}')
            if pass_:
                continue
            
            if(sim[i][j] >= threshold):
                print(f'The states to merge {i} and {j}')
                fusionable = True
                total += 1
                res = fsm_.merge_states(i, j)
                #pruned += 1 - res
                
    fsm_.removeDuplicate()
    fsm_.print()
    fsm_.id = str(fsm_.id) + 'min'
    return fsm_

In [37]:
import tensorflow as tf

In [40]:

import numpy as np
mask = [[True, True, True, False, False],
        [True, True, False, False, False],
        [True, True, False, False, False],
        [True, True, True, True, False],
        [True, True, True, True, True],
        [True, True, True, False, False],
        [True, True, True, True, True],
        [True, True, True, True, False]]
labels  = np.array([[0,1,1,2,2],
        [0,1,2,2,2],
        [0,1,2,2,2],
        [0,1,1,0,2],
        [0,1,1,0,0],
        [0,1,0,2,2],
        [0,1,0,1,0],
        [0,1,0,1,2]])
representations = np.array([[[2.3, 4.5], [2.3, 4.5], [2.1, 3.01], [2.67, 1.01], [2.67, 1.01]],
                   [[2.3, 4.5], [2.3, 4.5], [2.67, 1.01], [2.67, 1.01], [2.67, 1.01]],
                   [[2.3, 4.5], [2.1, 3.01], [2.67, 1.01], [2.67, 1.01], [2.67, 1.01]],
                   [[2.3, 4.5], [2.3, 4.5], [2.1, 3.01], [2.1, 3.01], [2.67, 1.01]],
                   [[2.3, 4.5], [2.3, 4.5], [2.1, 3.01], [2.1, 3.01], [2.1, 3.01]],
                   [[2.3, 4.5], [2.1, 3.01], [2.1, 3.01], [2.67, 1.01], [2.67, 1.01]],
                   [[2.3, 4.5], [2.1, 3.01], [2.3, 4.5], [2.1, 3.01], [2.1, 3.01]],
                   [[2.3, 4.5], [2.1, 3.01], [2.3, 4.5], [2.3, 4.5], [2.67, 1.01]]])
idx = [[0,1,2], 
    [0,1],
    [0,3],
    [0,1,2,4],
    [0,1,2,4,5],
    [0,3,6],
    [0,3,7,8,9],
    [0,3,7,10]] # maps strings to states
n_states = 11
states = np.zeros((n_states, 2))
states_mask = np.zeros(n_states)

for i, _r in enumerate(representations):
    #print(_r)
    states[idx[i]] = _r[mask[i]]
    states_mask[idx[i]] = labels[i][mask[i]]

#cos = tf.keras.losses.CosineSimilarity(axis=-1,reduction=tf.keras.losses.Reduction.NONE)
#sim = -cos(states[None, :, :], states[:, None, :])

merged_fsm = cosine_merging(fsm, states, states_mask)

InvalidArgumentError: {{function_node __wrapped__StridedSlice_device_/job:localhost/replica:0/task:0/device:CPU:0}} Index out of range using input dim 0; input has only 0 dims [Op:StridedSlice] name: strided_slice/

In [72]:
merged_fsm.return_output('bab')

'110'

In [77]:
_corpus = ['ba', 'b', 'a', 'baa', 'a', 'baaa', 'aa', 'b', 'abaa', 'abb', 'bb', 'abb', 'aaaa', 'baaaaab', 'abababa']
_labels = ['11', '1', '1', '110', '1', '1100', '10', '1', '1010', '101', '11', '101', '1000', '1100000', '1010101']
def score_whole_words(mealy, dataset, labels):
    acc = 0
    for word, y in zip(dataset, labels):
        acc += (mealy.return_output(word) == y)
    return (acc / len(dataset) * 100)

_acc = score_whole_words(merged_fsm, _corpus, _labels)
_acc

100.0

In [24]:
sim

<tf.Tensor: shape=(11, 11), dtype=float64, numpy=
array([[1.        , 1.        , 0.99067566, 0.99067566, 0.99067566,
        0.99067566, 0.99067566, 1.        , 0.99067566, 0.99067566,
        1.        ],
       [1.        , 1.        , 0.99067566, 0.99067566, 0.99067566,
        0.99067566, 0.99067566, 1.        , 0.99067566, 0.99067566,
        1.        ],
       [0.99067566, 0.99067566, 1.        , 1.        , 1.        ,
        1.        , 1.        , 0.99067566, 1.        , 1.        ,
        0.99067566],
       [0.99067566, 0.99067566, 1.        , 1.        , 1.        ,
        1.        , 1.        , 0.99067566, 1.        , 1.        ,
        0.99067566],
       [0.99067566, 0.99067566, 1.        , 1.        , 1.        ,
        1.        , 1.        , 0.99067566, 1.        , 1.        ,
        0.99067566],
       [0.99067566, 0.99067566, 1.        , 1.        , 1.        ,
        1.        , 1.        , 0.99067566, 1.        , 1.        ,
        0.99067566],
       [

In [8]:
a = np.array([[2,3], [5,6]])
b = np.array([[2.1, 3.2], [4.8, 5.7]])
sim2 = cos(a[None, :, :], a[:, None, :])
sim

TypeError: Cannot convert 1e-12 to EagerTensor of dtype int32

In [57]:
rer = [['a','0'],['b','1']]
rerr = []
sas = []
if all(x in rer for x in rerr):
    print(True)
#rer = [set(x) for x in rer]
#rerr = [set(x) for x in rerr]

True


In [29]:
4**(1/2)

2.0

TypeError: tuple indices must be integers or slices, not tuple