In [1]:
import doctest
class Stm:
    class Item:   #create an item container with head and pointer to next container                
        def __init__(self, data):    
            self.next = None
            self.data = data

    def __init__(self, maxlen): #STM constructor
        self.maxlen = maxlen
        self.size = 0
        self.head = None
        self.tail = None

    def push(self, data):
        self.remove(data)
        e = Stm.Item(data)
        if self.size == 0:
            self.head = self.tail = e
        else:
            self.tail.next = e
            self.tail = e
        self.size += 1
        if self.size > self.maxlen:
            self.pop()

    def pop(self):
        if self.size == 0:
            return None
        else:
            e = self.head.data
            self.head = self.head.next
            self.size -= 1
            return e

    def remove(self, value):
        if self.size == 0:
            return
        if self.head.data == value:
            self.head = self.head.next
            self.size -= 1
        else:
            p = self.head
            e = self.head.next
            while e:
                if e.data == value:
                    p.next = e.next
                    self.size -= 1
                    if e == self.tail:
                        self.tail = p
                    break
                p = p.next
                e = e.next

    class Iterator:
        def __init__(self, e):
            self.e = e

        def __next__(self):
            if self.e:
                e = self.e
                self.e = e.next
                return e.data
            raise StopIteration()

    def __iter__(self):
        return Stm.Iterator(self.head)

In [2]:
def p_equal(p1, p2):
    if len(p1) > 0 and p1[-1] == '$':
        p1 = p1[:-1]
    if len(p2) > 0 and p2[-1] == '$':
        p2 = p2[:-1]
    return p1 == p2

def p_match(p1, p2):
    if len(p1) > 0 and p1[-1] == '$':
        if len(p1) - 1 == len(p2):
            return p1[:-1] == p2
    else:
        if len(p1) <= len(p2):
            return p1 == p2[:len(p1)]
    return False

def p_diff(p1, p2):                 #below we look at example [a b d c] taken from [a b c] => [c] 
    for i in range(len(p2)):        #increment up to 3 non-inclusive (length of abc))
        if i >= len(p1) or p1[i] != p2[i]:       #if index of letter greater than pattern1 length
                                             #OR one of the letters on both patterns do not match
            break                            #stop the above loop
    return p2[i:]                #return remainder of p2, starting with the 1st non-matching letter

In [3]:
class Tree:
    def __init__(self):
        self.tree = [
            {
                'tlink': ['root'],
                'chldrn': [],
                'path': [],
                'idx': 0
            }
        ]
        self.stm = Stm(2)  #STM with size 2 chunks

    def add_node(self, tlink, img, path):
        idx = len(self.tree)
        self.tree.append({
            'tlink': tlink,      #tlink/test-link name = primitive = FIRST letter of the sorted pattern
            'image': img,
            'chldrn': [],
            'path': path,
            'idx': idx
        })

    def learn(self, p):
        node = self.retrieve(p)
        if node['tlink'] != ['root'] and p_match(node['image'], p): #if non-root image DOES match input pattern
            node = self.familiarisation(node, p)                         #never happens for root node!
        else:                                               #if non-root image does NOT match input pattern
            node = self.discrimination(node, p)
        self.stm.push(node)
        return node

    def retrieve(self, p):
        node = self.tree[0]                #start at the root node
        i = 0                          #look for chldrn test-links 
        while i < len(node['chldrn']): #while iterator < length of chldrn list
            l = node['chldrn'][i]   #i-th memeber of 'chldrn' list
            if p_match(self.tree[l]['path'], p): #check for match between pattern and chldrn and if TRUE
                node = self.tree[l]              #then foolow the flow chart> current node = descendant node
                i = 0
            else:
                i += 1                       #if no match go on to the next node
        self.stm.push(node)
        return node

    def discrimination(self, node, p):
        new_information = p_diff(node['path'], p)
        #print('new inf', new_information)
        if len(new_information) == 0:
            return node
        retr_chunk = self.retrieve(new_information)
        #print('retr chunk', retr_chunk)
        if retr_chunk['tlink'] == ['root']:
            self.add_node([new_information[0]], [], [new_information[0]])
             #add index of the added primitive tlink (add 2-1=1 if root, since root tree was appended with another tree in lines above)
            self.tree[0]['chldrn'].append(len(self.tree) - 1)
        else:
            path = node['path'] + retr_chunk['path']
            self.add_node(retr_chunk['path'], path, path.copy())
            node['chldrn'].append(len(self.tree) - 1)
        return self.tree[-1]

    def familiarisation(self, node, p):
        diff = p_diff(node['image'], p)
        if len(diff) == 0 or diff == ['$']: #do nothing if no diff between image and input pattern
            return node
        if diff[-1] == '$':
            diff.pop()
        first = self.retrieve(diff) #retrieve node using diff
        if first['tlink'] == ['root']:
            return self.discrimination(first, diff)
        if len(first['image']) == 0 or len(first['image']) > len(diff):
            node['image'].append(diff[0])
            return node
        first['image'].append(diff[0])
        return first

    def set_label(self, node_idx, lbl_idx):
        self.tree[node_idx]['label'] = lbl_idx

    def print_tree(self, node=None, idx=0, level=0):
        if not node:
            node = self.tree[0]
        if idx == 0:
            print('root')
        else:
            indent = '-------' * level
            tlink = '< ' + ' '.join(node['tlink']) + ' >'
            image = '< ' + ' '.join(node['image']) + ' >'
            text = 'Node: ' + str(idx)
            info = [indent + tlink, text, image]
            if 'label' in node:
                info.append('(%d)' % node['label'])
            print(*info)
        for cidx in node['chldrn']:
            self.print_tree(self.tree[cidx], cidx, level + 1)

    def print_stm(self):
        print(*map(lambda n : '%d < %s >' % (n['idx'], ' '.join(n['image']) if 'image' in n else ''), self.stm))
            

In [4]:
def testX():
    inp = tuple(map(list, (
        'AB$', 'AB$', 'AB$', 'AB$','AB$',
        'AC$', 'AC$', 'AC$', 'AC$', 
        'BB$', 'BB$',
        'BAC$'
    )))

    tree = Tree()

    for p in inp:          #sort each pattern from input collection
        tree.learn(p)
        print(p)           #show pattern that is being sorted
        tree.print_stm()
    tree.print_tree()

testX()

['A', 'B', '$']
0 <  > 1 <  >
['A', 'B', '$']
0 <  > 1 < A >
['A', 'B', '$']
0 <  > 2 <  >
['A', 'B', '$']
2 <  > 1 < A B >
['A', 'B', '$']
2 <  > 1 < A B >
['A', 'C', '$']
0 <  > 3 <  >
['A', 'C', '$']
3 <  > 4 < A C >
['A', 'C', '$']
3 <  > 4 < A C >
['A', 'C', '$']
3 <  > 4 < A C >
['B', 'B', '$']
4 < A C > 2 < B >
['B', 'B', '$']
4 < A C > 2 < B B >
['B', 'A', 'C', '$']
4 < A C > 5 < B A C >
root
-------< A > Node: 1 < A B >
--------------< C > Node: 4 < A C >
-------< B > Node: 2 < B B >
--------------< A C > Node: 5 < B A C >
-------< C > Node: 3 <  >


In [5]:
def test01():
    inp = tuple(map(list, (
        'AB$', 'AB$', 'AB$', 'AB$', 'AB$', 'AB$',
        'ABC$', 'ABC$', 'ABC$',
        'AB$', 'AB$','AB$', 'AB$',
        'DZP$', 'DZP$', 'DZP$', 'DZP$', 'DZP$', 'DZP$', 'DZP$', 'DZP$',         #ctrl+/ to comment-uncomment
        'ABCD$', 'ABCD$',
        'ABC$', 'ABC$', 'ABC$',
        'ABCD$', 'ABCD$',
        'ABC$',
        'ABCD$',
        'ABCDE$', 'ABCDE$',
        'ABCD$'
    )))

    tree = Tree()
    for p in inp:          #sort each pattern from input collection
        tree.learn(p)
        print(p)           #show pattern that is being sorted
    tree.print_tree()

test01()

['A', 'B', '$']
['A', 'B', '$']
['A', 'B', '$']
['A', 'B', '$']
['A', 'B', '$']
['A', 'B', '$']
['A', 'B', 'C', '$']
['A', 'B', 'C', '$']
['A', 'B', 'C', '$']
['A', 'B', '$']
['A', 'B', '$']
['A', 'B', '$']
['A', 'B', '$']
['D', 'Z', 'P', '$']
['D', 'Z', 'P', '$']
['D', 'Z', 'P', '$']
['D', 'Z', 'P', '$']
['D', 'Z', 'P', '$']
['D', 'Z', 'P', '$']
['D', 'Z', 'P', '$']
['D', 'Z', 'P', '$']
['A', 'B', 'C', 'D', '$']
['A', 'B', 'C', 'D', '$']
['A', 'B', 'C', '$']
['A', 'B', 'C', '$']
['A', 'B', 'C', '$']
['A', 'B', 'C', 'D', '$']
['A', 'B', 'C', 'D', '$']
['A', 'B', 'C', '$']
['A', 'B', 'C', 'D', '$']
['A', 'B', 'C', 'D', 'E', '$']
['A', 'B', 'C', 'D', 'E', '$']
['A', 'B', 'C', 'D', '$']
root
-------< A > Node: 1 < A B C >
--------------< B > Node: 4 < A B C D >
---------------------< C > Node: 8 < A B C D E >
----------------------------< D > Node: 11 < A B C D >
-------< B > Node: 2 <  >
-------< C > Node: 3 <  >
-------< D > Node: 5 < D Z P >
-------< Z > Node: 6 <  >
-------< P > Node:

In [6]:
def test02():
    inp = tuple(map(list, (
        'AB$', 'AB$', 'AB$', 'AB$',
        'ABC$', 'ABC$','ABC$', 'ABC$','ABC$', 'ABC$',
        'DEF$', 'DEF$', 'DEF$', 'DEF$',
        'DAC$'
    )))

    tree = Tree()

    for p in inp:          #sort each pattern from input collection
        tree.learn(p)
        print(p)           #show pattern that is being sorted
    tree.print_tree()

test02()

['A', 'B', '$']
['A', 'B', '$']
['A', 'B', '$']
['A', 'B', '$']
['A', 'B', 'C', '$']
['A', 'B', 'C', '$']
['A', 'B', 'C', '$']
['A', 'B', 'C', '$']
['A', 'B', 'C', '$']
['A', 'B', 'C', '$']
['D', 'E', 'F', '$']
['D', 'E', 'F', '$']
['D', 'E', 'F', '$']
['D', 'E', 'F', '$']
['D', 'A', 'C', '$']
root
-------< A > Node: 1 < A B C >
-------< B > Node: 2 <  >
-------< C > Node: 3 <  >
-------< D > Node: 4 < D E >
--------------< A > Node: 6 < D A >
-------< E > Node: 5 <  >


In [7]:
def test03():
    article = Tree()
    label = Tree()
    inp = (
        '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$',
        '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$'
    )

    for p in inp:
        art, lbl = map(list, p.split('~'))
        art_node = article.learn(art)
        if p_equal(art_node['image'], art):
            lbl_node = label.learn(lbl)
            if p_equal(lbl_node['image'], lbl):
                article.set_label(art_node['idx'], lbl_node['idx'])
        print(art, lbl)
        article.print_tree()
        label.print_tree()
        print()

test03()def test03():
    article = Tree()
    label = Tree()
    inp = (
        '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$',
        '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$'
    )

    for p in inp:
        art, lbl = map(list, p.split('~'))
        art_node = article.learn(art)
        if p_equal(art_node['image'], art):
            lbl_node = label.learn(lbl)
            if p_equal(lbl_node['image'], lbl):
                article.set_label(art_node['idx'], lbl_node['idx'])
        print(art, lbl)
        article.print_tree()
        label.print_tree()
        print()

test03()

SyntaxError: invalid syntax (<ipython-input-7-9c97f7be12aa>, line 21)

In [8]:
def dtest01():
    '''Autimatic Tests
    >>> dtest01()
    root
    -------< A > Node: 1 < A B C >
    -------< B > Node: 2 <  >
    -------< C > Node: 3 <  >
    -------< D > Node: 4 < D E >
    --------------< A > Node: 7 < D A >
    -------< E > Node: 5 <  >
    -------< F > Node: 6 <  >
    '''
    
    inp = tuple(map(list, (
        'AB$', 'AB$', 'AB$', 'AB$',
        'ABC$', 'ABC$','ABC$', 'ABC$','ABC$', 'ABC$',
        'DEF$', 'DEF$', 'DEF$', 'DEF$','DEF$',
        'DAC$'
    )))

    tree = Tree()

    for p in inp:          #sort each pattern from input collection
        tree.learn(p)
        #print(p)           #show pattern that is being sorted
    tree.print_tree()

#dtest01()
doctest.run_docstring_examples(dtest01, globals(), verbose=False)

In [9]:
def dtest02():
    '''
    >>> dtest02()
    root
    -------< A > Node: 1 < A B C >
    --------------< B > Node: 4 < A B C D >
    ---------------------< C > Node: 8 < A B C D E >
    ----------------------------< D > Node: 11 < A B C D >
    -------< B > Node: 2 <  >
    -------< C > Node: 3 <  >
    -------< D > Node: 5 < D Z P >
    -------< Z > Node: 6 <  >
    -------< P > Node: 7 <  >
    -------< $ > Node: 9 <  >
    -------< E > Node: 10 <  >
    '''
    
    inp = tuple(map(list, (
        'AB$', 'AB$', 'AB$', 'AB$', 'AB$', 'AB$',
        'ABC$', 'ABC$', 'ABC$',
        'AB$', 'AB$','AB$', 'AB$',
        'DZP$', 'DZP$', 'DZP$', 'DZP$', 'DZP$', 'DZP$', 'DZP$', 'DZP$',         #ctrl+/ to comment-uncomment
        'ABCD$', 'ABCD$',
        'ABC$', 'ABC$', 'ABC$',
        'ABCD$', 'ABCD$',
        'ABC$',
        'ABCD$',
        'ABCDE$', 'ABCDE$',
        'ABCD$'
    )))

    tree = Tree()

    for p in inp:          #sort each pattern from input collection
        tree.learn(p)
        #print(p)           #show pattern that is being sorted
    tree.print_tree()

#dtest02()
doctest.run_docstring_examples(dtest02, globals(), verbose=False)

In [10]:
def dtest03():
    '''
    >>> dtest03()
    root
    -------< A > Node: 1 < A B C >
    --------------< B > Node: 4 < A B >
    -------< B > Node: 2 <  >
    -------< C > Node: 3 <  >
    -------< D > Node: 5 < D E >
    --------------< A B > Node: 7 < D A B >
    -------< E > Node: 6 <  >

    '''
    inp = tuple(map(list, (
        'ABC$', 'ABC$', 'ABC$', 'ABC$', 'ABC$', 'ABC$',
        'AB$',
        'DEF$', 'DEF$', 'DEF$', 'DEF$',
        'DAB$'
    )))

    tree = Tree()

    for p in inp:          #sort each pattern from input collection
        tree.learn(p)
        #print(p)           #show pattern that is being sorted
    tree.print_tree()

#dtest03()
doctest.run_docstring_examples(dtest03, globals(), verbose=False)

In [11]:
def dtest04():
    '''
    >>> dtest04()
    root
    -------< A > Node: 1 < A B >
    --------------< C > Node: 5 < A C >
    -------< B > Node: 2 <  >
    -------< $ > Node: 3 <  >
    -------< C > Node: 4 <  >
    -------< $ > Node: 6 <  >

    '''
    inp = tuple(map(list, (
        'AB$', 'AB$', 'AB$', 'AB$',
        'A$',
        'AC$','AC$',
        'A$'

    )))

    tree = Tree()

    for p in inp:          #sort each pattern from input collection
        tree.learn(p)
        #print(p)           #show pattern that is being sorted
    tree.print_tree()

#dtest04()
doctest.run_docstring_examples(dtest04, globals(), verbose=False)

In [12]:
def dtest05():
    '''
    >>> dtest05()
    root
    -------< A > Node: 1 < A B >
    --------------< C > Node: 4 < A C >
    -------< B > Node: 2 < B B >
    --------------< A C > Node: 5 < B A C >
    -------< C > Node: 3 <  >

    '''
    
    inp = tuple(map(list, (
        'AB$', 'AB$', 'AB$', 'AB$','AB$',
        'AC$', 'AC$', 'AC$', 'AC$', 
        'BB$', 'BB$',
        'BAC$'
    )))

    tree = Tree()

    for p in inp:          #sort each pattern from input collection
        tree.learn(p)
        #print(p)           #show pattern that is being sorted
    tree.print_tree()

#dtest05()
doctest.run_docstring_examples(dtest05, globals(), verbose=False)

In [14]:
def dtest_cat01():
    '''
    >>> dtest_cat01()
    root
    -------< 1 > Node: 1 < 1 1 0 0 > (1)
    -------< 0 > Node: 2 < 0 1 1 0 > (2)
    root
    -------< A > Node: 1 < A >
    -------< B > Node: 2 < B >

    '''
    
    article = Tree()
    label = Tree()
    inp = (
        '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$', '1100$~A$',
        '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$', '0110$~B$'
    )

    for p in inp:
        art, lbl = map(list, p.split('~'))
        art_node = article.learn(art)
        if p_equal(art_node['image'], art):
            lbl_node = label.learn(lbl)
            if p_equal(lbl_node['image'], lbl):
                article.set_label(art_node['idx'], lbl_node['idx'])
#         print(art, lbl)
#         article.print_tree()
#         label.print_tree()
#         print()
    article.print_tree()
    label.print_tree()

#dtest_cat01()
doctest.run_docstring_examples(dtest_cat01, globals(), verbose=False)

Finding tests in NoName
Trying:
    dtest_cat01()
Expecting:
    root
    -------< 1 > Node: 1 < 1 1 0 0 > (1)
    -------< 0 > Node: 2 < 0 1 1 0 > (2)
    root
    -------< A > Node: 1 < A >
    -------< B > Node: 2 < B >
ok
