In [1]:
%load_ext autoreload
# %reload_ext autoreload

# Reload all modules imported with %aimport every time before executing the Python code typed
%autoreload 1

%aimport context_nn 
%aimport phrase_feeder 
%aimport bitnotes
%aimport watch_point
%aimport cluster
%aimport constants
import numpy as np
from bitarray import bitarray
from context_nn import ContextNN
from watch_point import WatchPoint
from cluster import Cluster
from phrase_feeder import PhraseFeeder
from bitnotes import BitNotes
from pprint import pprint
import math
import constants as const

### Load phrase base

In [3]:
import pickle

def load_phrase_base(file_name: str) -> (dict, list):
    with open(file_name, 'rb') as f:
        data = pickle.load(f)
        phrase_base = data.get('phrase_base', {})
        marks = data.get('marks', {})
        return phrase_base, marks
    
# phrase_base, marks = load_phrase_base('./data/texts/prob_text_base.pickle')
phrase_base, marks = load_phrase_base('./data/texts/lang_base_onebit.pickle')
# phrase_base, marks = load_phrase_base('./data/texts/monotone_base.pickle')
# phrase_base, marks = load_phrase_base('./data/texts/dna_base.pickle')

In [4]:
marks

{(0,): 'bel',
 (1,): 'blg',
 (2,): 'eng',
 (3,): 'epo',
 (4,): 'jbo',
 (5,): 'pol',
 (6,): 'rus',
 (7,): 'ukr'}

### Draw notations

In [None]:
from ipythonblocks import BlockGrid

def draw_notation(notation: np.array):
    bit_grid = BlockGrid(len(notation), 1, fill=(17, 41, 129))
    for block in range(bit_grid.width):
        color = bit_grid[0, block]
        if notation[block]:
            bit_grid[0, block] = (244, 195, 173)
    bit_grid.lines_on = False
    bit_grid.show()

def draw_note_notations(notes, note_idx: int):
    for i in range(len(notes[note_idx])):
        draw_notation(notes.note_notation_as_bits(note_idx, i))

In [None]:
abc_notes = BitNotes(note_count=26, 
                     notation_count=5, 
                     active_bits=8, 
                     bit_count=255)

key = list(phrase_base.keys())[0]
phrase = phrase_base[key][0]
bit_chord = abc_notes.phrase_chord(phrase)
draw_notation(abc_notes.notation_as_bits(bit_chord))
print(phrase)
for note_idx, notation_idx in phrase:
    notation = abc_notes.note_notation_as_bits(note_idx, notation_idx)
    draw_notation(notation)

In [None]:
feeder = PhraseFeeder(phrase_base, marks)    

In [None]:
phrases, output_bits = feeder.take_phrase_bunch()
print(output_bits, marks[output_bits])
print(phrases)

### Learn model

In [5]:
def feed_phrase_batch(cxnn: ContextNN, feeder: PhraseFeeder, bit_notes: BitNotes, count=200):
    phrases, output_bits = feeder.take_phrase_batch(count=200, random_bit_key=True)
    mark = feeder.marks[output_bits]    
    output_bits = set(output_bits)
    for phrase in phrases:
        bit_chord = bit_notes.phrase_chord_as_bits(phrase)
        cxnn.receive_bits(input_bits=bit_chord, output_bits=output_bits)
    return mark    

def feed_n_batches(cxnn: ContextNN, feeder: PhraseFeeder, bit_notes: BitNotes, n=10, batch_size=200):
    for i in range(n):
        mark = feed_phrase_batch(cxnn, feeder, bit_notes, count=batch_size)
        print(f'batch {i+1}/{n} {mark}')
    print(f'clusters: {cxnn.cluster_count()}')
        
def learn_cycle(cxnn: ContextNN, 
                feeder: PhraseFeeder, 
                bit_notes: BitNotes,
                learn_batch_size=200,
                consolidate=True,
                consolidate_batch_size=10,
                reduce=True,
                n_accumulate_batches=30,
                n_consolidate_batches=300,
                reduce_min_component=0.2,
                reduce_min_activations=100):
    print('\naccumulating clusters...')
    cxnn.state = const.STATE_ACCUMULATE
    feed_n_batches(cxnn, feeder, bit_notes, n=n_accumulate_batches, batch_size=learn_batch_size)
    
    if consolidate:
        print('\nconsolidating clusters...')
        cxnn.state = const.STATE_CONSOLIDATE
        feed_n_batches(cxnn, feeder, bit_notes, n=n_consolidate_batches, batch_size=consolidate_batch_size)
    
    if reduce:
        print('\nreducing clusters...')
        cxnn.reduce_clusters(min_component=reduce_min_component, 
                             min_activations=reduce_min_activations,
                             trim=True,
                             remain_parts=3,
                             clear_stats=True,
                             consolidate=True,
                             amnesty=True)
        print(f'clusters: {cxnn.cluster_count()}')
    

In [40]:
notes = bitnotes.BitNotes(note_count=26, 
                          notation_count=5, 
                          active_bits=8, 
                          bit_count=256)

feeder = phrase_feeder.PhraseFeeder(phrase_base, marks)  

cxnn = context_nn.ContextNN(input_bit_count=256,
                            output_bit_count=len(marks.keys()),
                            watch_point_count=2000,
                            watch_bit_count=32,
                            cluster_make_threshold=7,
                            cluster_activate_threshold=5,
                            bit_notes=notes,
                            data_marks=marks)

In [41]:
%%time
for n in range(20):
    print(f'\nCycle {n+1}')
    learn_cycle(cxnn, 
                feeder, 
                notes,
                learn_batch_size=200,
                consolidate=True,
                consolidate_batch_size=5,
                reduce=True,
                n_accumulate_batches=32,
                n_consolidate_batches=160,
                reduce_min_component=0.10,
                reduce_min_activations=50)


Cycle 1

accumulating clusters...
batch 1/32 epo
batch 2/32 ukr
batch 3/32 blg
batch 4/32 jbo
batch 5/32 eng
batch 6/32 pol
batch 7/32 bel
batch 8/32 rus
batch 9/32 bel
batch 10/32 eng
batch 11/32 epo
batch 12/32 ukr
batch 13/32 rus
batch 14/32 pol
batch 15/32 blg
batch 16/32 jbo
batch 17/32 ukr
batch 18/32 pol
batch 19/32 bel
batch 20/32 epo
batch 21/32 blg
batch 22/32 jbo
batch 23/32 rus
batch 24/32 eng
batch 25/32 jbo
batch 26/32 pol
batch 27/32 eng
batch 28/32 ukr
batch 29/32 rus
batch 30/32 blg
batch 31/32 bel
batch 32/32 epo
clusters: 190382

consolidating clusters...
batch 1/160 blg
batch 2/160 jbo
batch 3/160 rus
batch 4/160 bel
batch 5/160 pol
batch 6/160 eng
batch 7/160 ukr
batch 8/160 epo
batch 9/160 rus
batch 10/160 bel
batch 11/160 ukr
batch 12/160 eng
batch 13/160 epo
batch 14/160 pol
batch 15/160 jbo
batch 16/160 blg
batch 17/160 ukr
batch 18/160 jbo
batch 19/160 rus
batch 20/160 eng
batch 21/160 epo
batch 22/160 blg
batch 23/160 pol
batch 24/160 bel
batch 25/160 blg
ba

batch 49/160 ukr
batch 50/160 jbo
batch 51/160 epo
batch 52/160 pol
batch 53/160 bel
batch 54/160 blg
batch 55/160 rus
batch 56/160 eng
batch 57/160 bel
batch 58/160 eng
batch 59/160 pol
batch 60/160 ukr
batch 61/160 rus
batch 62/160 jbo
batch 63/160 blg
batch 64/160 epo
batch 65/160 ukr
batch 66/160 blg
batch 67/160 epo
batch 68/160 jbo
batch 69/160 bel
batch 70/160 rus
batch 71/160 eng
batch 72/160 pol
batch 73/160 epo
batch 74/160 rus
batch 75/160 jbo
batch 76/160 blg
batch 77/160 eng
batch 78/160 pol
batch 79/160 ukr
batch 80/160 bel
batch 81/160 pol
batch 82/160 rus
batch 83/160 eng
batch 84/160 epo
batch 85/160 blg
batch 86/160 bel
batch 87/160 ukr
batch 88/160 jbo
batch 89/160 bel
batch 90/160 eng
batch 91/160 jbo
batch 92/160 blg
batch 93/160 epo
batch 94/160 pol
batch 95/160 rus
batch 96/160 ukr
batch 97/160 bel
batch 98/160 eng
batch 99/160 blg
batch 100/160 epo
batch 101/160 jbo
batch 102/160 pol
batch 103/160 rus
batch 104/160 ukr
batch 105/160 blg
batch 106/160 ukr
batch 1

batch 129/160 ukr
batch 130/160 pol
batch 131/160 rus
batch 132/160 bel
batch 133/160 blg
batch 134/160 eng
batch 135/160 jbo
batch 136/160 epo
batch 137/160 rus
batch 138/160 ukr
batch 139/160 blg
batch 140/160 epo
batch 141/160 bel
batch 142/160 jbo
batch 143/160 eng
batch 144/160 pol
batch 145/160 jbo
batch 146/160 epo
batch 147/160 bel
batch 148/160 ukr
batch 149/160 blg
batch 150/160 eng
batch 151/160 rus
batch 152/160 pol
batch 153/160 bel
batch 154/160 ukr
batch 155/160 jbo
batch 156/160 eng
batch 157/160 blg
batch 158/160 rus
batch 159/160 pol
batch 160/160 epo
clusters: 220821

reducing clusters...
clusters: 25944

Cycle 6

accumulating clusters...
batch 1/32 eng
batch 2/32 epo
batch 3/32 jbo
batch 4/32 rus
batch 5/32 ukr
batch 6/32 bel
batch 7/32 pol
batch 8/32 blg
batch 9/32 blg
batch 10/32 epo
batch 11/32 bel
batch 12/32 eng
batch 13/32 rus
batch 14/32 jbo
batch 15/32 pol
batch 16/32 ukr
batch 17/32 epo
batch 18/32 jbo
batch 19/32 bel
batch 20/32 rus
batch 21/32 ukr
batch 2

batch 12/160 ukr
batch 13/160 bel
batch 14/160 epo
batch 15/160 pol
batch 16/160 rus
batch 17/160 pol
batch 18/160 epo
batch 19/160 ukr
batch 20/160 jbo
batch 21/160 rus
batch 22/160 blg
batch 23/160 eng
batch 24/160 bel
batch 25/160 eng
batch 26/160 epo
batch 27/160 ukr
batch 28/160 bel
batch 29/160 pol
batch 30/160 rus
batch 31/160 jbo
batch 32/160 blg
batch 33/160 ukr
batch 34/160 blg
batch 35/160 pol
batch 36/160 bel
batch 37/160 eng
batch 38/160 rus
batch 39/160 jbo
batch 40/160 epo
batch 41/160 eng
batch 42/160 bel
batch 43/160 blg
batch 44/160 epo
batch 45/160 ukr
batch 46/160 jbo
batch 47/160 rus
batch 48/160 pol
batch 49/160 ukr
batch 50/160 epo
batch 51/160 jbo
batch 52/160 bel
batch 53/160 rus
batch 54/160 blg
batch 55/160 eng
batch 56/160 pol
batch 57/160 eng
batch 58/160 blg
batch 59/160 bel
batch 60/160 pol
batch 61/160 jbo
batch 62/160 rus
batch 63/160 ukr
batch 64/160 epo
batch 65/160 ukr
batch 66/160 blg
batch 67/160 jbo
batch 68/160 bel
batch 69/160 rus
batch 70/160 e

batch 93/160 ukr
batch 94/160 epo
batch 95/160 jbo
batch 96/160 bel
batch 97/160 bel
batch 98/160 eng
batch 99/160 blg
batch 100/160 rus
batch 101/160 jbo
batch 102/160 ukr
batch 103/160 epo
batch 104/160 pol
batch 105/160 eng
batch 106/160 bel
batch 107/160 rus
batch 108/160 ukr
batch 109/160 blg
batch 110/160 jbo
batch 111/160 pol
batch 112/160 epo
batch 113/160 pol
batch 114/160 rus
batch 115/160 blg
batch 116/160 eng
batch 117/160 epo
batch 118/160 bel
batch 119/160 ukr
batch 120/160 jbo
batch 121/160 epo
batch 122/160 eng
batch 123/160 ukr
batch 124/160 jbo
batch 125/160 rus
batch 126/160 bel
batch 127/160 blg
batch 128/160 pol
batch 129/160 jbo
batch 130/160 bel
batch 131/160 rus
batch 132/160 eng
batch 133/160 blg
batch 134/160 epo
batch 135/160 ukr
batch 136/160 pol
batch 137/160 pol
batch 138/160 jbo
batch 139/160 blg
batch 140/160 rus
batch 141/160 bel
batch 142/160 ukr
batch 143/160 eng
batch 144/160 epo
batch 145/160 eng
batch 146/160 pol
batch 147/160 epo
batch 148/160 ukr

batch 6/32 jbo
batch 7/32 pol
batch 8/32 rus
batch 9/32 ukr
batch 10/32 epo
batch 11/32 rus
batch 12/32 bel
batch 13/32 eng
batch 14/32 jbo
batch 15/32 blg
batch 16/32 pol
batch 17/32 eng
batch 18/32 pol
batch 19/32 bel
batch 20/32 ukr
batch 21/32 blg
batch 22/32 jbo
batch 23/32 epo
batch 24/32 rus
batch 25/32 bel
batch 26/32 jbo
batch 27/32 eng
batch 28/32 rus
batch 29/32 blg
batch 30/32 epo
batch 31/32 ukr
batch 32/32 pol
clusters: 225046

consolidating clusters...
batch 1/160 blg
batch 2/160 ukr
batch 3/160 bel
counter ends
batch 4/160 eng
batch 5/160 jbo
batch 6/160 pol
batch 7/160 epo
batch 8/160 rus
batch 9/160 rus
batch 10/160 bel
batch 11/160 jbo
batch 12/160 blg
batch 13/160 pol
batch 14/160 ukr
batch 15/160 eng
batch 16/160 epo
batch 17/160 rus
batch 18/160 bel
batch 19/160 jbo
batch 20/160 eng
batch 21/160 epo
batch 22/160 pol
batch 23/160 ukr
batch 24/160 blg
batch 25/160 pol
batch 26/160 eng
batch 27/160 ukr
batch 28/160 blg
batch 29/160 jbo
batch 30/160 epo
batch 31/160 b

batch 54/160 eng
batch 55/160 ukr
batch 56/160 rus
batch 57/160 bel
batch 58/160 eng
batch 59/160 blg
batch 60/160 jbo
batch 61/160 ukr
batch 62/160 rus
batch 63/160 epo
batch 64/160 pol
batch 65/160 jbo
batch 66/160 ukr
batch 67/160 eng
batch 68/160 rus
batch 69/160 bel
batch 70/160 pol
batch 71/160 epo
batch 72/160 blg
batch 73/160 epo
batch 74/160 pol
batch 75/160 eng
batch 76/160 blg
batch 77/160 bel
batch 78/160 jbo
batch 79/160 rus
batch 80/160 ukr
batch 81/160 jbo
batch 82/160 rus
batch 83/160 pol
batch 84/160 blg
batch 85/160 eng
batch 86/160 ukr
batch 87/160 bel
batch 88/160 epo
batch 89/160 bel
batch 90/160 ukr
batch 91/160 blg
batch 92/160 eng
batch 93/160 jbo
batch 94/160 rus
batch 95/160 epo
batch 96/160 pol
batch 97/160 epo
batch 98/160 pol
batch 99/160 blg
batch 100/160 eng
batch 101/160 jbo
batch 102/160 bel
batch 103/160 ukr
batch 104/160 rus
batch 105/160 pol
batch 106/160 rus
batch 107/160 epo
batch 108/160 jbo
batch 109/160 eng
batch 110/160 blg
batch 111/160 ukr
ba

batch 133/160 blg
batch 134/160 eng
batch 135/160 bel
batch 136/160 pol
batch 137/160 bel
batch 138/160 ukr
batch 139/160 pol
batch 140/160 eng
batch 141/160 rus
batch 142/160 blg
batch 143/160 epo
batch 144/160 jbo
batch 145/160 blg
batch 146/160 jbo
batch 147/160 pol
batch 148/160 rus
batch 149/160 epo
batch 150/160 ukr
batch 151/160 bel
batch 152/160 eng
batch 153/160 epo
batch 154/160 ukr
batch 155/160 blg
batch 156/160 eng
batch 157/160 bel
batch 158/160 rus
batch 159/160 jbo
batch 160/160 pol
clusters: 238247

reducing clusters...
clusters: 45726

Cycle 18

accumulating clusters...
batch 1/32 rus
batch 2/32 epo
batch 3/32 eng
batch 4/32 pol
batch 5/32 ukr
batch 6/32 blg
batch 7/32 bel
batch 8/32 jbo
batch 9/32 epo
batch 10/32 blg
batch 11/32 pol
batch 12/32 jbo
batch 13/32 ukr
batch 14/32 bel
batch 15/32 eng
batch 16/32 rus
batch 17/32 ukr
batch 18/32 blg
batch 19/32 pol
batch 20/32 jbo
batch 21/32 epo
batch 22/32 bel
batch 23/32 rus
batch 24/32 eng
batch 25/32 jbo
batch 26/32 en

batch 16/160 eng
batch 17/160 epo
batch 18/160 blg
batch 19/160 rus
batch 20/160 jbo
batch 21/160 bel
batch 22/160 pol
batch 23/160 eng
batch 24/160 ukr
batch 25/160 epo
batch 26/160 jbo
batch 27/160 pol
batch 28/160 blg
batch 29/160 ukr
batch 30/160 bel
batch 31/160 rus
batch 32/160 eng
batch 33/160 pol
batch 34/160 epo
batch 35/160 jbo
batch 36/160 eng
batch 37/160 rus
batch 38/160 ukr
batch 39/160 bel
batch 40/160 blg
batch 41/160 ukr
batch 42/160 rus
batch 43/160 epo
batch 44/160 pol
batch 45/160 jbo
batch 46/160 bel
batch 47/160 eng
batch 48/160 blg
batch 49/160 jbo
batch 50/160 bel
batch 51/160 pol
batch 52/160 ukr
batch 53/160 eng
batch 54/160 epo
batch 55/160 blg
batch 56/160 rus
batch 57/160 rus
batch 58/160 bel
batch 59/160 ukr
batch 60/160 pol
batch 61/160 epo
batch 62/160 blg
batch 63/160 jbo
batch 64/160 eng
batch 65/160 jbo
batch 66/160 blg
batch 67/160 ukr
batch 68/160 pol
batch 69/160 epo
batch 70/160 bel
batch 71/160 eng
batch 72/160 rus
batch 73/160 rus
batch 74/160 b

### Model stats

In [42]:
print(cxnn.cluster_count())
print(sorted(cxnn.point_stats(), key=lambda x: x[0]))
# print(cxnn.point_stats())
marks

49021
[(0, 3), (0, 9), (0, 18), (0, 38), (0, 13), (0, 2), (0, 5), (0, 5), (0, 0), (0, 26), (0, 2), (0, 37), (0, 4), (0, 5), (0, 11), (0, 3), (0, 0), (0, 46), (0, 4), (0, 2), (0, 3), (0, 3), (0, 22), (0, 5), (0, 3), (0, 2), (0, 0), (0, 61), (0, 1), (0, 0), (0, 23), (0, 0), (0, 68), (0, 32), (0, 1), (0, 5), (0, 219), (0, 4), (0, 1), (0, 3), (0, 28), (0, 0), (0, 1), (0, 32), (0, 71), (0, 7), (0, 13), (0, 15), (0, 0), (0, 1), (0, 2), (0, 13), (0, 3), (0, 24), (0, 8), (0, 3), (0, 4), (0, 0), (0, 4), (0, 21), (0, 0), (0, 8), (0, 1), (0, 28), (0, 15), (0, 2), (0, 3), (0, 1), (0, 5), (0, 0), (0, 1), (0, 20), (0, 33), (0, 23), (0, 46), (0, 37), (0, 12), (0, 52), (0, 4), (0, 11), (0, 1), (0, 20), (0, 4), (0, 81), (0, 1), (0, 10), (0, 11), (0, 9), (0, 38), (0, 0), (0, 15), (0, 18), (0, 0), (0, 1), (0, 17), (0, 12), (0, 49), (0, 0), (0, 5), (0, 3), (0, 4), (0, 11), (0, 13), (0, 4), (0, 0), (0, 7), (0, 0), (0, 6), (0, 15), (0, 11), (0, 37), (0, 0), (0, 1), (0, 2), (0, 0), (0, 0), (0, 3), (0, 12), (

{(0,): 'bel',
 (1,): 'blg',
 (2,): 'eng',
 (3,): 'epo',
 (4,): 'jbo',
 (5,): 'pol',
 (6,): 'rus',
 (7,): 'ukr'}

In [14]:
print(cxnn.cluster_count())
pprint(cxnn.cluster_len_stats())
pprint(cxnn.cluster_activity_stats())

78271
{6: 215, 7: 12951, 8: 62434, 9: 2586, 10: 77, 11: 8}
{0: 78271}


In [36]:
cxnn.cluster_consolidated_stats()

{0: 5092,
 1: 5159,
 2: 1158,
 3: 544,
 4: 306,
 5: 320,
 6: 267,
 7: 292,
 8: 211,
 9: 224,
 10: 155,
 11: 196,
 12: 151,
 13: 99,
 14: 109,
 15: 82,
 16: 114,
 17: 106,
 18: 83,
 19: 75,
 20: 56,
 21: 53,
 22: 42,
 23: 49,
 24: 51,
 25: 44,
 26: 69,
 27: 34,
 28: 26,
 29: 31,
 30: 45,
 31: 43,
 32: 46,
 33: 33,
 34: 37,
 35: 26,
 36: 55,
 37: 28,
 38: 39,
 39: 33,
 40: 41,
 41: 25,
 42: 38,
 43: 15,
 44: 14,
 45: 1,
 46: 2}

In [None]:
def dict_value(d: dict, n=0) -> object:
    i = 0
    diter = iter(d)
    while i <= n:
        key = next(diter)
        i += 1
    return d[key]


point = cxnn.point_objects[8]
cluster_idx = 0
cluster = point.cluster_objects[cluster_idx]
pprint(cluster.stats)
print(sum(cluster.stats.values()))
# pprint(cluster.component_stats())
# pprint(cluster.bit_rate())

print(sorted(cluster.component_stats().items(), key=lambda x: x[1], reverse=True))
print(point.cluster_masks)
print(np.where(cluster.bit_mask > 0)[0])
print(point.watch_bits)
print(point.cluster_masks[cluster_idx])
print(len(point.cluster_masks), len(point.cluster_objects))

# parts = cluster.component_s
# enthropy = 0.0
# for part in parts.values():
#     enthropy += -part * math.log2(part)
# print(enthropy)

In [None]:
cluster_with_stats = None
for wp in cxnn.point_objects:
    for cluster in wp.cluster_objects:    
        if cluster.stats:
            cluster_with_stats = cluster
            break
    else: 
        continue
    break    

if cluster_with_stats:
    print(cluster_with_stats.stats, cluster_with_stats.consolidated)
else:
    print('nothing to print')
    
    

### Save-load model

In [9]:
import pickle

def save_cxnn(cxnn: ContextNN, file_name: str):
    with open(file_name, 'wb') as f:
        pickle.dump(cxnn, f)

def load_cxnn(file_name: str) -> ContextNN:
    with open(file_name, 'rb') as f:
        cxnn = pickle.load(f)
        return cxnn
    
def save_notes(notes: BitNotes, file_name: str):
    with open(file_name, 'wb') as f:
        pickle.dump(notes, f)

def load_notes(file_name: str) -> BitNotes:
    with open(file_name, 'rb') as f:
        notes = pickle.load(f)
        return notes



In [43]:
save_cxnn(cxnn, './data/cxnn_langs_2000.pickle')

In [20]:
cxnn = load_cxnn('./data/cxnn_langs_2000.pickle')

In [None]:
save_cxnn(cxnn, './data/cxnn_monotone.pickle')

In [9]:
save_cxnn(cxnn, './data/cxnn_probs.pickle')

In [18]:
save_cxnn(cxnn, './data/cxnn_dna.pickle')