In [1]:
%load_ext autoreload
# %reload_ext autoreload

# Reload all modules imported with %aimport every time before executing the Python code typed
%autoreload 1

%aimport context_nn 
%aimport phrase_feeder 
%aimport notes
%aimport watch_point
%aimport cluster
import numpy as np
from bitarray import bitarray
from context_nn import ContextNN
from watch_point import WatchPoint
from cluster import Cluster
from phrase_feeder import PhraseFeeder
from pprint import pprint
import math

In [8]:
# Build the Notes object the later cells call phrase_chord(), notation_as_bits()
# and note_notation_as_bits() on.
# NOTE(review): bit_count=255 equals the input_bit_count=255 passed to ContextNN
# further down - presumably the two have to stay in sync; confirm.
abc_notes = notes.Notes(note_count=26, 
                        notation_count=10, 
                        active_bits=8, 
                        bit_count=255)

In [2]:
import pickle

def load_phrase_base(file_name: str) -> tuple:
    """Load the phrase base and its marks from a pickle file.

    Args:
        file_name: Path of the pickle file to read.

    Returns:
        A ``(phrase_base, marks)`` pair read from the ``'phrase_base'`` and
        ``'marks'`` keys of the unpickled object. Each element falls back to
        an empty dict when its key is missing.
    """
    # SECURITY: pickle.load can run arbitrary code while unpickling, so only
    # point this at files that come from a trusted source.
    with open(file_name, 'rb') as f:
        data = pickle.load(f)
    # The file is no longer needed once the object is in memory, so the
    # lookups happen after the handle has been closed.
    phrase_base = data.get('phrase_base', {})
    marks = data.get('marks', {})
    return phrase_base, marks

In [3]:
# Unpickle the phrase base and its marks; the relative path is resolved against
# the kernel's current working directory.
phrase_base, marks = load_phrase_base('./data/texts/phrase_base.pickle')

In [6]:
from ipythonblocks import BlockGrid

def draw_notation(notation: np.ndarray):
    """Render a bit sequence as one row of coloured blocks.

    A grid one block high and ``len(notation)`` blocks wide is created with a
    uniform background colour; every position whose element is truthy is
    repainted with the highlight colour, then the grid is shown without
    grid lines.

    Args:
        notation: Indexable sequence of bits. Only ``len()`` and integer
            indexing are used, so any sequence supporting both works.
    """
    background = (17, 41, 129)
    highlight = (244, 195, 173)

    bit_grid = BlockGrid(len(notation), 1, fill=background)
    for block in range(bit_grid.width):
        # Only set bits need repainting; unset ones keep the fill colour.
        if notation[block]:
            bit_grid[0, block] = highlight
    bit_grid.lines_on = False
    bit_grid.show()

def draw_note_notations(notes, note_idx: int):
    """Draw every notation of one note, one block row per notation.

    Args:
        notes: Object that supports ``notes[note_idx]`` (its length gives the
            number of notations) and ``note_notation_as_bits(note_idx, i)``.
            Inside this function the parameter hides the ``notes`` module.
        note_idx: Index of the note whose notations are drawn.
    """
    notation_total = len(notes[note_idx])
    for notation_idx in range(notation_total):
        bits = notes.note_notation_as_bits(note_idx, notation_idx)
        draw_notation(bits)

In [7]:
# Take the first phrase stored under the first key of the phrase base.
key = list(phrase_base.keys())[0]
phrase = phrase_base[key][0]
# Draw the chord of the whole phrase first, then print the raw phrase.
bit_chord = abc_notes.phrase_chord(phrase)
draw_notation(abc_notes.notation_as_bits(bit_chord))
print(phrase)
# Each phrase element unpacks into a (note index, notation index) pair;
# draw the notation of every pair on its own row.
for note_idx, notation_idx in phrase:
    notation = abc_notes.note_notation_as_bits(note_idx, notation_idx)
    draw_notation(notation)

[(8, 0), (0, 1), (13, 2), (24, 3), (18, 4)]


In [96]:
# Feeder over the loaded phrase base and marks; take_phrase_bunch() is called on it below.
feeder = PhraseFeeder(phrase_base, marks)    

In [None]:
# Preview one bunch: take_phrase_bunch() returns the phrases plus the output bits
# that go with them (called here without an explicit count).
phrases, output_bits = feeder.take_phrase_bunch()
# output_bits is used directly as a key into marks, so it has to be hashable.
print(output_bits, marks[output_bits])
print(phrases)

In [9]:
# NOTE(review): abc_notes and feeder are created again here with the same
# arguments as in the cells above; running this cell replaces both objects.
# Confirm whether the duplication is intentional (a fresh start) or left over.
abc_notes = notes.Notes(note_count=26, 
                        notation_count=10, 
                        active_bits=8, 
                        bit_count=255)

feeder = PhraseFeeder(phrase_base, marks)  

# The network under study. input_bit_count repeats the bit_count given to Notes
# above - presumably they must match; confirm.
cxnn = context_nn.ContextNN(input_bit_count=255,
                            output_bit_count=8,
                            watch_point_count=100,
                            watch_bit_count=32,
                            cluster_make_threshold=6,
                            cluster_activate_threshold=4)

In [99]:
# Number of watch points held by the network.
len(cxnn.watch_points)

100

In [273]:
%%time
# Feed a single bunch (requested with count=200) to the network by hand; these are
# the same steps that feed_phrase_bunch, defined further down, wraps in a function.
phrases, output_bits = feeder.take_phrase_bunch(count=200)
# The bits are converted to a set once and shared by every phrase of the bunch.
output_bits = set(output_bits)
for phrase in phrases:
    bit_chord = abc_notes.phrase_chord(phrase)
    cxnn.receive_bits(input_bits=bit_chord, output_bits=output_bits)
    

Wall time: 1.63 s


In [13]:
def feed_phrase_bunch(feeder, count=200):
    """Take one bunch of phrases from ``feeder`` and feed it to the network.

    Every phrase of the bunch is turned into a chord by the notebook-level
    ``abc_notes`` object and handed to the notebook-level ``cxnn`` network
    together with the bunch's output bits.

    Args:
        feeder: Object whose ``take_phrase_bunch(count=...)`` returns a
            ``(phrases, output_bits)`` pair.
        count: Value forwarded as ``count`` to ``take_phrase_bunch``.
    """
    # Forward the caller's count; it used to be hard-coded to 200 here, which
    # silently ignored the argument.
    phrases, output_bits = feeder.take_phrase_bunch(count=count)
    # One set is built per bunch and shared by all of its phrases.
    output_bits = set(output_bits)
    for phrase in phrases:
        bit_chord = abc_notes.phrase_chord(phrase)
        cxnn.receive_bits(input_bits=bit_chord, output_bits=output_bits)

def show_point_stats(cxnn: ContextNN):
    """Print the total cluster count and a per-watch-point summary.

    Two lines are printed: the sum of ``cluster_count()`` over all watch
    points, then a list of ``(output_bit, number of clusters)`` pairs, one
    pair per watch point.

    Args:
        cxnn: Network whose ``watch_points`` mapping is summarised.
    """
    points = list(cxnn.watch_points.values())

    total = 0
    for point in points:
        total += point.cluster_count()

    per_point = []
    for point in points:
        per_point.append((point.output_bit, len(point.clusters.values())))

    print('Cluster count:', total)
    print(per_point)
  

In [39]:
%%time
# Feed ten bunches in a row through feed_phrase_bunch (defined above); the
# surrounding %%time reports the wall time of the whole loop.
for i in range(10):
    feed_phrase_bunch(feeder, count=200)

Wall time: 1min 15s


In [40]:
# Print the cluster totals, then display marks (the cell's last expression) so the
# mapping from output-bit tuples to their labels is visible next to the stats.
show_point_stats(cxnn)
marks

Cluster count: 106802
[(3, 1197), (4, 889), (7, 689), (4, 1262), (1, 1069), (4, 747), (6, 871), (3, 1093), (5, 1004), (7, 530), (5, 1417), (6, 1503), (7, 646), (5, 1995), (0, 1406), (3, 1424), (4, 1148), (2, 962), (3, 817), (7, 648), (4, 957), (4, 1141), (4, 1246), (6, 1080), (4, 926), (0, 1386), (4, 1234), (4, 1407), (6, 1377), (6, 1054), (6, 811), (0, 1089), (0, 864), (3, 1061), (0, 1006), (1, 900), (0, 736), (7, 827), (2, 1075), (1, 1240), (2, 1095), (0, 861), (3, 1739), (5, 1211), (6, 966), (4, 1025), (0, 1280), (7, 789), (5, 1036), (2, 1248), (4, 741), (0, 906), (4, 1134), (6, 884), (4, 924), (3, 1482), (2, 710), (1, 953), (7, 764), (4, 1075), (3, 1591), (1, 782), (2, 932), (3, 1131), (3, 1304), (3, 1336), (3, 979), (6, 1072), (1, 664), (1, 959), (2, 814), (6, 915), (0, 1178), (7, 731), (6, 1075), (2, 1064), (5, 713), (2, 1002), (0, 1256), (0, 951), (6, 695), (1, 1154), (7, 607), (4, 1112), (7, 952), (6, 1077), (6, 1020), (3, 1424), (4, 1679), (1, 892), (4, 701), (7, 820), (5, 160

{(0, 1, 3, 6): 'jbo',
 (0, 2, 3, 5): 'rus',
 (0, 2, 3, 7): 'bel',
 (0, 2, 4, 6): 'blg',
 (1, 3, 4, 5): 'pol',
 (1, 4, 5, 7): 'eng',
 (2, 3, 5, 7): 'epo',
 (2, 4, 5, 6): 'ukr'}

In [16]:
def dict_value(d: dict, n=0) -> object:
    """Return the value stored under the n-th key of ``d`` in iteration order.

    Args:
        d: Mapping to read from.
        n: Zero-based position of the key; defaults to the first key.

    Returns:
        ``d[key]`` for the key at position ``n``.

    Raises:
        IndexError: If ``n`` is negative or ``d`` has fewer than ``n + 1``
            keys (this includes an empty ``d``).
    """
    from itertools import islice

    # A negative position used to fall through the loop and fail on an
    # unbound local; reject it explicitly instead.
    if n < 0:
        raise IndexError('dict_value: position must be non-negative, got %r' % (n,))
    # islice skips the first n keys; the loop body runs at most once.
    for key in islice(d, n, None):
        return d[key]
    # Running out of keys used to leak a bare StopIteration to the caller.
    raise IndexError('dict_value: position %r is out of range for %d keys' % (n, len(d)))

In [55]:
# Inspect one cluster: the first watch point, then the cluster at position 12
# of its clusters mapping (positions follow dict iteration order).
# NOTE(review): assigning to `cluster` rebinds the name that `%aimport cluster`
# bound to the module in the first cell - consider a different variable name.
point = dict_value(cxnn.watch_points)
cluster = dict_value(point.clusters, 12)
pprint(cluster.stats)
# pprint(cluster.component_stats())
pprint(cluster.bit_rate())

# Last expression of the cell: component stats as (key, value) pairs, largest value first.
sorted(cluster.component_stats().items(), key=lambda x: x[1], reverse=True)

# parts = cluster.component_stats()
# enthropy = 0.0
# for part in parts.values():
#     enthropy += -part * math.log2(part)
# print(enthropy)

{(0, 18, 29, 74, 116, 156): 1,
 (0, 18, 29, 116, 156): 2,
 (0, 18, 74, 116): 22,
 (0, 18, 116, 156): 3,
 (0, 29, 74, 116): 1,
 (0, 74, 116, 156): 7,
 (18, 29, 74, 116): 2,
 (18, 29, 74, 116, 156): 3,
 (18, 29, 116, 156): 7,
 (18, 74, 116, 156): 11}
(array([ 0.61016949,  0.86440678,  0.27118644,  0.79661017,  1.        ,
        0.57627119]),
 [0, 18, 29, 74, 116, 156])


[((0, 18, 74, 116), 0.3728813559322034),
 ((18, 74, 116, 156), 0.1864406779661017),
 ((0, 74, 116, 156), 0.11864406779661017),
 ((18, 29, 116, 156), 0.11864406779661017),
 ((18, 29, 74, 116, 156), 0.05084745762711865),
 ((0, 18, 116, 156), 0.05084745762711865),
 ((0, 18, 29, 116, 156), 0.03389830508474576),
 ((18, 29, 74, 116), 0.03389830508474576),
 ((0, 18, 29, 74, 116, 156), 0.01694915254237288),
 ((0, 29, 74, 116), 0.01694915254237288)]

In [58]:
# Tallies over every cluster of every watch point.
# NOTE(review): the loop variable `cluster` rebinds the name that
# `%aimport cluster` bound to the module - consider renaming it.
noize_clusters = 0   # clusters for which has_big_component(...) is falsy
total_clusters = 0
cluster_lens = {}    # len(cluster.bits)  -> number of clusters
cluster_acts = {}    # len(cluster.stats) -> number of clusters
for wp in cxnn.watch_points.values():
    for cluster in wp.clusters.values():
        total_clusters += 1
        bits_len = len(cluster.bits)
        cluster_lens[bits_len] = cluster_lens.get(bits_len, 0) + 1
        stats_len = len(cluster.stats)
        cluster_acts[stats_len] = cluster_acts.get(stats_len, 0) + 1
        if cluster.has_big_component(threshold=0.2, min_activations=10):
            continue
        noize_clusters += 1

# Plain counters first, then the two histograms.
for tally in (total_clusters, noize_clusters):
    print(tally)
for histogram in (cluster_lens, cluster_acts):
    pprint(histogram)


106802
71907
{6: 45075,
 7: 32481,
 8: 17892,
 9: 7719,
 10: 2656,
 11: 750,
 12: 189,
 13: 36,
 14: 4}
{1: 343,
 2: 1532,
 3: 2713,
 4: 3090,
 5: 3542,
 6: 3993,
 7: 4313,
 8: 4612,
 9: 4791,
 10: 5020,
 11: 4833,
 12: 4545,
 13: 4341,
 14: 4032,
 15: 3607,
 16: 3221,
 17: 2781,
 18: 2496,
 19: 2182,
 20: 1850,
 21: 1675,
 22: 1605,
 23: 1558,
 24: 1559,
 25: 1449,
 26: 1483,
 27: 1430,
 28: 1314,
 29: 1267,
 30: 1196,
 31: 1131,
 32: 1138,
 33: 993,
 34: 952,
 35: 940,
 36: 813,
 37: 797,
 38: 728,
 39: 738,
 40: 645,
 41: 608,
 42: 550,
 43: 531,
 44: 481,
 45: 489,
 46: 419,
 47: 432,
 48: 417,
 49: 415,
 50: 396,
 51: 363,
 52: 348,
 53: 379,
 54: 327,
 55: 318,
 56: 337,
 57: 271,
 58: 257,
 59: 301,
 60: 274,
 61: 240,
 62: 249,
 63: 270,
 64: 218,
 65: 230,
 66: 198,
 67: 222,
 68: 195,
 69: 196,
 70: 172,
 71: 194,
 72: 163,
 73: 174,
 74: 154,
 75: 145,
 76: 155,
 77: 130,
 78: 135,
 79: 115,
 80: 123,
 81: 119,
 82: 131,
 83: 113,
 84: 100,
 85: 111,
 86: 99,
 87: 80,
 88: 9

In [26]:
# Scratch check of a hand-built Cluster.
# NOTE(review): bitarray(10) builds a 10-bit array from an integer length; depending
# on the bitarray version its initial contents may be uninitialised - confirm, and
# prefer an explicit bit pattern if the values matter here.
my_cluster = Cluster((10, 20, 30), bitarray(10), 4)
print(type(my_cluster))
# A bare expression in the middle of a cell is evaluated but not displayed;
# only the last expression below shows up in the output.
my_cluster.bits
# NOTE(review): other cells call cluster.component_stats(); stat_parts() may be an
# older name for the same thing - verify against the current Cluster class.
my_cluster.stat_parts()

<class 'cluster.Cluster'>


{}