In [1]:
%load_ext autoreload
# %reload_ext autoreload

# Reload all modules imported with %aimport every time before executing the Python code typed
%autoreload 1

%aimport context_nn 
%aimport phrase_feeder 
%aimport bitnotes
%aimport watch_point
%aimport cluster
%aimport constants
import numpy as np
from bitarray import bitarray
from context_nn import ContextNN
from watch_point import WatchPoint
from cluster import Cluster
from phrase_feeder import PhraseFeeder
from bitnotes import BitNotes
from pprint import pprint
import math
import constants as const

In [2]:
import pickle

def load_cxnn(file_name: str) -> ContextNN:
    """Read a pickled network from disk and return it.

    Args:
        file_name: Path of the pickle file to open in binary mode.

    Returns:
        Whatever object the pickle file contains (expected to be a ContextNN).

    Note:
        pickle.load can execute arbitrary code while deserializing, so only
        point this at files you created or otherwise trust.
    """
    with open(file_name, 'rb') as pickle_file:
        return pickle.load(pickle_file)
    
def load_noted_phrases(file_name: str) -> list:
    """Read a pickled collection of noted phrases from disk and return it.

    Args:
        file_name: Path of the pickle file to open in binary mode.

    Returns:
        Whatever object the pickle file contains (expected to be a list).

    Note:
        pickle.load can execute arbitrary code while deserializing, so only
        point this at files you created or otherwise trust.
    """
    with open(file_name, 'rb') as pickle_file:
        return pickle.load(pickle_file)

In [87]:
# Unpickle the two inputs used by the cells below: the network object
# (`cxnn`) and the phrases that will be fed to it (`noted_phrases`).
# Paths are relative to the notebook's working directory.
cxnn = load_cxnn('./data/cxnn_langs.pickle')
noted_phrases = load_noted_phrases('./data/texts/tst_blg.pickle')

In [None]:
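# Alternative inputs (monotone model with the tst_bbb phrases): uncomment these
# two lines to use them instead of the files loaded in the previous cell.
# cxnn = load_cxnn('./data/cxnn_monotone.pickle')
# noted_phrases = load_noted_phrases('./data/texts/tst_bbb.pickle')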

In [88]:
%%time

cxnn.start_detection()

for num, phrase in enumerate(noted_phrases[:2500]):
    bit_chord = cxnn.bit_notes.phrase_chord_as_bits(phrase)
    cxnn.detect_bits(bit_chord)
    if num % 500 == 0:
        print(f'phrase # {num}/{len(noted_phrases)}')

phrase # 0/4115
phrase # 500/4115
phrase # 1000/4115
phrase # 1500/4115
phrase # 2000/4115
Wall time: 23min 19s


In [None]:
# Print the network's cluster count, then pretty-print whatever
# cluster_len_stats() and cluster_activity_stats() return. No output was saved
# for this cell, so the shape of those results is not visible here.
print(cxnn.cluster_count())
pprint(cxnn.cluster_len_stats())
pprint(cxnn.cluster_activity_stats())

In [89]:
# Keep the summary so a later cell can index it by output bit.
# NOTE(review): this cell only assigns, so the numbered lines in its saved
# output presumably come from prints inside summarize_detection() - confirm.
detection = cxnn.summarize_detection()

0 277.6678052110872
1 188.92631464954906
2 112.26196424119722
3 213.48815222464938
4 227.93308310491116
5 212.69905481727722
6 158.57075037995637
7 281.2730513462688


In [50]:
# Display the mapping from output-bit tuples to their text labels.
cxnn.data_marks

{(0,): 'bel',
 (1,): 'blg',
 (2,): 'eng',
 (3,): 'epo',
 (4,): 'jbo',
 (5,): 'pol',
 (6,): 'rus',
 (7,): 'ukr'}

In [78]:
# For every label, add up the detection values at the output bits mapped to
# that label and print the total next to the label.
for bits, label in cxnn.data_marks.items():
    label_values = detection[list(bits)]  # tuple key -> list for index selection
    rate = np.sum(label_values)
    print(label, rate)

bel 193.311594254
blg 154.818164486
eng 91.2800622718
epo 137.287250615
jbo 154.806680364
pol 67.8774603098
rus 112.694601086
ukr 170.570249061


In [57]:
# Display the network's vectors_received attribute.
# NOTE(review): presumably the number of input vectors fed so far - confirm.
cxnn.vectors_received

2990

In [91]:
print(cxnn.cluster_count())

# (output bit, count) pairs from point_stats(), dropping entries whose count is 0.
points = [(bit, count) for bit, count in cxnn.point_stats() if count > 0]

# One pass over the pairs builds both per-bit tallies:
#   bit_points   - how many retained pairs each bit has
#   bit_clusters - the sum of the counts of those pairs
bit_points = {}
bit_clusters = {}
for bit, count in points:
    bit_points[bit] = bit_points.get(bit, 0) + 1
    bit_clusters[bit] = bit_clusters.get(bit, 0) + count

print(sorted(bit_points.items()))
print(sorted(bit_clusters.items()))

# Per-bit total of activation_count() over every cluster of every point object.
bit_activations = {}
for point in cxnn.point_objects:
    key = point.output_bit
    point_activations = sum(member.activation_count() for member in point.cluster_objects)
    bit_activations[key] = bit_activations.get(key, 0) + point_activations

print(sorted(bit_activations.items()))

# Ratio per bit: activations / (summed counts * number of retained pairs).
bit_divs = {}
for key, cluster_total in bit_clusters.items():
    bit_divs[key] = bit_activations[key] / (cluster_total * bit_points[key])

pprint(sorted(bit_divs.items()))


158335
[(0, 762), (1, 685), (2, 700), (3, 650), (4, 499), (5, 310), (6, 510), (7, 666)]
[(0, 46131), (1, 21117), (2, 14423), (3, 17599), (4, 14382), (5, 6245), (6, 12572), (7, 25866)]
[(0, 9360132), (1, 4895444), (2, 2021554), (3, 4140911), (4, 3344792), (5, 1795976), (6, 2955804), (7, 6292907)]
[(0, 0.26627729282425244),
 (1, 0.3384303441133843),
 (2, 0.20023117837580848),
 (3, 0.36198831227298756),
 (4, 0.4660680002753386),
 (5, 0.9276975128489888),
 (6, 0.4610001684415414),
 (7, 0.36529843459790107)]


In [11]:
# Scratch arithmetic: total of a hand-entered list of values (where the numbers
# come from is not shown in this notebook). The 0.0 entry makes the result a float.
sum([624, 342637, 0.0, 351069, 2021, 789, 350828, 2955, 274, 1810, 337907, 1010, 3099])

1395023.0

In [13]:
# Scratch arithmetic: total of a second hand-entered list of values (where the
# numbers come from is not shown in this notebook).
sum([556, 593, 1233, 3074, 4708, 689398, 24, 10971, 4258, 492475, 2502, 6425])

1216217

In [None]:
# Two hand-entered lists kept for reference; this cell has not been executed.
# NOTE(review): if run as-is, only the second list would be shown as the cell's
# value, because a cell displays just its last expression.
[0.0, 0.0, 596, 268, 520, 81307, 0.0, 0.0, 0.0, 0.0, 310, 0.0, 79]
[0.0, 0.0, 678, 189, 268, 111389, 0.0, 0.0, 0.0, 0.0, 158, 0.0, 50]