In [1]:
%load_ext autoreload
# %reload_ext autoreload

# Reload all modules imported with %aimport every time before executing the Python code typed
%autoreload 1

%aimport context_nn 
%aimport phrase_feeder 
%aimport bitnotes
%aimport watch_point
%aimport cluster
%aimport constants
import numpy as np
from bitarray import bitarray
from context_nn import ContextNN
from watch_point import WatchPoint
from cluster import Cluster
from phrase_feeder import PhraseFeeder
from bitnotes import BitNotes
from pprint import pprint
import math
import constants as const

In [2]:
import pickle

def load_cxnn(file_name: str) -> ContextNN:
    """Restore a pickled network from disk.

    Args:
        file_name: Path of the pickle file to read (opened in binary mode).

    Returns:
        Whatever object the pickle file contains; callers treat it as a
        ``ContextNN``.

    Note:
        ``pickle.load`` can execute arbitrary code while unpickling, so only
        point this at files you trust.
    """
    with open(file_name, 'rb') as stream:
        network = pickle.load(stream)
    return network
    
def load_noted_phrases(file_name: str) -> list:
    """Read the pickled phrase collection stored at ``file_name``.

    Args:
        file_name: Path of the pickle file to read (opened in binary mode).

    Returns:
        The unpickled object; annotated as a list of noted phrases.

    Note:
        ``pickle.load`` can execute arbitrary code while unpickling, so only
        point this at files you trust.
    """
    with open(file_name, 'rb') as pickle_file:
        return pickle.load(pickle_file)

In [63]:
# Load the pickled network and the pickled phrases used by the cells below.
# NOTE(review): judging only by the file names, this is presumably the
# multi-language model and a Russian test text — TODO confirm.
cxnn = load_cxnn('./data/cxnn_langs.pickle')
noted_phrases = load_noted_phrases('./data/texts/tst_rus.pickle')

In [None]:
# Alternative inputs (disabled): uncomment both lines to load a different
# pickled network / phrase file instead of the ones loaded above.
# cxnn = load_cxnn('./data/cxnn_monotone.pickle')
# noted_phrases = load_noted_phrases('./data/texts/tst_bbb.pickle')

In [None]:
%%time

# Begin a detection run, then push every noted phrase through the network.
cxnn.start_detection()

for position, noted_phrase in enumerate(noted_phrases):
    # Convert the phrase to its bit representation and feed it straight in.
    cxnn.detect_bits(cxnn.bit_notes.phrase_chord_as_bits(noted_phrase))
    # Progress line every 500 phrases (the very first phrase included).
    if not position % 500:
        print(f'phrase # {position}/{len(noted_phrases)}')

phrase # 0/2755


In [None]:
# Report the total number of clusters, then pretty-print the two cluster
# statistics summaries exposed by the network (cluster lengths and cluster
# activity).
print(cxnn.cluster_count())
pprint(cxnn.cluster_len_stats())
pprint(cxnn.cluster_activity_stats())

In [60]:
# Keep the detection summary for the per-label cell further down.
# NOTE(review): the saved output suggests this call also prints one
# "<index> <value>" line per output bit (0-7) — confirm in ContextNN.
detection = cxnn.summarize_detection()

0 147.46778621941672
1 146.28623124368573
2 110.52996570787153
3 146.60498329468885
4 152.81155410297075
5 89.99321748658885
6 133.84017098951026
7 147.4412542543405


In [50]:
# Display the label mapping; in the saved output each key is a 1-tuple of an
# output-bit index and each value is a three-letter label.
cxnn.data_marks

{(0,): 'bel',
 (1,): 'blg',
 (2,): 'eng',
 (3,): 'epo',
 (4,): 'jbo',
 (5,): 'pol',
 (6,): 'rus',
 (7,): 'ukr'}

In [32]:
# For every label, add up the detection values at that label's bit indices
# and print "<label> <total>".
# NOTE(review): the saved output is from execution 32, while `detection` was
# last assigned in execution 60, so the printed numbers may be stale.
for bit_indices, language in cxnn.data_marks.items():
    print(language, np.sum(detection[list(bit_indices)]))

bel 559.574733855
blg 178.632143638
eng 108.987736043
epo 183.595182381
jbo 353.674665459
pol 106.80813494
rus 204.339268233
ukr 274.450320353


In [57]:
# Display the network's `vectors_received` attribute.
# NOTE(review): presumably the number of input vectors fed in so far —
# confirm what updates/resets it in ContextNN.
cxnn.vectors_received

2990

In [62]:
# Per-output-bit statistics. Printed in order:
#   1. total cluster count,
#   2. number of points (with a positive count) per output bit,
#   3. summed counts per output bit (in the saved output these add up to the
#      total cluster count, i.e. they are clusters per bit),
#   4. summed cluster activations per output bit,
#   5. activations / (clusters * points) per output bit.
print(cxnn.cluster_count())

# Only points whose count is positive take part in the statistics.
points = [
    (out_bit, n_clusters)
    for out_bit, n_clusters in cxnn.point_stats()
    if n_clusters > 0
]
# print(sorted(points, key=lambda x: x[0]))

# Tally points and counts per output bit in a single pass.
bit_points = {}
bit_clusters = {}
for out_bit, n_clusters in points:
    bit_points[out_bit] = bit_points.get(out_bit, 0) + 1
    bit_clusters[out_bit] = bit_clusters.get(out_bit, 0) + n_clusters

print(sorted(bit_points.items()))
print(sorted(bit_clusters.items()))

# Total activation count of all clusters, grouped by the owning point's bit.
bit_activations = {}
for point in cxnn.point_objects:
    out_bit = point.output_bit
    point_activations = sum(c.activation_count() for c in point.cluster_objects)
    bit_activations[out_bit] = bit_activations.get(out_bit, 0) + point_activations

print(sorted(bit_activations.items()))

# Activations normalised by (clusters * points) for each output bit.
bit_divs = {
    out_bit: bit_activations[out_bit] / (n_clusters * bit_points[out_bit])
    for out_bit, n_clusters in bit_clusters.items()
}

pprint(sorted(bit_divs.items()))


66439
[(0, 458), (1, 409), (2, 432), (3, 385), (4, 323), (5, 285), (6, 352), (7, 443)]
[(0, 16532), (1, 8860), (2, 5986), (3, 5867), (4, 6557), (5, 2725), (6, 5956), (7, 13956)]
[(0, 3123612), (1, 1876281), (2, 1161246), (3, 1330939), (4, 1394887), (5, 573248), (6, 1260039), (7, 2639704)]
[(0, 0.4125401365302386),
 (1, 0.5177747299751085),
 (2, 0.44905937929242307),
 (3, 0.5892252284957245),
 (4, 0.6586145499031829),
 (5, 0.7381271527442459),
 (6, 0.6010168317662861),
 (7, 0.4269632970956123)]


In [11]:
# Scratch arithmetic: total of 13 hand-pasted values.
# NOTE(review): the origin of these numbers is not recorded in the notebook.
sum([624, 342637, 0.0, 351069, 2021, 789, 350828, 2955, 274, 1810, 337907, 1010, 3099])

1395023.0

In [13]:
# Scratch arithmetic: total of 12 hand-pasted values.
# NOTE(review): the origin of these numbers is not recorded in the notebook.
sum([556, 593, 1233, 3074, 4708, 689398, 24, 10971, 4258, 492475, 2502, 6425])

1216217

In [None]:
# Scratch data: two hand-pasted 13-element rows kept side by side for manual
# comparison. If this cell is run, only the second list is displayed.
# NOTE(review): the origin of these numbers is not recorded in the notebook.
[0.0, 0.0, 596, 268, 520, 81307, 0.0, 0.0, 0.0, 0.0, 310, 0.0, 79]
[0.0, 0.0, 678, 189, 268, 111389, 0.0, 0.0, 0.0, 0.0, 158, 0.0, 50]