In [2]:
import os

import numpy as np
import scipy as sp
import pandas as pd
import pickle as pkl

from misc import rkc_utils
from pybdm import BDM
from pybdm import PartitionIgnore
from pyinform.blockentropy import block_entropy
from compress import Compressor

from misc.database import Database

# plotting tools
from bokeh.io import output_notebook, show
from bokeh.layouts import gridplot
from bokeh.plotting import figure
from bokeh.models import CustomJS, Slider, ColumnDataSource, Whisker, HoverTool, Span

output_notebook()

In [3]:
# Python3 Program to find nth term of 
# Thue-Morse sequence. 
  
# Return the complement of the 
# binary string. 
def complement(s): 
    comps = ""; 
  
    # finding the complement 
    # of the string. 
    for i in range(len(s)):  
  
    # if character is 0, append 1 
        if (s[i] == '0'): 
            comps += '1'; 
  
        # if character is 1, append 0. 
        else: 
            comps += '0'; 
  
    return comps; 
  
# Return the nth term of  
# Thue-Morse sequence. 
def thue_morse(n): 
  
    # Initialing the string to 0 
    s = "0"; 
  
    # Running the loop for n - 1 time. 
    for i in range(1, n):  
  
        # appending the complement of  
        # the string to the string. 
        s += complement(s); 
      
    return s; 

In [4]:
# print(thue_morse(7))
thue_morse_seq = thue_morse(10)
# print(len(thue_morse_seq))
strings = []

for i in range(50, 550, 50):
    strings.append([ int(val) for val in list(thue_morse_seq[:i]) ])

# strings = [
#     [ int(val) for val in list(thue_morse_seq[:50]) ],
#     [ int(val) for val in list(thue_morse_seq[:100]) ],
#     [ int(val) for val in list(thue_morse_seq[:150]) ],
#     [ int(val) for val in list(thue_morse_seq[:200]) ],
#     [ int(val) for val in list(thue_morse_seq[:250]) ]
# ]
# print(strings)

In [5]:
s = "0"; 
idx = 0

# Running the loop for n - 1 time. 
for i in range(1, 10):  

    # appending the complement of  
    # the string to the string. 
    segment = complement(s)
    s += complement(s); 
    segidx = 0
    for c in segment:
        idx += 1
        print("{} {} {}".format(idx, segidx, c))
        segidx += 1
    print("-------------------------")

# return s; 

1 0 1
-------------------------
2 0 1
3 1 0
-------------------------
4 0 1
5 1 0
6 2 0
7 3 1
-------------------------
8 0 1
9 1 0
10 2 0
11 3 1
12 4 0
13 5 1
14 6 1
15 7 0
-------------------------
16 0 1
17 1 0
18 2 0
19 3 1
20 4 0
21 5 1
22 6 1
23 7 0
24 8 0
25 9 1
26 10 1
27 11 0
28 12 1
29 13 0
30 14 0
31 15 1
-------------------------
32 0 1
33 1 0
34 2 0
35 3 1
36 4 0
37 5 1
38 6 1
39 7 0
40 8 0
41 9 1
42 10 1
43 11 0
44 12 1
45 13 0
46 14 0
47 15 1
48 16 0
49 17 1
50 18 1
51 19 0
52 20 1
53 21 0
54 22 0
55 23 1
56 24 1
57 25 0
58 26 0
59 27 1
60 28 0
61 29 1
62 30 1
63 31 0
-------------------------
64 0 1
65 1 0
66 2 0
67 3 1
68 4 0
69 5 1
70 6 1
71 7 0
72 8 0
73 9 1
74 10 1
75 11 0
76 12 1
77 13 0
78 14 0
79 15 1
80 16 0
81 17 1
82 18 1
83 19 0
84 20 1
85 21 0
86 22 0
87 23 1
88 24 1
89 25 0
90 26 0
91 27 1
92 28 0
93 29 1
94 30 1
95 31 0
96 32 0
97 33 1
98 34 1
99 35 0
100 36 1
101 37 0
102 38 0
103 39 1
104 40 1
105 41 0
106 42 0
107 43 1
108 44 0
109 45 1
110 46 1
111 47 

In [6]:
size_till_now = 1
segment_ends = []
segment_prev = []
for i in range(10):
    size_till_now = 2*size_till_now
    
    size_div = 1
    while size_div < size_till_now:
        size_div = 2*size_div
        segment_prev.append(Span(location=size_till_now+size_div-1, 
                             dimension='height', 
                             line_color='green', 
                             line_width=1))
        
    segment_ends.append(Span(location=size_till_now-1, 
                             dimension='height', 
                             line_color='blue', 
                             line_width=2))

In [7]:
plot_options = dict(width=450,
                    plot_height=250,
                    tools='pan,wheel_zoom,reset,save')
ctm = []

bdm = BDM(ndim=1, partition=PartitionIgnore)
for string in strings:
    b_ent = []
    ctm.append(bdm.bdm(np.array(string)))
    
ctm_plot = figure(title="BDM/CTM", **plot_options)
ctm_plot.line(range(50, 550, 50),
                ctm,
                line_width=2,
             line_color='red')
ctm_plot.renderers.extend(segment_prev)
ctm_plot.renderers.extend(segment_ends)
# print(ctm)
ctm_plot.xaxis.axis_label = 'segment_length'
ctm_plot.yaxis.axis_label = 'Kolmogorov Complexity(bits)'
show(ctm_plot)

In [8]:
## Use the compression module create one plot with each of the following compressors
## zlib, bz2, lzma
## Each line should be distinguished by different colors
## TASK: string vs compressed length plot

z_lib = []
bz2 = []
lzma = []

def bitstring_to_bytes(s):
    return int(s).to_bytes(len(s) // 8, byteorder='big')

c = Compressor()

for string in strings:
    string = [ str(val) for val in string ]
    string = ''.join(string)
    
    c.use_zlib()
    z_lib.append(len(c.compress(string.encode('utf-8'))))
    
    c.use_bz2()
    bz2.append(len(c.compress(string.encode('utf-8'))))
    
    c.use_lzma()
    lzma.append(len(c.compress(string.encode('utf-8'))))

comp_plot = figure(**plot_options)
comp_plot.line(range(50, 550, 50),
                z_lib,
                legend_label="zlib",
                line_color='red',
                line_width=2)
comp_plot.line(range(50, 550, 50),
                bz2,
                legend_label="bz2",
                line_color='green',
                line_width=2)
comp_plot.line(range(50, 550, 50),
                lzma,
                legend_label="lzma",
                line_color='purple',
                line_width=2)
comp_plot.renderers.extend(segment_prev)
comp_plot.renderers.extend(segment_ends)

comp_plot.legend.click_policy="hide"
comp_plot.xaxis.axis_label = 'segment length'
comp_plot.yaxis.axis_label = 'compressed length'
show(comp_plot)

In [9]:
comp_plot = figure(**plot_options)
comp_plot.line(range(50, 550, 50),
                z_lib,
                line_color='red',
                line_width=2)

comp_plot.legend.click_policy="hide"
comp_plot.xaxis.axis_label = 'segment length'
comp_plot.yaxis.axis_label = 'compressed length (bytes)'
show(comp_plot)

You are attempting to set `plot.legend.click_policy` on a plot that has zero legends added, this will have no effect.

Before legend properties can be set, you must add a Legend explicitly, or call a glyph method with a legend parameter set.



In [28]:
from misc import rkc_utils
import importlib

importlib.reload(rkc_utils)

results_path = "/home/arjun/research/result_dbs"

lstm_param_list = []
ntm_param_list = []
lstm_oracle_size_list = []
ntm_oracle_size_list = []

for i in range(5):
    min_param, oracle_size = rkc_utils.algo_entropy_from_db(
                                        os.path.join(results_path, 
                                                     "result_tm_lstm_s100_d1_omax.db"), 
                                         i, 
                                         accuracy=0.5)
    lstm_param_list.append(min_param)
    lstm_oracle_size_list.append(oracle_size)
    
    min_param, oracle_size = rkc_utils.algo_entropy_from_db(
                                        os.path.join(results_path, 
                                                     "result_tm_ntm_s100_d1_omax.db"), 
                                         i, 
                                         accuracy=0.5)
    ntm_param_list.append(min_param)
    ntm_oracle_size_list.append(oracle_size)

# print(np.min(lstm_param_list))
lstm_param_list = np.array(lstm_param_list)
ntm_param_list = np.array(ntm_param_list)
lstm_param_list -= np.min(lstm_param_list)
ntm_param_list -= np.min(ntm_param_list)
lstm_param_list = lstm_param_list/np.max(lstm_param_list)
ntm_param_list = ntm_param_list/np.max(ntm_param_list)

print(lstm_param_list)
print(oracle_size_list)
comp_plot = figure(**plot_options)
comp_plot.line(range(50, 300, 50),
                lstm_param_list,
                line_color='red',
                legend_label="lstm",
                line_width=2)
comp_plot.line(range(50, 300, 50),
                ntm_param_list,
                legend_label="ntm",
                line_color='blue',
                line_width=2)
comp_plot.renderers.extend(segment_prev)
comp_plot.renderers.extend(segment_ends)
comp_plot.legend.click_policy="hide"
comp_plot.xaxis.axis_label = 'Thue Morse Segment Length'
comp_plot.yaxis.axis_label = 'Estimated Algorithmic Entropy (bits)'
show(comp_plot)

comp_plot = figure(**plot_options)
comp_plot.line(range(50, 300, 50),
                lstm_oracle_size_list,
                line_color='red',
                legend_
                line_width=2)
comp_plot.line(range(50, 300, 50),
                ntm_oracle_size_list,
                line_color='red',
                line_width=2)
comp_plot.renderers.extend(segment_prev)
comp_plot.renderers.extend(segment_ends)
# comp_plot.legend.click_policy="hide"
comp_plot.xaxis.axis_label = 'Thue Morse Segment Length'
comp_plot.yaxis.axis_label = 'Oracle Size (bits)'
show(comp_plot)

[0.04813519 0.         0.91731137 1.         0.99999809]
[1, 1, 1, 1, 1]


In [17]:
with open(os.path.join(results_path, 
                       "accuracy_tm_lstm_s100_d1_omax_a60.pkl"), 
          'rb') as f:
    lstm_accuracy_list = pkl.load(f)

with open(os.path.join(results_path, 
                       "accuracy_tm_ntm_s100_d1_omax_a60.pkl"), 
          'rb') as f:
    ntm_accuracy_list = pkl.load(f)

plot_options = dict(width=450,
                    plot_height=250,
                    tools='pan,xwheel_zoom,reset,save')

size_till_now = 1
segment_ends = []
segment_prev = []
for i in range(10):
    size_till_now = 2*size_till_now
    
    size_div = 1
    while size_div < size_till_now:
        size_div = 2*size_div
        segment_prev.append(Span(location=size_till_now+size_div-1, 
                             dimension='height', 
                             line_color='green', 
                             line_width=1))
        
    segment_ends.append(Span(location=size_till_now-1, 
                             dimension='height', 
                             line_color='blue', 
                             line_width=2))

train_segment_length = 50
for idx, lstm_acc_array in enumerate(lstm_accuracy_list):
    comp_plot = figure(title="Models trained on segment length={}, oracle size - min"
                       .format(train_segment_length), 
                       x_range=(0, 300), 
                       **plot_options)
    train_segment_length += 50
    comp_plot.line(range(len(lstm_acc_array)),
                    acc_array,
                    line_color='red',
                    legend_label='lstm',
                    line_width=2)
    comp_plot.line(range(len(acc_array)),
                    acc_array,
                    line_color='red',
                    line_width=2)
    comp_plot.renderers.extend(segment_prev)
    comp_plot.renderers.extend(segment_ends)
    
#     comp_plot.legend.click_policy="hide"
    comp_plot.xaxis.axis_label = 'Thue Morse idx'
    comp_plot.yaxis.axis_label = 'Number of Models Correct'
    show(comp_plot)

# print(accuracy_list)

In [None]:
accuracy_list_max = [
0.9397590361445783,
0.9558232931726908,
0.9799196787148594,
0.9558232931726908,
0.9678714859437751
]

accuracy_list_min = [
0.7710843373493976,
0.7228915662650602,
0.678714859437751,
0.6345381526104418,
0.5060240963855421,
]

comp_plot = figure(**plot_options)
train_segment_length += 50
comp_plot.line(range(50, 300, 50),
                accuracy_list_max,
                legend_label="oracle - max",
                line_color='blue',
                line_width=2)
comp_plot.line(range(50, 300, 50),
                accuracy_list_min,
                legend_label="oracle - min",
                line_color='red',
                line_width=2)
# comp_plot.renderers.extend(segment_prev)
# comp_plot.renderers.extend(segment_ends)

comp_plot.legend.location = "bottom_left"
comp_plot.legend.click_policy="hide"
comp_plot.xaxis.axis_label = 'model trained on segment length'
comp_plot.yaxis.axis_label = 'accuracy'
show(comp_plot)
