In [None]:
def get_relevant_activations(data, markup_for_tag):
    """Select the entries of `data` whose tag is non-zero.

    Args:
        data: indexable collection; `data[i]` is paired with `markup_for_tag[i]`.
        markup_for_tag: iterable of tag values; positions holding 0 are dropped.

    Returns:
        The kept entries combined with `np.stack` (new leading axis).
    """
    kept = [data[pos] for pos, tag in enumerate(markup_for_tag) if tag != 0]
    return np.stack(kept)


def get_matches(activations, markup):
    """Multiply standardized activations by standardized markup, row by row.

    Both inputs are centred on their mean and divided by their sample
    standard deviation (`ddof=1`, plus 1e-20 to avoid division by zero).
    Activation statistics are taken along axis 0; the standardized markup is
    reshaped to a column so that it scales every column of the activations.

    Args:
        activations: array whose axis 0 is aligned with `markup`
            (presumably 2-D, samples x units -- TODO confirm).
        markup: sequence of numeric labels, one per row of `activations`.

    Returns:
        Array of element-wise products, same shape as the broadcast result.
    """
    eps = 1e-20
    markup = np.array(markup)
    markup_z = (markup - np.mean(markup)) / (np.std(markup, ddof=1) + eps)
    centred = activations - np.mean(activations, 0, keepdims=True)
    spread = np.std(activations, 0, ddof=1, keepdims=True)
    activation_z = centred / (spread + eps)
    return activation_z * np.reshape(markup_z, [-1, 1])


def compute_stats(data, markup_for_tag):
    """Collect correlation statistics between `data` and a tag markup.

    Only positions whose tag is non-zero take part in the statistics.

    Args:
        data: indexable collection of activations, aligned with the markup.
        markup_for_tag: sequence of tags; the meta counts treat 1 as positive
            and -1 as negative, 0 entries are ignored for the statistics.

    Returns:
        dict with keys 'markup', 'relevant_markup', 'relevant_activations',
        'matches', 'correlations' (mean of matches over axis 0),
        'match_stddevs', 'mean_square_correlation' (square root of the mean
        squared correlation) and 'meta' (positive / negative / total counts).
    """
    markup_for_tag = np.array(markup_for_tag)
    relevant_markup = [tag for tag in markup_for_tag if tag != 0]
    relevant_activations = get_relevant_activations(data, markup_for_tag)
    matches = get_matches(relevant_activations, relevant_markup)
    correlations = np.mean(matches, 0)
    # One correlation per unit is expected, i.e. a flat vector.
    assert correlations.ndim == 1
    return {
        'markup': markup_for_tag,
        'relevant_markup': relevant_markup,
        'relevant_activations': relevant_activations,
        'matches': matches,
        'correlations': correlations,
        'match_stddevs': np.std(matches, 0),
        'mean_square_correlation': np.sqrt(np.mean(correlations**2)),
        'meta': {
            "positive": np.count_nonzero(markup_for_tag == 1),
            "negative": np.count_nonzero(markup_for_tag == -1),
            "total": len(markup_for_tag),
        },
    }

In [None]:
import os
import pickle
import numpy as np

# Make sure the scratch directory exists, then pickle a 50x10 array filled with 4 into it.
os.makedirs('test', exist_ok=True)

with open("test/test.pickle", 'wb') as f:
    pickle.dump(np.array([[4]*10]*50), f)

In [None]:
! ls -l test

In [None]:
import tensorflow as tf

def get_axis_quarters(tensor):
    """Encode the sign pattern along the last axis as one integer per row.

    Position `i` of the last axis contributes `2**i` when the element is
    strictly positive and nothing otherwise, so each slice along the last
    axis is mapped to the integer whose binary digits are the "> 0" flags.

    Args:
        tensor: numeric tensor; the last axis is the one being encoded.

    Returns:
        int32 tensor with the last axis reduced away.
    """
    # tf.shape only produces integer dtypes (out_type must be int32/int64),
    # so take the size as an integer first and cast it afterwards.
    last_dim = tf.cast(tf.shape(tensor)[-1], tf.float32)
    exponents = tf.range(0., last_dim, 1., dtype=tf.float32)
    powers = tf.math.pow(2., exponents)
    binary_format = tf.cast(tensor > 0, tf.float32)
    linear_combination = powers * binary_format
    numbers = tf.reduce_sum(linear_combination, axis=-1)
    return tf.cast(numbers, tf.int32)

# Two sign patterns to encode: row 0 is (+, -, +), row 1 is all negative.
tensor = tf.constant(
    [[1, -1, 1],
     [-1, -1, -1]]
)

axis_quarters = get_axis_quarters(tensor)

# Graph-mode evaluation: run the op in a session and print the encoded integers.
with tf.Session() as sess:
    print(sess.run(axis_quarters))

In [None]:
import pickle
import numpy as np

prefix = '/media/anton/DATA/results/h-elmo/expres/resrnn/poscorr/4/9/corr/level1_1/NNS'

tmpl = os.path.join(prefix, '{}.pickle')

In [None]:
# Load the pickled `correlations` object and report its largest value and the index where it occurs.
file_name = tmpl.format('correlations')
with open(file_name, 'rb') as f:
    corr = pickle.load(f)
    
print(max(corr))
print(np.argmax(corr))

In [None]:
from collections import Counter
import numpy as np

a = np.array([1, 2, 3])

c = Counter(a)
print(c)

In [None]:
# Load the pickled `matches` object; note that `matches` itself only holds the file path.
matches = tmpl.format('matches')
with open(matches, 'rb') as f:
    m = pickle.load(f)

# Inspect column 62: raw values, then the extrema together with their row indices.
m62 = m[:, 62]
print(m62)
print(max(m62))
print(np.argmax(m62))
print(min(m62))
print(np.argmin(m62))

In [None]:
print(np.std(m62))

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Histogram of the column-62 matches with values inside (-threshold, threshold] removed.
# With threshold = 0 the two selections together cover every value.
threshold = 0
f1 = m62[m62 > threshold]
f2 = m62[m62 <= -threshold]
filtered = np.concatenate([f1, f2])
plt.hist(filtered, bins=100, density=True)
plt.grid()
# Log scale on y so that sparsely populated bins stay visible.
plt.yscale('log')
plt.xlabel('matches')

plt.show()

In [None]:
act = tmpl.format('relevant_activations')
with open(act, 'rb') as f:
    act = pickle.load(f)

In [None]:
markup = tmpl.format('relevant_markup')
with open(markup, 'rb') as f:
    markup = pickle.load(f)

In [None]:
act62 = act[:, 62]

In [None]:

# Normalised 100-bin histogram of the column-62 activations, log-scaled on y.
plt.hist(act62, bins=100, density=True)
plt.grid()
plt.yscale('log')
plt.xlabel('activations')

plt.show()

In [None]:
from helmo.util.plot.plot_helpers import density_plot

# Density plot of the column-62 matches, saved as a 900-dpi PNG and then shown.
# NOTE(review): the second argument of density_plot (0.001) is presumably a bin/step width
# and the third a legend label -- confirm against helmo.util.plot.plot_helpers.
density_plot(m62, 0.001, None, 'blue')
plt.yscale('log')
plt.xlabel('matches')
plt.ylabel('density')
plt.grid()
plt.savefig(
    '/media/anton/DATA/results/h-elmo/expres/resrnn/poscorr/4/9/corr/level1_1/NNS/plots/matches.png',
    dpi=900
)
plt.show()

In [None]:
from helmo.util.plot.plot_helpers import density_plot

# Density plot of the column-62 activations, saved as a 900-dpi PNG and then shown.
# NOTE(review): the meaning of the 0.0001 argument is defined by density_plot -- confirm there.
density_plot(act62, 0.0001, None, 'blue')
plt.yscale('log')
plt.xlabel('activations')
plt.ylabel('density')
plt.grid()
plt.savefig(
    '/media/anton/DATA/results/h-elmo/expres/resrnn/poscorr/4/9/corr/level1_1/NNS/plots/activations.png',
    dpi=900
)
plt.show()

In [None]:
from helmo.util.plot.plot_helpers import density_plot

# markup + 1 is zero exactly where markup == -1, so the bool cast gives False there
# and True for every other markup value.
selected_indices = np.array(np.array(markup) + 1, dtype=bool)

In [None]:
nns_act62 = act62[selected_indices]

In [None]:
import numpy as np

print(nns_act62.shape)
print(nns_act62[:100])
min_ = np.min(act62)
max_ = np.max(act62)

In [None]:
import os

import matplotlib.pyplot as plt

# Overlay the density of all column-62 activations (blue) with the NNS-selected subset (red).
# The extra [min_, max_] argument passes the extrema of the full activation set to the second call
# (presumably so both curves share one range -- confirm in density_plot).
density_plot(act62, 0.0001, 'all activations', 'blue')
density_plot(nns_act62, 0.0001, 'NNS activations', 'red', [min_, max_])
plt.yscale('log')
plt.xlabel('activations')
plt.ylabel('density')
plt.grid()
plt.legend(loc='best')
plt.savefig(
    os.path.join(prefix, 'plots/activations_and_nns_activations.png'),
    dpi=900
)
plt.show()

In [None]:
# Plot 1 / ln|x - 1| for x in [-2, 2) sampled every 0.01.
# |x - 1| is 1 at x = 0 and 0 at x = 1, so the logarithm is 0 resp. -inf at those samples.
x = [0.01*i for i in range(-200, 200)]
y = [1 / np.log(abs(xx-1.)) for xx in x]

plt.plot(x, y)
plt.show()

In [None]:
! echo $PYTHONPATH

In [None]:
# Histogram over all entries of `m` whose absolute value exceeds 1 (boolean indexing flattens them).
f1 = m[m > 1.]
f2 = m[m < -1.]
filtered = np.concatenate([f1, f2])
plt.hist(filtered, bins=1000, density=True)
plt.grid()
plt.show()

In [None]:
tmpl = '/media/anton/DATA/results/h-elmo/expres/resrnn/poscorr/4/9/corr/level0_0/NNS/{}.pickle'
matches = tmpl.format('matches')
with open(matches, 'rb') as f:
    m = pickle.load(f)

In [None]:
act = tmpl.format('activations')
with open(act, 'rb') as f:
    a = pickle.load(f)
    


In [None]:
stddevs = tmpl.format('match_stddevs')
with open(stddevs, 'rb') as f:
    std = pickle.load(f)

print(std)
print(max(std))
print(np.argmax(std))

In [None]:
import tensorflow as tf


def get_all_values_except_specified(tensor, excluded):
    """Return the flattened values of `tensor` that do not occur in `excluded`.

    Both arguments are flattened; values are compared after casting to int32.

    Args:
        tensor: tensor (or array-like) of candidate values.
        excluded: tensor (or array-like) of values to drop.

    Returns:
        1-D tensor with the remaining values of `tensor`, in original order.
    """
    with tf.name_scope('get_all_values_except_specified'):
        tensor = tf.reshape(tensor, [-1])
        excluded = tf.reshape(excluded, [-1])
        # Comparing an [N, 1] column with an [M] vector broadcasts to [N, M];
        # entry (i, j) says whether tensor[i] equals excluded[j].
        is_excluded_value = tf.equal(
            tf.cast(tf.reshape(tensor, [-1, 1]), tf.int32),
            tf.cast(excluded, tf.int32),
        )
        # tf.boolean_mask expects a boolean mask, so build one explicitly
        # instead of relying on non-zero integers being treated as True.
        keep = tf.logical_not(tf.reduce_any(is_excluded_value, axis=1))
        return tf.boolean_mask(tensor, keep)
    

# Scratch experiment: pad `tensor` with trailing size-1 dimensions up to `num_dims`
# and then permute the dimensions according to `axes`.
# `output` is only unpacked here; it is not used further down in this cell.
tensor, num_dims, axes, output = (
                    [[[1, 2], [3, 4]], [[5, 6], [7, 8]]],

                    5,

                    [0, 2, 4],

                    [[[[[1, 2]], [[3, 4]]], [[[5, 6]], [[7, 8]]]]],
                )

# Accept plain Python lists as well as tensors.
if not tf.contrib.framework.is_tensor(tensor):
    tensor = tf.constant(tensor)
if not tf.contrib.framework.is_tensor(axes):
    axes = tf.constant(axes, dtype=tf.int32)
sh = tf.shape(tensor, out_type=tf.int32)
# Rank of `tensor`, obtained as the length of its shape vector.
nd = tf.shape(sh, out_type=tf.int32)[0]
# Runtime validation ops; they only execute through the control dependency below.
assert_axes_smaller_than_num_dims = tf.assert_less(
    axes, num_dims, message='`axes` has to be less than `num_dims`')
check_num_dims = tf.assert_greater_equal(
    num_dims, nd,
    message='`num_dims` has to be greater or equal to number of dimensions in `tensor`'
)
ass_axes_bigger_or_equal_than_num_dims = tf.assert_greater_equal(axes, -num_dims)

# Map negative axes to their non-negative equivalents.
negative_axes_mask = tf.cast(axes < 0, tf.int32)
axes += negative_axes_mask * num_dims

# Append (num_dims - nd) dimensions of size 1 to the shape.
ones_for_expansion = tf.ones(tf.reshape(num_dims - nd, [1]), dtype=tf.int32)
shape_for_expansion = tf.concat([sh, ones_for_expansion], 0)

tensor = tf.reshape(tensor, shape_for_expansion)

# remained_axes = get_all_values_except_specified(tf.range(num_dims, dtype=tf.int32), axes)
# perm = tf.concat([axes, remained_axes], 0)
# Build the permutation by scattering 0..num_dims-1 into the positions
# [axes..., remaining positions...], i.e. perm[indices[i]] = i.
updates = tf.range(0, num_dims, 1, dtype=tf.int32)
remained_positions = get_all_values_except_specified(tf.range(num_dims, dtype=tf.int32), axes)
indices = tf.concat([axes, remained_positions], 0)
indices = tf.reshape(indices, [-1, 1])
perm_shape = tf.reshape(num_dims, [1])
perm = tf.scatter_nd(indices, updates, perm_shape)

with tf.control_dependencies([check_num_dims, assert_axes_smaller_than_num_dims, ass_axes_bigger_or_equal_than_num_dims]):
    tensor = tf.transpose(tensor, perm=perm)
    

with tf.Session() as sess:
    print(sess.run(tensor))

In [None]:
import numpy as np

from helmo.util.scripts.text_neuron_correlation import compute_stats

# Smoke test for compute_stats: every unit's activation is a copy of the markup itself.
num_unrollings = 20
num_units = 4

# a = np.random.rand(num_units, num_unrollings)

# Random markup drawn from {-1, 1, 0}; not seeded, so each run differs.
m = np.random.choice([-1, 1, 0], num_unrollings)
# Shape (num_unrollings, num_units): column j equals the markup.
a = np.stack([m]*num_units, 1)
print(a)

stats = compute_stats(a, m)
print(stats)

In [None]:
import numpy as np


def get_int_part(n):
    """Return the floor of `n` (via `n // 1`) as a decimal string."""
    floored = n // 1
    return str(int(floored))


def get_frac_part(n):
    """Return the digits after the decimal point of `n` as a string.

    Digits are peeled off one at a time by multiplying by 10 until no
    fractional remainder is left; an integral `n` yields ''. The digits
    reflect the binary float value, so they may include representation noise.
    """
    consumed = int(n // 1)  # integer part seen so far
    digits = []
    while n % 1:
        n *= 10
        shifted = int(n // 1)
        # The newly exposed digit is what the integer part gained beyond 10x the old one.
        digits.append(str(shifted - consumed * 10))
        consumed = shifted
    return ''.join(digits)


def get_kth_digit(number, k, default='0'):
    """Return the digit of `number` at decimal position `k`.

    Position 0 is the units digit, positive positions go left and negative
    positions go right of the decimal point: in 123.45 the digit 1 is at
    position 2, 3 at position 0 and 5 at position -2.

    Args:
        number: float, or str convertible to float.
        k: integer position of the requested digit.
        default: value returned when the number has no digit at position `k`.

    Returns:
        str: the single digit, or `default`.
    """
    value = float(number) if isinstance(number, str) else number
    whole = get_int_part(value)
    all_digits = whole + get_frac_part(value)
    # Translate the decimal position into an index of the concatenated digit string.
    idx = len(whole) - k - 1
    return all_digits[idx] if 0 <= idx < len(all_digits) else default
    
    
def get_first_nonzero_digit_pos(n):
    """Return the decimal position of the most significant non-zero digit of `n`.

    Positions follow get_kth_digit: 0 is the units digit, -1 the first digit
    after the decimal point. Returns None for `n == 0`.
    """
    if n == 0:
        return None
    whole = get_int_part(n)
    fraction = get_frac_part(n)
    if int(whole):
        return len(whole) - 1
    # Integer part is zero: skip the leading zeros of the fractional digits.
    zeros = 0
    while zeros < len(fraction) and not int(fraction[zeros]):
        zeros += 1
    assert fraction[zeros] != '0'
    return -zeros - 1


def get_acc_num_digits(std, acc):
    """Return the decimal position down to which `std` is meaningful.

    The uncertainty of `std` is taken to be `std * acc`. The result is the
    position of the leading non-zero digit of that uncertainty, moved one
    position higher when adding or subtracting the uncertainty changes the
    digit of `std` just above it.

    Args:
        std: the value whose precision is assessed.
        acc: relative accuracy of `std`.

    Returns:
        int position (0 = units, -1 = first decimal, ...), or None when
        `std` or the uncertainty `std * acc` is zero.
    """
    if std == 0:
        return None

    std_err = std * acc

    nz_err = get_first_nonzero_digit_pos(std_err)
    if nz_err is None:
        # Zero uncertainty (acc == 0 or the product underflowed): there is no
        # leading digit to anchor on, and `nz_err + 1` would raise TypeError.
        return None

    higher_pos = nz_err + 1
    reference_digit = get_kth_digit(std, higher_pos)
    shifted_values = (std + std_err, std - std_err)
    if any(get_kth_digit(v, higher_pos) != reference_digit for v in shifted_values):
        return higher_pos
    return nz_err

# Print the result of get_acc_num_digits for std = 0.00, 0.01, ..., 1.00 at a relative accuracy of 0.2.
for std in np.linspace(0, 1, 101):
    last_digit = get_acc_num_digits(std, 0.2)
    print(std, last_digit)

In [None]:
get_kth_digit(123.45678, -4)

In [None]:
def get_first_nonzero_digit_pos_for_std(std, acc):
    """Return the position of the leading non-zero digit of `std * acc`.

    Args:
        std: base value.
        acc: factor applied to `std`.

    Returns:
        int position (0 = units, -1 = first decimal, ...), or None when
        `std` or the product `std * acc` is zero.
    """
    if std == 0:
        return None
    # Same digit search as get_first_nonzero_digit_pos; delegating also makes a
    # zero product return None instead of indexing into an empty digit string.
    return get_first_nonzero_digit_pos(std * acc)


get_first_nonzero_digit_pos_for_std(0.0123456, 1000)

In [None]:
def get_int_part(n):
    """Floor `n` with `// 1` and return the result as a string of digits."""
    whole = int(n // 1)
    return str(whole)


def get_frac_part(n):
    """Return the fractional digits of `n` as a string ('' when `n` is integral).

    Each pass multiplies `n` by 10 and records the digit that moved into the
    integer part, until no fractional remainder is left.
    """
    previous_whole = int(n // 1)
    collected = []
    while n % 1:
        n *= 10
        current_whole = int(n // 1)
        collected.append(str(current_whole - 10 * previous_whole))
        previous_whole = current_whole
    return ''.join(collected)

len(get_frac_part(2.2250738585072014e-308))

In [None]:
import numpy as np

bins = np.histogram_bin_edges([1], 8, [1., 9.])
a = np.array([-1., 0., 1., 1.5, 2.5, 10.4])
d = np.digitize(a, bins)
print(d)

In [None]:
import timeit

import numpy as np


# Time the element-wise sum of two 10**4 x 10**4 zero arrays.
a = np.zeros([10**4, 10**4])
b = np.zeros([10**4, 10**4])

N = 100

# timeit returns the total time for N runs; the statement sees only the names passed via `globals`.
t = timeit.timeit(
    stmt="c = a + b",
    globals=dict(a=a, b=b),
    number=N
)

# Average seconds per addition.
print(t / N)

In [None]:
import tensorflow as tf

# Experiment: fetch the same assign_add op twice in a single sess.run call
# and print the variable's value afterwards.
a = tf.Variable(0, trainable=False)

op = tf.assign_add(a, 1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run([op, op])
    print(a.eval(sess))

In [None]:
h = net.get_default_hooks()

In [None]:
print(h)

In [None]:
print(list(net._hooks.keys()))

In [None]:
print(h['update_level0_0_hidden_state_hist'])

In [None]:
import tensorflow as tf

g = tf.group()

with tf.Session() as sess:
    print(sess.run(g))

In [None]:
from sympy import *

k, n, p = symbols('k n p')

p = Product(k**(k * binomial(n, k) * p**k * (1-p)**(n-k)), (k, 0, n))

In [None]:
r = p.doit()

In [None]:
print(r)

In [None]:
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib.legend_handler import HandlerLine2D
%matplotlib inline
import numpy as np


def load_line(file_name, start_idx):
    """Read a two-column text file into x and y value lists.

    Args:
        file_name: path of a text file with two whitespace-separated numbers
            on every line.
        start_idx: index of the first line to use (applied as a list slice).

    Returns:
        `[x, y]`, two lists of floats taken from the first and second column.

    Raises:
        ValueError: if a used line does not hold exactly two fields, or a
            field is not a valid float.
    """
    with open(file_name) as f:
        rows = f.readlines()[start_idx:]
    xs, ys = [], []
    for row in rows:
        first, second = row.split()
        xs.append(float(first))
        ys.append(float(second))
    return [xs, ys]


def load_lines(file_names, start_idx):
    """Load every file in `file_names` with load_line, all from `start_idx`.

    Returns:
        List of `[x, y]` pairs, in the order of `file_names`.
    """
    return [load_line(fn, start_idx) for fn in file_names]


def load_groups_of_lines(groups):
    """Load the lines of every group.

    Args:
        groups: mapping label -> dict with keys 'names' (list of file names)
            and 'start_idx' (first line index to read in those files).

    Returns:
        dict mapping each label to the list of `[x, y]` lines of its files.
    """
    return {
        label: load_lines(specs['names'], specs['start_idx'])
        for label, specs in groups.items()
    }


def plot_similar_lines(lines, color, lw):
    """Draw every `[x, y]` line of a bundle with one shared colour and line width."""
    for series in lines:
        xs, ys = series[0], series[1]
        plt.plot(xs, ys, color=color, lw=lw)
        
        
def add_legend(artists, labels, position):
    """Attach a legend to the current axes at one of the predefined positions.

    Args:
        artists: legend handles.
        labels: legend labels, one per handle.
        position: one of 'outside', 'upper_right', 'upper_left' or 'best'.

    Returns:
        The legend object created by `Axes.legend`.

    Raises:
        ValueError: if `position` is not one of the supported names.
    """
    placements = {
        'outside': dict(bbox_to_anchor=(1.05, 1), loc=2),
        'upper_right': dict(bbox_to_anchor=(.95, .95), loc=1),
        'upper_left': dict(bbox_to_anchor=(.05, .95), loc=2),
        'best': {'loc': 'best'},
    }
    # Fail with a clear message instead of an unbound-variable error further down.
    if position not in placements:
        raise ValueError(
            "Unsupported legend position {!r}; expected one of {}".format(
                position, sorted(placements))
        )
    ax = plt.gca()
    lgd = ax.legend(
        artists,
        labels,
        **placements[position],
    )
    return lgd


def form_symlog_kwargs(groups):
    """Derive symlog linear thresholds from the data of all line groups.

    The threshold of an axis is the smallest absolute non-zero coordinate
    found on that axis.

    Args:
        groups: mapping label -> list of `[x, y]` lines.

    Returns:
        `(xkwargs, ykwargs)`: dicts holding 'linthreshx' / 'linthreshy'.
        A dict is empty when its axis has no non-zero values, so that the
        scale's own default threshold applies instead of failing on an
        empty minimum.
    """
    x_nonzero_values = []
    y_nonzero_values = []
    for group_of_lines in groups.values():
        for line in group_of_lines:
            x_nonzero_values.extend(x for x in line[0] if x != 0)
            y_nonzero_values.extend(y for y in line[1] if y != 0)
    xkwargs = {}
    if x_nonzero_values:
        xkwargs['linthreshx'] = np.min(np.abs(x_nonzero_values))
    ykwargs = {}
    if y_nonzero_values:
        ykwargs['linthreshy'] = np.min(np.abs(y_nonzero_values))
    return xkwargs, ykwargs


def plot_groups_of_lines(
        groups,
        single_lines,
        group_colors,
        single_colors,
        lw,
        xlabel,
        ylabel,
        xscale,
        yscale,
        xaxis_format,
        start_idx,
        legend_position,
        dpi,
        save_path,
        show,
):
    """Plot bundles of lines and individual lines on the current figure.

    Args:
        groups: mapping label -> list of `[x, y]` lines drawn in one colour.
        single_lines: mapping label -> a single `[x, y]` line.
        group_colors: colours for `groups`, matched by iteration order.
        single_colors: colours for `single_lines`, matched by iteration order.
        lw: line width of the plotted curves.
        xlabel: x-axis label.
        ylabel: y-axis label.
        xscale: matplotlib scale name for the x axis.
        yscale: matplotlib scale name for the y axis.
        xaxis_format: formatter installed as the x-axis major formatter.
        start_idx: first point index used for `single_lines` only; group
            lines are plotted in full.
        legend_position: position name forwarded to add_legend.
        dpi: resolution of the saved figure.
        save_path: output file, or None to skip saving; missing parent
            directories are created.
        show: whether to call `plt.show()` at the end.
    """
    custom_lines = []
    labels = []
    # One legend entry per group / single line, drawn with a thick proxy line
    # so the legend stays readable even when `lw` is very thin.
    for (label, group_of_lines), color in zip(groups.items(), group_colors):
        labels.append(label)
        custom_lines.append(Line2D([0], [0], color=color, lw=4))
        plot_similar_lines(group_of_lines, color, lw)
    for (label, line), color in zip(single_lines.items(), single_colors):
        labels.append(label)
        custom_lines.append(Line2D([0], [0], color=color, lw=4))
        plt.plot(line[0][start_idx:], line[1][start_idx:], color=color, lw=lw)
    plt.grid(which='both')
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Thresholds are only needed for symlog axes; computing them lazily keeps
    # other scales working when `groups` is empty or holds only zeros.
    xkwargs, ykwargs = {}, {}
    if 'symlog' in (xscale, yscale):
        symlog_x, symlog_y = form_symlog_kwargs(groups)
        if xscale == 'symlog':
            xkwargs = symlog_x
        if yscale == 'symlog':
            ykwargs = symlog_y
    plt.xscale(xscale, **xkwargs)
    plt.yscale(yscale, **ykwargs)
    ax = plt.gca()
    ax.xaxis.set_major_formatter(xaxis_format)
    # Passed to savefig so a legend placed outside the axes is not cropped.
    bbox_extra_artists = [add_legend(custom_lines, labels, legend_position)]
    if save_path is not None:
        os.makedirs(os.path.split(save_path)[0], exist_ok=True)
        plt.savefig(
            save_path,
            bbox_extra_artists=bbox_extra_artists,
            bbox_inches='tight',
            dpi=dpi,
        )
    if show:
        plt.show()
        
        
def load_single_lines(file_names):
    """Load one full line (from its first row) per labelled file.

    Args:
        file_names: mapping label -> file name.

    Returns:
        dict mapping each label to the `[x, y]` line read from its file.
    """
    return {label: load_line(fn, 0) for label, fn in file_names.items()}
        
        
def main(
        groups_of_file_names,
        file_names,
        group_colors,
        single_colors,
        lw,
        xlabel,
        ylabel,
        xscale,
        yscale,
        xaxis_format,
        start_idx,
        legend_position,
        dpi,
        save_path,
        show,
):
    """Load line data from files and plot it with plot_groups_of_lines.

    Args:
        groups_of_file_names: mapping label -> {'names': [...], 'start_idx': int},
            as expected by load_groups_of_lines.
        file_names: mapping label -> file name of an individual line.

    All remaining arguments are forwarded unchanged to plot_groups_of_lines.
    Individual lines are always read from their first row; `start_idx` is
    applied to them at plotting time.
    """
    plot_groups_of_lines(
        groups=load_groups_of_lines(groups_of_file_names),
        single_lines=load_single_lines(file_names),
        group_colors=group_colors,
        single_colors=single_colors,
        lw=lw,
        xlabel=xlabel,
        ylabel=ylabel,
        xscale=xscale,
        yscale=yscale,
        xaxis_format=xaxis_format,
        start_idx=start_idx,
        legend_position=legend_position,
        dpi=dpi,
        save_path=save_path,
        show=show,
    )

In [None]:
from collections import OrderedDict

# Locations of the validation-loss logs: `prefix`/`postfix` for the post-training runs,
# `prefix2`/`postfix2` for the runs plotted without the ' posttraining' suffix.
prefix = 'expres/resrnn/word/restore_tt'
prefix2 = 'expres/resrnn/two_branches/tt'
postfix = 'results_shifted/loss_valid.txt'
postfix2 = 'results/loss_valid.txt'

nets = ['ch100_w200', 'ch100_w100', 'ch100_w50']
# Ten repetitions (directories '0'..'9') per network.
numbers = list(range(10))

# label -> {'names': [files], 'start_idx': first line to read}; insertion order fixes the colour order later.
groups_of_file_names = OrderedDict()
for net in nets:
    name = net.replace('_', ' + ') + ' posttraining'
    groups_of_file_names[name] = {'names': [], 'start_idx': 0}
    for i in numbers:
        file_name = os.path.join(prefix, net, str(i), postfix)
        groups_of_file_names[name]['names'].append(file_name)
for net in nets:
    name = net.replace('_', ' + ')
    # These logs are read from line 200 onwards.
    groups_of_file_names[name] = {'names': [], 'start_idx': 200}
    for i in numbers:
        file_name = os.path.join(prefix2, net, str(i), postfix2)
        groups_of_file_names[name]['names'].append(file_name)
        
print(list(groups_of_file_names.keys()))

pre_x, pre_y = load_line(os.path.join(prefix, 'loss_pretrain.txt'), 0)

In [None]:
print(groups_of_file_names.keys())

# Positional arguments follow main()'s signature: groups, single lines, group colours,
# single-line colours, line width, x/y labels, x/y scales, x-axis formatter,
# start index for the single line, legend position, dpi, save path, show flag.
main(
    groups_of_file_names,
    {'pretraining ch100': os.path.join(prefix, 'loss_pretrain.txt')},
    ['red', 'green', 'blue', 'pink', 'lime', 'cyan'],
    ['black'],
    0.2,
    'step',
    'loss',
    'linear',
    'linear',
    mpl.ticker.EngFormatter(),
    200,
    'outside',
    900,
    os.path.join(prefix, 'plots/pre_and_post_training.png'),
    True
)

In [None]:
import pickle
from collections import OrderedDict


def load_pickle(file_name):
    """Read every object pickled back-to-back in `file_name`.

    Objects are unpickled one after another until the end of the file.

    Returns:
        List of the loaded objects, in file order (empty for an empty file).
    """
    loaded = []
    with open(file_name, 'rb') as f:
        try:
            while True:
                loaded.append(pickle.load(f))
        except EOFError:
            # End of file reached: everything has been read.
            pass
    return loaded


# Configuration for the entropy / mutual-information plots.
prefix = '/media/anton/DATA/results/h-elmo/expres/entropy/first_experiment/hist'
path = 'tensors/valid/accumulator_postprocessing'
# Run directories '0', '1', '2'.
dirs = [str(i) for i in range(3)]
layers = ['level0_0', 'level0_1']
# Legend label for every layer name.
labels = OrderedDict([('level0_0', 'char encoder'), ('level0_1', 'char decoder')])

# File-name templates; the placeholder is filled with a layer name.
entropy_tmpl = 'mean_entropy_{}_hidden_state.pickle'
mi_tmpl = 'mean_mi_{}_hidden_state.pickle'

# The step axis is taken from the validation-loss log of run 0.
path_to_loss = os.path.join(prefix, '0/results/loss_valid.txt')

def get_steps(fn):
    """Return the first whitespace-separated field of every line of `fn` as an int."""
    with open(fn) as f:
        return [int(line.split()[0]) for line in f.readlines()]

steps = get_steps(path_to_loss)
        
# label -> list of [steps, values] lines; one line is appended per run directory.
entropy_lines = OrderedDict(zip(labels.values(), [[] for _ in layers]))
mi_lines = OrderedDict(zip(labels.values(), [[] for _ in layers]))

for d in dirs:
    for layer in layers:
        entropy_fn = entropy_tmpl.format(layer)
        entropy_fn = os.path.join(prefix, d, path, entropy_fn)
        mi_fn = mi_tmpl.format(layer)
        mi_fn = os.path.join(prefix, d, path, mi_fn)
        label = labels[layer]
        # Every line reuses the step axis read from run 0.
        entropy_lines[label].append([steps, load_pickle(entropy_fn)])
        mi_lines[label].append([steps, load_pickle(mi_fn)])


In [None]:
# Entropy bundles per layer on a log step axis; no single lines, legend placed automatically.
save_path = os.path.join(prefix, 'plots/entropy.png')

plot_groups_of_lines(
    entropy_lines,
    {},
    ['red', 'blue'],
    [],
    0.4,
    'step',
    'bits',
    'log',
    'linear',
    mpl.ticker.ScalarFormatter(),
    0,
    'best',
    900,
    save_path,
    True,
)

In [None]:
# Same plot settings as the entropy figure, applied to the mutual-information lines.
save_path = os.path.join(prefix, 'plots/mi.png')

plot_groups_of_lines(
    mi_lines,
    {},
    ['red', 'blue'],
    [],
    0.4,
    'step',
    'bits',
    'log',
    'linear',
    mpl.ticker.ScalarFormatter(),
    0,
    'best',
    900,
    save_path,
    True,
)

In [None]:
import pickle
from collections import OrderedDict


def load_pickle(file_name):
    """Return all objects stored one after another in a pickle file, in file order."""

    def _records(handle):
        # Yield objects until pickle signals the end of the stream.
        while True:
            try:
                yield pickle.load(handle)
            except EOFError:
                return

    with open(file_name, 'rb') as f:
        return list(_records(f))


# Configuration for the mean-square-correlation plots across dropout rates.
prefix = '/media/anton/DATA/results/h-elmo/expres/correlation/nocorrloss/wide'
path = 'tensors/valid/pickle_mean_tensors/mean_sqr_correlation.pickle'
# Run directories '0', '1', '2' inside every dropout directory.
dirs = [str(i) for i in range(3)]
dropout = ['0', '0.2', '0.4', '0.7']
# Legend label for every dropout directory name.
labels = OrderedDict([('0', 'dropout 0'), ('0.2', 'dropout 0.2'), ('0.4', 'dropout 0.4'), ('0.7', 'dropout 0.7')])

# The step axis is taken from the validation-loss log of dropout '0', run '0'.
path_to_loss = os.path.join(prefix, '0/0/results/loss_valid.txt')

def get_steps(fn):
    """Parse the leading integer (the step) of every line in the text file `fn`."""
    parsed = []
    with open(fn) as f:
        for row in f.readlines():
            first_field = row.split()[0]
            parsed.append(int(first_field))
    return parsed

steps = get_steps(path_to_loss)
print(steps)
        
# label -> list of [steps, values] lines, one per run directory.
correlation_lines = OrderedDict(zip(labels.values(), [[] for _ in dropout]))

for d in dirs:
    for dp in dropout:
        correlation_fn = os.path.join(prefix, dp, d, path)
        label = labels[dp]
        # Only the first object stored in the pickle file is used.
        y = load_pickle(correlation_fn)[0]
        print(y.shape)
        correlation_lines[label].append([steps, y])
# Values of the first 'dropout 0.4' run, kept for the (disabled) inspection below.
a = correlation_lines['dropout 0.4'][0][1]
# correlation_lines['dropout 0.4'][0][1] = np.concatenate([a[:64], a[65:]])
# m = float('inf')
# for i in range(100):
#     if abs(a[i] - a[i+1]) < m:
#         m = abs(a[i] - a[i+1])
#         j = i
# print(j, abs(a[j] - a[j+1]))

In [None]:
# One coloured bundle per dropout rate on a log step axis; no single lines.
save_path = os.path.join(prefix, 'plots/mean_sqr_correlation_bundles.png')

plot_groups_of_lines(
    correlation_lines,
    {},
    ['red', 'blue', 'black', 'brown'],
    [],
    0.4,
    'step',
    'mean square correlation',
    'log',
    'linear',
    mpl.ticker.ScalarFormatter(),
    0,
    'best',
    900,
    save_path,
    True,
)

In [None]:
import pickle
from collections import OrderedDict


def load_pickle(file_name):
    """Unpickle objects from `file_name` until the end of the file and return them as a list."""
    objects = []
    with open(file_name, 'rb') as stream:
        reached_end = False
        while not reached_end:
            try:
                item = pickle.load(stream)
            except EOFError:
                reached_end = True
            else:
                objects.append(item)
    return objects


# Configuration for the SGD correlation plot: three run directories under `prefix`.
prefix = '/home/anton/h-elmo/expres/correlation/nocorrloss/sgd'
path = 'tensors/valid/pickle_mean_tensors/correlation.pickle'
dirs = [str(i) for i in range(3)]

# The step axis is taken from the validation-loss log of run 0.
path_to_loss = os.path.join(prefix, '0/results/loss_valid.txt')

def get_steps(fn):
    """Return the integer found in the first column of each line of `fn`."""
    with open(fn) as f:
        first_columns = (line.split()[0] for line in f.readlines())
        return list(map(int, first_columns))

steps = get_steps(path_to_loss)
print(steps)
        
# A single bundle labelled 'sgd' with one [steps, values] line per run directory.
correlation_lines = OrderedDict(zip(['sgd'], [[]]))

for d in dirs:
    correlation_fn = os.path.join(prefix, d, path)
    label = 'sgd'
    y = load_pickle(correlation_fn)
    # Plot the square root of every stored value.
    y = [yy**0.5 for yy in y]
    correlation_lines[label].append([steps, y])
# correlation_lines['dropout 0.4'][0][1] = np.concatenate([a[:64], a[65:]])
# m = float('inf')
# for i in range(100):
#     if abs(a[i] - a[i+1]) < m:
#         m = abs(a[i] - a[i+1])
#         j = i
# print(j, abs(a[j] - a[j+1]))

In [None]:
# Only one group ('sgd') exists, so only the first colour of the list is consumed by the zip inside.
save_path = os.path.join(prefix, 'plots/mean_sqr_correlation_bundles.png')

plot_groups_of_lines(
    correlation_lines,
    {},
    ['red', 'blue', 'black', 'brown'],
    [],
    0.4,
    'step',
    'mean square correlation',
    'log',
    'linear',
    mpl.ticker.ScalarFormatter(),
    0,
    'best',
    900,
    save_path,
    True,
)

In [None]:
import numpy as np

# The exact sum 3 * 10**9 is larger than the int32 maximum (2**31 - 1),
# so this shows what NumPy does when an int32 addition overflows.
a = np.int32(2*10**9)
b = np.int32(10**9)
print(a+b)

In [None]:
# Print every negative minimum found among the objects stored in each (run, layer) pickle file.
# NOTE(review): `tmpl` must be a template with two placeholders (run index, layer name) here;
# the templates assigned to `tmpl` in earlier cells have only one -- confirm which one is intended.
for i in range(3):
  for layer in ['level0_0', 'level0_1']:
    fn = tmpl.format(i, layer)
    values = load_pickle(fn)
    # Smallest element of every stored object.
    m = [np.min(v) for v in values]
    for mm in m:
      if mm < 0:
        print(mm)

In [None]:
%%bash
# Convert the fixed histograms of every run directory and layer into entropy and mutual information.
# NOTE(review): `layers`, `path` and `SCRIPTS` are shell variables that are not set in this cell --
# define them before the loop (they cannot be inherited from the Python kernel).
for d in {0..2}; do
    for l in "${layers[@]}"; do
        python3 $SCRIPTS/hist2entropy.py \
            ${d}/${path}/hist_fixed_${l}_hidden_state.pickle \
            ${d}/${path}/entropy_${l}_hidden_state.pickle
        python3 $SCRIPTS/hist2mi.py \
            ${d}/${path}/hist_fixed_${l}_hidden_state.pickle \
            ${d}/${path}/cross_hist_fixed_${l}_hidden_state.pickle \
            ${d}/${path}/mi_${l}_hidden_state.pickle
    done
done

In [None]:
from sympy.solvers import solve
from sympy import *

# Ask sympy for a symbolic solution of 0.5*x - 0.25*sin(2*x) = a with respect to x.
a, x = symbols('a x')
sol = solve(0.5*x-0.25*sin(2*x)-a, x)

print(sol)

In [None]:
from helmo.util.sampling import sample_hidden_sizes

hidden_sizes = sample_hidden_sizes(321600, 0.9, 4, 100, 100)
print(len(hidden_sizes))
for hs in hidden_sizes:
    print(sum(hs))
print(hidden_sizes)

In [None]:
def get_num_lstm_param(input_size, hidden_size):
    """Count the parameters of one LSTM layer.

    The count is a weight matrix of shape
    `(input_size + hidden_size) x (4 * hidden_size)` plus a bias of
    `4 * hidden_size`.
    """
    gates_width = 4*hidden_size
    weights = (input_size+hidden_size) * gates_width
    return weights + gates_width


def get_num_multi_lstm_param(input_size, hidden_sizes):
    """Count the parameters of a stack of LSTM layers.

    Args:
        input_size: input width of the first layer.
        hidden_sizes: hidden size of every layer, bottom to top; each layer
            takes the previous layer's hidden size as its input width.

    Returns:
        Sum of get_num_lstm_param over all layers.
    """
    total = 0
    layer_input = input_size
    for width in hidden_sizes:
        total += get_num_lstm_param(layer_input, width)
        layer_input = width
    return total

In [None]:
for hs in hidden_sizes:
    print(hs, get_num_multi_lstm_param(100, hs))

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import matplotlib
print(matplotlib.__version__)
%matplotlib notebook

# Draw up to 1000 samples and stop at the first one that has a negative coordinate.
# `points` stays empty because the append and the 3-D scatter plot below are commented out.
points = []
for _ in range(1000):
    p = sample_point_from_sum_triangle(100, 4, 1)
    if any([c < 0 for c in p]):
        print('Error!', p)
        break
#     points.append(p)
    
# x, y, z = zip(*points)
# fig = plt.figure()
# ax = fig.add_subplot(111, projection='3d')
# ax.scatter(x, y, z)
# plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import matplotlib
print(matplotlib.__version__)
%matplotlib notebook

from helmo.util.sampling import sample_point_from_sum_prism

# Draw 1000 samples and show them as a 3-D scatter plot.
points = []
for _ in range(1000):
    p = sample_point_from_sum_prism(3, 1)
#     print(p)
    points.append(p)
    
x, y, z = zip(*points)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x, y, z)
# pyplot has no zlabel(); the z label only exists on the 3-D axes object,
# so all three labels are set through `ax`.
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()

In [None]:
points = sample_shifted_points_inside_sphere_with_constant_sum(
    [100, 100, 100],
    
)

In [None]:
print(sum([9.346068550516723, -0.49577868439307227, 36.18069694773759, 54.96901318613876]))

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Empirical distribution of sample_psi(): 100000 draws in a 1000-bin histogram.
psi_values = [sample_psi() for _ in range(100000)]
plt.hist(psi_values, bins=1000)
plt.show()

In [None]:
import math

%matplotlib inline
import matplotlib.pyplot as plt


# Plot 0.5 * (x + 0.5*sin(2x)) on [-pi/2, pi/2) using 400 evenly spaced samples.
range_ = [-math.pi / 2, math.pi / 2]
d = (range_[1] - range_[0]) / 400
x = [range_[0] + d * i for i in range(400)]
y = [0.5 * (xx + 0.5*math.sin(2*xx)) for xx in x]

plt.plot(x, y)
plt.grid()
plt.show()

In [None]:
import math

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# Plot -sin(x) on [-pi/2, pi/2) using 400 evenly spaced samples.
range_ = [-math.pi / 2, math.pi / 2]
d = (range_[1] - range_[0]) / 400
x = [range_[0] + d * i for i in range(400)]
y = [-np.sin(xx) for xx in x]

plt.plot(x, y)
plt.grid()
plt.show()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Empirical distribution of sample_theta(): 10000 draws in a 1000-bin histogram.
theta_values = [sample_theta() for _ in range(10000)]
plt.hist(theta_values, bins=1000)
plt.show()

In [None]:
import math

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# Plot cos(x) on [-pi/2, pi/2) using 400 evenly spaced samples.
range_ = [-math.pi / 2, math.pi / 2]
d = (range_[1] - range_[0]) / 400
x = [range_[0] + d * i for i in range(400)]
y = [np.cos(xx) for xx in x]

plt.plot(x, y)
plt.grid()
plt.show()

In [None]:
import math

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# Plot cos(x)**2 on [-pi/2, pi/2) using 400 evenly spaced samples.
range_ = [-math.pi / 2, math.pi / 2]
d = (range_[1] - range_[0]) / 400
x = [range_[0] + d * i for i in range(400)]
y = [np.cos(xx)**2 for xx in x]

plt.plot(x, y)
plt.grid()
plt.show()

In [None]:
import random
import timeit

# Compare the total time of 1000 random.uniform calls on the ranges (0, 1) and (0, 100).
t1 = timeit.timeit(
    stmt="random.uniform(0,1)",
    number=1000,
    globals=dict(random=random),
)
t2 = timeit.timeit(
    stmt="random.uniform(0,100)",
    number=1000,
    globals=dict(random=random),
)
print(t1, t2)

In [None]:
def log_net(start, end, factor):
    """Build an increasing grid of points that grows roughly geometrically.

    Starting from `start`, each next point is the current one multiplied by
    `factor` and then moved to the next integer strictly above that product,
    so consecutive points always differ. Points are collected while they are
    below `end`; `end` itself is always appended last.

    Returns:
        list of grid points ending with `end`.
    """
    grid = []
    current = start
    while current < end:
        grid.append(current)
        scaled = current * factor
        nearest = round(scaled)
        # Step to the first integer strictly greater than the scaled value.
        current = int(nearest + 1 if nearest <= scaled else nearest)
    grid.append(end)
    return grid
    
    
# Grid from 0 to 80000 with growth factor 1.0545: print how many points it has, then the points.
net = log_net(0, 80000, 1.0545)
print(len(net))
print(net)

In [None]:
from learning_to_learn.controller import Controller

# Count how often a 'logarithmic_truth' controller returns a truthy value over steps 0..999.
spec = {
    "type": "logarithmic_truth",
    "start": 0,
    "factor": 1.02,
    "end": 1000,
}

# The controller is handed this dict; the loop below advances 'step' in place
# (presumably the controller reads it on every get() -- confirm in Controller).
storage = {'step': 0}

controller = Controller(storage, spec)

count = 0
for _ in range(1000):
    if controller.get():
        count += 1
        # print(storage['step'])
    storage['step'] += 1
print(count)

In [None]:
from collections import OrderedDict


def parse_pos_corr_table(table_file_name, layer_names):
    """Parse a table of 'value ± error' cells into per-layer series.

    Every non-empty line of the file is one row; its cells are separated by
    ';' and matched to `layer_names` by position.

    Args:
        table_file_name: path of the table file.
        layer_names: names of the columns, in file order.

    Returns:
        OrderedDict mapping each layer name to `[row_indices, values, errors]`.
    """
    with open(table_file_name) as f:
        rows = [row for row in f.read().split('\n') if row]
    data = OrderedDict((name, [[], [], []]) for name in layer_names)
    for row_idx, row in enumerate(rows):
        for layer_name, cell in zip(layer_names, row.split(';')):
            value_text, error_text = cell.split(' ± ')
            value = float(value_text)
            error = float(error_text)
            indices, values, errors = data[layer_name]
            indices.append(row_idx)
            values.append(value)
            errors.append(error)
    return data


# Parse the correlation table; its four columns are labelled with the layer names given here.
data = parse_pos_corr_table(
    'expres/resrnn/poscorr/4/tables/table.csv',
    ['char encoder', 'char decoder', 'word encoder', 'word decoder'],
)
print(data)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt


def set_size(w,h, ax=None):
    """Resize the figure so that the axes area measures `w` x `h` inches.

    The figure size is the requested axes size divided by the fraction of
    the figure that the subplot parameters leave for the axes.

    Args:
        w: target axes width in inches.
        h: target axes height in inches.
        ax: axes to resize; the current axes are used when omitted.
    """
    if not ax:
        ax = plt.gca()
    figure = ax.figure
    margins = figure.subplotpars
    width_fraction = margins.right - margins.left
    height_fraction = margins.top - margins.bottom
    figure.set_size_inches(float(w) / width_fraction, float(h) / height_fraction)


def mark_x_axis(tags):
    """Put one x tick per tag on the current axes, label the ticks with the tags
    and resize the axes area to 12 x 4 inches."""
    tick_positions = range(len(tags))
    plt.xticks(tick_positions)
    set_size(12, 4)
    plt.gca().set_xticklabels(tags)


def read_tags(tag_file_name):
    """Return the lines of `tag_file_name`, each stripped of surrounding whitespace."""
    with open(tag_file_name) as f:
        return [raw.strip() for raw in f.readlines()]


def add_upper_x_axis(upper_tags):
    """Add a twin x axis on top of the current axes with vertically written tick labels.

    The twin axis copies the current x limits and gets one tick per entry
    of `upper_tags`.
    """
    lower, upper = plt.xlim()
    secax = plt.twiny()
    secax.set_xlim(lower, upper)
    tick_positions = range(len(upper_tags))
    secax.set_xticks(tick_positions)
    secax.set_xticklabels(upper_tags, rotation='vertical')


def tag_plot(data, tags, colors, markers, xlabel, ylabel, upper_tags=None):
    """Plot per-tag values with error bars, one marker series per entry of `data`.

    Args:
        data: mapping label -> [positions, values, errors].
        tags: tick labels of the lower x axis.
        colors: one colour per series, matched by iteration order.
        markers: one marker per series, matched by iteration order.
        xlabel: label of the lower x axis.
        ylabel: label of the y axis.
        upper_tags: optional tick labels for an additional upper x axis.
    """
    _, ax = plt.subplots()
    for (label, dt), color, mk in zip(data.items(), colors, markers):
        # linestyle=' ' draws markers and error bars only, without connecting lines.
        ax.errorbar(dt[0], dt[1], yerr=dt[2], color=color, marker=mk, label=label, linestyle=' ')
    # Must run while `ax` is still the current axes: mark_x_axis works through pyplot state.
    mark_x_axis(tags)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if upper_tags is not None:
        add_upper_x_axis(upper_tags)
    ax.grid()
    ax.legend(loc='best')


# Tag names for the lower axis and their descriptions for the upper axis.
tags = read_tags('dataset_tags/text8_first6.4e5_tags.txt')
upper_tags = read_tags('dataset_tags/text8_first6.4e5_tag_description.txt')
# Number of rows parsed from the table; only that many tags are used.
n = len(data['char encoder'][0])
tag_plot(
    data,
    tags[:n],
    ['red', 'green', 'blue', 'black'],
    ['o', 'o', 'o', 'o'],
    'part of speech',
    'correlation',
    upper_tags=upper_tags[:n],
)
plt.show()


In [None]:
!python3.6 -m pip install --upgrade matplotlib

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt

print(mpl.__version__)
fig, ax = plt.subplots()
print(type(ax))
attributes = dir(ax)
for m in attributes:
    if 'secondary' in m:
        print(m)

In [None]:
import tensorflow as tf

# Write 1000 tensor summaries of a freshly sampled 3x2 normal tensor to an event file.
r = tf.random.normal([3, 2])
s = tf.summary.tensor_summary('r', r)

with tf.Session() as sess:
    writer = tf.summary.FileWriter('testres/summary', filename_suffix='wow')
    try:
        for i in range(1000):
            writer.add_summary(sess.run(s), global_step=i)
        writer.flush()
    finally:
        # Release the event file even if a step fails; close() also flushes pending events.
        writer.close()

In [None]:
import glob, os
def get_event(dir_path):
    """Return the path matching ``<dir_path>/*`` with the greatest ``os.path.getctime``.

    Raises ``ValueError`` (from ``max``) when nothing matches the pattern.
    """
    candidates = glob.glob(f'{dir_path}/*')
    return max(candidates, key=os.path.getctime)


import numpy as np
from tensorflow.train import summary_iterator
def get_lc(event_file):
    """Return everything ``summary_iterator`` yields for ``event_file`` as a list."""
    return list(summary_iterator(event_file))

In [None]:
import tests.integration.test_integration as mod

In [None]:
# Create (or truncate) the test file and store the literal text 'foo' in it.
with open('/home/anton/test/test.txt', 'w') as f:
    f.write('foo')

In [None]:
# Print whatever `mod.get_number_from_file` returns for the test file.
# NOTE(review): the cell above writes the text 'foo' to this path, which is not
# numeric — presumably this probes how the helper treats such content; confirm.
print(mod.get_number_from_file('/home/anton/test/test.txt'))

In [None]:
import multiprocessing as mp

import tests.utils_for_testing.tf_utils as tutil
import tensorflow.compat.v1 as tf


# Run `tutil.evaluate_tensor_in_sep_process` in a child process and print what
# it sends back through the queue.
# NOTE(review): presumably the helper builds a tensor from `func` and `args`,
# evaluates it, and puts the value on `q` — confirm in tests.utils_for_testing.tf_utils.
func = tf.zeros
args = [(3, 2)]
q = mp.Queue()
p = mp.Process(target=tutil.evaluate_tensor_in_sep_process, args=(q, func, args))
p.start()
# Keep q.get() before p.join(): get() blocks until the child has put an item,
# and joining a process that still has queued data to deliver can deadlock.
# If the child exits without putting anything, this call blocks forever.
print(q.get())
p.join()

In [None]:
!pip install plotly

In [None]:
%matplotlib inline

import pandas as pd
import plotly.express as px
from matplotlib import pyplot as plt


# Parallel-coordinates view of the results stored under nn_const/4,
# with lines coloured by the 'loss' column.
df = pd.read_csv('/home/anton/h-elmo/expres/correlation/batch/nn_const/4/num_nodes_and_loss.csv')
fig = px.parallel_coordinates(
    df,
    color='loss',
    color_continuous_scale=px.colors.diverging.Tealrose,
)
fig.show()

In [None]:
%matplotlib inline

import pandas as pd
import plotly.express as px
from matplotlib import pyplot as plt


# Parallel-coordinates view of the results stored under nn_const/3,
# with lines coloured by the 'loss' column.
df = pd.read_csv('/home/anton/h-elmo/expres/correlation/batch/nn_const/3/num_nodes_and_loss.csv')
fig = px.parallel_coordinates(
    df,
    color='loss',
    color_continuous_scale=px.colors.diverging.Tealrose,
)
fig.show()

In [None]:
%matplotlib inline

import pandas as pd
import plotly.express as px
from matplotlib import pyplot as plt


# Parallel-coordinates view of the results stored under nn_const/2,
# with lines coloured by the 'loss' column.
df = pd.read_csv('/home/anton/h-elmo/expres/correlation/batch/nn_const/2/num_nodes_and_loss.csv')
fig = px.parallel_coordinates(
    df,
    color='loss',
    color_continuous_scale=px.colors.diverging.Tealrose,
)
fig.show()

In [None]:
import plotly
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd


# Point plotly at the local orca binary used by fig.write_image.
plotly.io.orca.config.executable = '/home/anton/anaconda3/bin/orca'

df = pd.read_csv('/home/anton/h-elmo/expres/correlation/batch/nn_const/4/num_nodes_and_loss.csv')

# One axis per layer column plus a loss axis whose initial selection is
# [1.22, 1.225]; lines are coloured by loss.
fig = go.Figure(
    data=go.Parcoords(
        line={
            'color': df['loss'],
            'colorscale': px.colors.sequential.Viridis,
            'showscale': True,
        },
        dimensions=[
            {'values': df['layer 1'], 'label': 'layer 1'},
            {'values': df['layer 2'], 'label': 'layer 2'},
            {'values': df['layer 3'], 'label': 'layer 3'},
            {'values': df['layer 4'], 'label': 'layer 4'},
            {'constraintrange': [1.22, 1.225], 'values': df['loss'], 'label': 'loss'},
        ],
    )
)
fig.write_image('/home/anton/h-elmo/expres/correlation/batch/nn_const/4/best.png')
fig.show()


In [None]:
import plotly
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd


# Point plotly at the local orca binary used by fig.write_image.
plotly.io.orca.config.executable = '/home/anton/anaconda3/bin/orca'

df = pd.read_csv('/home/anton/h-elmo/expres/correlation/batch/nn_const/3/num_nodes_and_loss.csv')

# One axis per layer column plus a loss axis whose initial selection is
# [1.255, 1.27]; lines are coloured by loss.
fig = go.Figure(
    data=go.Parcoords(
        line={
            'color': df['loss'],
            'colorscale': px.colors.sequential.Viridis,
            'showscale': True,
        },
        dimensions=[
            {'values': df['layer 1'], 'label': 'layer 1'},
            {'values': df['layer 2'], 'label': 'layer 2'},
            {'values': df['layer 3'], 'label': 'layer 3'},
            {'constraintrange': [1.255, 1.27], 'values': df['loss'], 'label': 'loss'},
        ],
    )
)
fig.write_image('/home/anton/h-elmo/expres/correlation/batch/nn_const/3/worst.png')
fig.show()


In [None]:
import plotly
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd


# Point plotly at the local orca binary used by fig.write_image.
plotly.io.orca.config.executable = '/home/anton/anaconda3/bin/orca'

df = pd.read_csv('/home/anton/h-elmo/expres/correlation/batch/nn_const/2/num_nodes_and_loss.csv')

# One axis per layer column plus a loss axis whose initial selection is
# [1.27, 1.275]; lines are coloured by loss.
fig = go.Figure(
    data=go.Parcoords(
        line={
            'color': df['loss'],
            'colorscale': px.colors.sequential.Viridis,
            'showscale': True,
        },
        dimensions=[
            {'values': df['layer 1'], 'label': 'layer 1'},
            {'values': df['layer 2'], 'label': 'layer 2'},
            {'constraintrange': [1.27, 1.275], 'values': df['loss'], 'label': 'loss'},
        ],
    )
)
fig.write_image('/home/anton/h-elmo/expres/correlation/batch/nn_const/2/best.png')
fig.show()


In [None]:
! pip install psutil requests

In [None]:
import plotly.graph_objects as go

import pandas as pd


# Load the sample parallel-coordinates dataset over the network and print it.
df = pd.read_csv("https://raw.githubusercontent.com/bcdunbar/datasets/master/parcoords_data.csv")
print(df)

# NOTE(review): cmin (-1.22) is greater than cmax (-100); this looks like an
# inverted colour range — confirm it is intentional.
fig = go.Figure(
    data=go.Parcoords(
        line={
            'color': df['colorVal'],
            'colorscale': 'Electric',
            'showscale': True,
            'cmin': -1.22,
            'cmax': -100,
        },
        dimensions=[
            {
                'range': [32000, 227900],
                'constraintrange': [100000, 150000],
                'label': "Block Height",
                'values': df['blockHeight'],
            },
            {
                'range': [0, 700000],
                'label': 'Block Width',
                'values': df['blockWidth'],
            },
            {
                'tickvals': [0, 0.5, 1, 2, 3],
                'ticktext': ['A', 'AB', 'B', 'Y', 'Z'],
                'label': 'Cyclinder Material',
                'values': df['cycMaterial'],
            },
            {
                'range': [-1, 4],
                'tickvals': [0, 1, 2, 3],
                'label': 'Block Material',
                'values': df['blockMaterial'],
            },
            {
                'range': [134, 3154],
                'visible': True,
                'label': 'Total Weight',
                'values': df['totalWeight'],
            },
            {
                'range': [9, 19984],
                'label': 'Assembly Penalty Wt',
                'values': df['assemblyPW'],
            },
            {
                'range': [49000, 568000],
                'label': 'Height st Width',
                'values': df['HstW'],
            },
        ],
    )
)
fig.show()

In [None]:
import plotly.express as px
# Parallel-coordinates plot of plotly's bundled iris dataset, coloured by species id.
iris = px.data.iris()
print(type(iris))
fig = px.parallel_coordinates(
    iris,
    color="species_id",
    labels={
        "species_id": "Species",
        "sepal_width": "Sepal Width",
        "sepal_length": "Sepal Length",
        "petal_width": "Petal Width",
        "petal_length": "Petal Length",
    },
    color_continuous_scale=px.colors.diverging.Tealrose,
    color_continuous_midpoint=2,
)
fig.show()