# Imports

In [None]:
import numpy as np
import pickle
import os
import re
import pandas as pd
from tqdm import tqdm
import glob


import matplotlib.pyplot as plt

In [None]:
rng = np.random.default_rng()

In [None]:
%matplotlib inline
all_inline = True

# Individual analysis

## Individual file discovery/loading

In [None]:
# Collect every pickle file in the working directory in sorted order and
# preview the last five (names embed a timestamp, so these are presumably the newest).

files = sorted(glob.glob("*pickle"))
files[-5:]

In [None]:
# select file to use for br

# Hard-coded example run; it is overridden by the very next assignment, so it
# only documents the expected file-name format.
file = 'bubblewrap_run_2023-09-01-12-03-51.pickle'
# Use the last entry of the sorted listing (raises IndexError if no pickle file was found).
file = files[-1]
file

In [None]:
# load br

# NOTE(review): pickle.load executes arbitrary code stored in the file; only
# open run files that come from a trusted source.
with open(file, "rb") as fhan:
    br = pickle.load(fhan)

br

In [None]:
br.behavior_pred_history[1].shape

In [None]:
# Last 1000 behavior predictions and errors stored under key 1 (presumably the
# 1-step time offset — confirm), flattened to drop singleton axes.
pred = np.squeeze(br.behavior_pred_history[1][-1000:])
err = np.squeeze(br.behavior_error_history[1][-1000:])

# Reconstruct the target from prediction and error.
# NOTE(review): this assumes the error was stored as (pred - true) — confirm the sign convention.
true = pred-err

# plt.plot(true)
plt.plot(pred)

In [None]:
plt.plot(-pred + err)

In [None]:
np.corrcoef(true, pred)

In [None]:
# Mean squared behavior error (NaNs ignored) for each prediction offset.
time_offsets = br.data_source.time_offsets
mse_per_offset = [
    np.nanmean(br.behavior_error_history[offset_key]**2)
    for offset_key in time_offsets
]
plt.plot(time_offsets, mse_per_offset, '.-')

In [None]:
# show squared behavior error over time, one curve per prediction offset

%matplotlib inline
fig, ax = plt.subplots()
for offset in time_offsets:
    # Element-wise square of the stored error series for this offset.
    ax.plot((br.behavior_error_history[offset]**2), '-', label=f"{offset}")
ax.set_ylabel("squared error")
ax.legend();
ax.set_xlabel("time");

In [None]:
# show error over time

%matplotlib inline
fig, ax = plt.subplots()
offset = 1
ax.plot((br.behavior_error_history[offset]), '-', label=f"{offset}")
ax.set_ylabel("squared error")
ax.legend();
ax.set_xlabel("time");

In [None]:
# Sorted listing of every pickle file in the working directory.
g_files = glob.glob("*pickle")
g_files.sort()

# 2023-10-03-13-57-46
# 2023-10-03-13-57-46
# Optional hand-picked runs (currently none) followed by the last 44 entries of the sorted listing.
files = [
    # 'bubblewrap_run_2023-06-15-15-29-33.pickle',
] + g_files[-22*2:]

# Load every selected run into `brs`.
# NOTE(review): pickle.load executes arbitrary code from the file — trusted files only.
# The loop also rebinds the notebook-level names `file` and `br` to the last run loaded.
brs = []
for file in files:
    with open(file, "rb") as fhan:
        br = pickle.load(fhan)
    brs.append(br)

In [None]:
# Plot, for each of the last 18 runs, the time encoded in its pickle file name
# (the four dash-separated fields before ".pickle") converted to seconds.
fig, ax = plt.subplots()
bslice = brs[-18:]
for i, br in enumerate(bslice):
    ts = br.pickle_file[-18:-7].split("-")
    days, hours, minutes, seconds = (int(ts[k]) for k in range(4))
    total_seconds = days*60*60*24 + hours*60*60 + minutes*60 + seconds
    ax.plot(i, total_seconds, '.')

In [None]:
# Grid of target-vs-prediction traces for the runs in `bslice`:
# one grid column per input configuration, one grid row per predicted output.
fig, ax = plt.subplots(nrows=3, ncols=6, figsize=(20,10), layout='tight')

input_keys = ['z(s(neural))', 'z(behavior)', 'z([s(neural), behavior])', 's3(z([p(neural), behavior]))', 's1(z([p(neural), behavior]))', 's1(z(s(neural)))']
output_keys = ['behavior', 's1(z(s(neural)))', 's1(z([s(neural), behavior]))']


for i in range(len(bslice)):

    # r is the grid column (0..5), c the grid row (0..2); runs fill the grid column by column.
    r,c = np.unravel_index(i, (6,3))

    # Squeeze BOTH arrays (as the single-run cell does): if the histories carry a
    # trailing singleton axis, `pred - err` with only `err` squeezed would
    # broadcast (N, 1) - (N,) into an (N, N) matrix and corrupt the plot and
    # the correlation below.
    pred = np.squeeze(bslice[i].behavior_pred_history[1][-1000:])
    err = np.squeeze(bslice[i].behavior_error_history[1][-1000:])
    
    true = pred-err

    ax[c,r].plot(true)
    ax[c,r].plot(pred)
    # Pearson correlation between prediction and reconstructed target, shown in the top-left corner.
    ax[c,r].text(.01,.99,f"{np.corrcoef(pred, true)[0,1]:.2f}", ha='left', va='top', transform=ax[c,r].transAxes)
    
    if c == 0:
        ax[c,r].set_title(input_keys[r])
    if r == 0:
        ax[c,r].set_ylabel(output_keys[c])
fig.savefig("table.png")


In [None]:
br.prediction_history[1]

In [None]:
# Reconstruct the target series of run 0 and run 2 of `bslice` for comparison.
# Both histories are squeezed so that a trailing singleton axis cannot turn
# `pred - err` into an (N, N) broadcast (consistent with the single-run cell).
i=0
pred = np.squeeze(bslice[i].behavior_pred_history[1][-1000:])
err = np.squeeze(bslice[i].behavior_error_history[1][-1000:])
true0 = pred-err
i=2
pred = np.squeeze(bslice[i].behavior_pred_history[1][-1000:])
err = np.squeeze(bslice[i].behavior_error_history[1][-1000:])
true2 = pred-err

In [None]:
plt.plot(true0,true2,'.')

In [None]:
# `fin` is otherwise only imported in a later cell; import it here so this cell
# also works on a fresh kernel executed top to bottom.
import bubblewrap.input_sources.functional as fin

# Load observations and behavior from the saved archive, then z-score the
# column-wise concatenation of the two.
obs, beh = fin.get_from_saved_npz("jpca_reduced_sc.npz")

concatenated = fin.zscore(np.hstack([obs, beh]))


In [None]:
from importlib import reload
import bubblewrap.plotting_functions as bpf
import bubblewrap.input_sources.functional as fin
reload(fin)

In [None]:
bpf.compare_metrics(brs, offset=1, colors=["C0"]*4 + ["C1"]*4 + ["C2"]*4 + ["C3"]*4, show_legend=False, show_title=False)

In [None]:
def summarize(brs, offset=1, last_n=400):
    """Average the tail of several runs' metrics and report their shared settings.

    Parameters
    ----------
    brs : sequence of run objects
        Each run must expose `behavior_error_history`, `prediction_history`
        and `entropy_history` (indexable by `offset`) and a `bw` attribute
        holding `B_thresh` and `copy_row_on_teleport`.
    offset : key into the history containers (default 1, the value that was
        previously hard-coded).
    last_n : positive number of trailing samples of each history to average
        (default 400, the value that was previously hard-coded).

    Returns
    -------
    dict with keys "behavior mse", "log pred p", "entropy", "B_thresh" and
    "copy_row_on_teleport".

    Raises
    ------
    IndexError
        If `brs` is empty.
    AssertionError
        If the runs do not all share the same `B_thresh` /
        `copy_row_on_teleport`; averaging over them would be meaningless.
    """
    b = np.mean([br.behavior_error_history[offset][-last_n:]**2 for br in brs])
    p = np.mean([br.prediction_history[offset][-last_n:] for br in brs])
    e = np.mean([br.entropy_history[offset][-last_n:] for br in brs])
    d = {"behavior mse":b, "log pred p":p, "entropy":e}
    for k in ["B_thresh", "copy_row_on_teleport"]:
        reference = getattr(brs[0].bw, k)
        for br in brs:
            # Explicit raise instead of `assert` so the check survives `python -O`.
            if getattr(br.bw, k) != reference:
                raise AssertionError(
                    f"runs disagree on {k}: {reference!r} vs {getattr(br.bw, k)!r}"
                )
        d[k] = reference
    return d

# One summary row per consecutive group of four runs (runs 0-3, 4-7, 8-11, 12-15).
pd.DataFrame([summarize(brs[start:start + 4]) for start in range(0, 16, 4)])

In [None]:


# NOTE(review): this reads as pseudo-code describing how the seeded runs were
# produced — none of the callables used below are defined or imported in the
# visible notebook, so executing the cell as-is raises NameError.
for seed in [0, 33, 75, 100]:
    data_source = jpca_dataset_with_time_as_behavior()
    # The seed doubles as the number of leading timesteps dropped from the data.
    data_source.drop_first_n_timesteps(seed)
    bw = Bubblewrap(seed=seed)
    run_bubblewrap()
    save_result()
    

In [None]:
u, s, vh = np.linalg.svd(brs[0].alpha_history, full_matrices=False)

In [None]:
plt.plot(u[:,:5])

In [None]:
plt.plot(vh.T[:,:5])

In [None]:
plt.imshow(vh);

In [None]:
%matplotlib qt
plt.imshow(brs[0].alpha_history, aspect='auto', interpolation='nearest')

In [None]:
np.linalg.svd?

In [None]:
# Mean of the second half of each run's prediction history (key 1).
# The split index is taken from the first run and reused for every run.
l = []
middle = brs[0].prediction_history[1].shape[0]//2
for br in brs:
    l.append(br.prediction_history[1][middle:].mean())

In [None]:
np.mean(l)

In [None]:
np.std(l,ddof=1)/np.sqrt(len(l))

In [None]:
l[0]

In [None]:
l

In [None]:
brs[0].pred_list

In [None]:
# Fetch the observation/behavior history of the current run, then keep only the
# trailing behavior samples so `beh` has as many entries as `br.alpha_history`.
obs,beh = br.data_source.get_history()
beh = beh[-len(br.alpha_history):]

In [None]:
beh.shape

In [None]:
u,s,vh = np.linalg.svd(br.alpha_history, full_matrices=False)

In [None]:
u,s,vh = np.linalg.svd(br.alpha_history[np.squeeze(beh==1)], full_matrices=False)
plt.plot(vh[:5,:].T);

In [None]:
plt.imshow(u)

In [None]:
plt.imshow(br.alpha_history[np.squeeze(beh==1)] - br.alpha_history[np.squeeze(beh==2)])

In [None]:
plt.plot(np.cumsum(s)/s.sum(), '.-')

In [None]:
# Catalogue of the 2023-06-13/14 runs: one dict per run describing the model
# input, the predicted quantity ("+n" is a placeholder for the look-ahead),
# the number of steps predicted ahead and the pickle file holding the result.
# Each row below is (data_in, data_predicted, n_steps_ahead, "DD-HH-MM-SS").
df = [
    dict(
        data_in=data_in,
        data_predicted=data_predicted,
        n_steps_ahead=n_steps_ahead,
        file=f"bubblewrap_run_2023-06-{stamp}.pickle",
    )
    for data_in, data_predicted, n_steps_ahead, stamp in [
        ("f[:20k,0]", "f[:20k+n,0]", 1, "13-15-53-29"),
        ("psvd(n)[:20k,:6]", "f[:20k+n,0]", 1, "13-15-10-19"),
        ("[psvd(n)[:20k,:6], f[:20k,0]]", "f[:20k+n,0]", 1, "13-15-12-21"),
        ("[psvd(n)[:20k,:6], f[:20k,0]]", "psvd(n)[:20k+n,0]", 1, "13-15-25-50"),
        ("psvd(n)[:20k,:6]", "psvd(n)[:20k+n,0]", 1, "13-15-27-02"),
        ("f[:20k,0]", "psvd(n)[:20k+n,0]", 1, "13-16-02-33"),
        ("psvd(n)[:20k,:6]", "f[:20k+n,:]", 1, "14-09-21-15"),
        ("[psvd(n)[:20k,:6], f[:20k,:]]", "f[:20k+n,:]", 1, "14-09-25-02"),
        ("f[:20k,:]", "f[:20k+n,:]", 1, "14-09-26-36"),
        ("[psvd(n)[:20k,:6], f[:20k,1:]]", "f[:20k+n,0]", 1, "14-09-31-27"),
        # neural + first two behavior columns -> third behavior column, growing look-ahead
        ("[psvd(n)[:20k,:6], f[:20k,:2]]", "f[:20k+n,2]", 1, "14-09-33-36"),
        ("[psvd(n)[:20k,:6], f[:20k,:2]]", "f[:20k+n,2]", 5, "14-09-52-32"),
        ("[psvd(n)[:20k,:6], f[:20k,:2]]", "f[:20k+n,2]", 10, "14-09-55-19"),
        ("[psvd(n)[:20k,:6], f[:20k,:2]]", "f[:20k+n,2]", 20, "14-09-46-35"),
        # first two behavior columns only -> third behavior column, growing look-ahead
        ("f[:20k,:2]", "f[:20k+n,2]", 1, "14-10-04-05"),
        ("f[:20k,:2]", "f[:20k+n,2]", 5, "14-09-59-56"),
        ("f[:20k,:2]", "f[:20k+n,2]", 10, "14-10-02-10"),
        ("f[:20k,:2]", "f[:20k+n,2]", 50, "14-11-18-18"),
        ("f[:20k,:2]", "f[:20k+n,2]", 99, "14-10-40-34"),
        ("f[:20k,:2]", "f[:20k+n,2]", 100, "14-10-08-48"),
        ("f[:20k,:2]", "f[:20k+n,2]", 101, "14-10-38-50"),
        ("f[:20k,:2]", "f[:20k+n,2]", 150, "14-10-37-02"),
        ("f[:20k,:2]", "f[:20k+n,2]", 200, "14-10-35-03"),
        ("f[:20k,:2]", "f[:20k+n,2]", 500, "14-10-30-11"),
        ("f[:20k,:2]", "f[:20k+n,2]", 1000, "14-10-31-59"),
        ("[psvd(n)[:20k,:6], f[:20k,:2]]", "f[:20k+n,2]", 50, "14-11-14-57"),
        ("[psvd(n)[:20k,:6], f[:20k,:2]]", "f[:20k+n,2]", 100, "14-10-50-08"),
        ("[psvd(n)[:20k,:6], f[:20k,:2]]", "f[:20k+n,2]", 200, "14-11-02-29"),
        ("[psvd(n)[:20k,:6], f[:20k,:2]]", "f[:20k+n,2]", 500, "14-11-04-56"),
        ("[psvd(n)[:20k,:6], f]", "d(f[:20k+n,:])", 1, "14-11-31-41"),
        ("psvd(n)[:20k,:6]", "d(f[:20k+n,:])", 1, "14-11-34-31"),
        ("f", "d(f[:20k+n,:])", 1, "14-11-36-36"),
        ("[psvd(n)[:70587,:6],f[:70587,:2]]", "f[:70587+n,2]", 1, "14-12-34-23"),
        ("[psvd(n)[:20k,:6],f[:20k,:]]", "psvd(n)[:20k+n,:]", 1, "14-13-59-08"),
        ("[psvd(n)[:20k,:6],f[:20k,:]]", "psvd(n)[:20k+n,:]", 1, "14-14-31-53"),
        ("[psvd(n)[:20k,:6],f[:20k,0]]", "psvd(n)[:20k+n,:]", 1, "14-14-33-08"),
        ("[psvd(n)[:20k,:6],f[:20k,1]]", "psvd(n)[:20k+n,:]", 1, "14-14-34-56"),
        ("psvd(n)[:20k,:6]", "psvd(n)[:20k+n,:]", 1, "14-14-36-08"),
        ("f[:20k,0]", "psvd(n)[:20k+n,:]", 1, "14-14-37-19"),
        ("[psvd(n)[:70k,:6], f[:70k,:]]", "[psvd(n)[:70k+n,:6],f[:70k+n,:]", 1, "14-15-31-11"),
        ("psvd(n)[:70k,:6]", "[psvd(n)[:70k+n,:6],f[:70k+n,:]", 1, "14-15-48-02"),
        ("f[:70k,:]", "[psvd(n)[:70k+n,:6],f[:70k+n,:]", 1, "14-15-53-30"),
        ("[psvd(n)[:70k,:6], f[:70k,:]]", "[psvd(n)[:70k+n,:6],f[:70k+n,:]", 1, "14-17-18-03"),
    ]
]

In [None]:
# Catalogue of the 2023-06-16 sweep: every look-ahead in {0, 1, 2, 5, 10, 100}
# was run with three inputs ("[n,b]", "n", "b"), always predicting "[n,b]".
# Rows are ordered look-ahead-major, matching the order the runs were saved in.
df = [
    dict(
        data_in=data_in,
        data_predicted="[n,b]",
        n_steps_ahead=n_steps_ahead,
        file=f'bubblewrap_run_2023-06-16-{stamp}.pickle',
    )
    for (n_steps_ahead, data_in), stamp in zip(
        [(steps, source) for steps in [0, 1, 2, 5, 10, 100] for source in ["[n,b]", "n", "b"]],
        [
            "11-18-29", "11-21-06", "11-23-42",
            "11-26-15", "11-28-42", "11-31-16",
            "11-33-47", "11-36-14", "11-38-48",
            "11-41-20", "11-43-51", "11-46-21",
            "11-48-49", "11-51-20", "11-53-49",
            "11-56-18", "11-58-47", "12-01-19",
        ],
    )
]

In [None]:
# Catalogue of the one-step-ahead runs comparing raw and z-scored inputs.
# This is the last cell that rebinds `df`, so on a top-to-bottom run it is the
# catalogue the following cells operate on.
# Note that the first description is spelled "[n, b]" (with a space).
df = [
    dict(
        data_in=data_in,
        data_predicted="[n,b]",
        n_steps_ahead=1,
        file=f'bubblewrap_run_2023-06-{stamp}.pickle',
    )
    for data_in, stamp in [
        ("[n, b]", "15-16-54-37"),
        ("n", "15-16-55-57"),
        ("b", "15-16-58-02"),
        ("z(b,n)", "16-10-44-49"),
        ("z(b,n)", "16-10-56-11"),
        ("z(n)", "16-10-57-21"),
        ("z(b)", "16-10-58-24"),
    ]
]









In [None]:
# Turn the list of run descriptions into a table and attach each run's results.
df = pd.DataFrame(df)

for idx in df.index:
    # NOTE(review): pickle.load executes code stored in the file — trusted run files only.
    with open(df.loc[idx,"file"], "rb") as fhan:
        br = pickle.load(fhan)
    # Mean over the second half of the behavior error list.
    # NOTE(review): whether beh_error_list already holds squared errors is not
    # visible here — confirm before reading this column as an MSE.
    df.loc[idx,"mse"] = br.beh_error_list[len(br.beh_error_list)//2:].mean() 
    # Keep the loaded run object itself in the table for later cells.
    df.loc[idx, 'br'] = br
    # Replace the "+n" placeholder in the description with the actual look-ahead.
    df.loc[idx, 'data_predicted'] = df.loc[idx, 'data_predicted'].replace("+n", f"+{df.loc[idx, 'n_steps_ahead']}")
    # df.loc[idx, 'pred_shape'] = ([int(x) for x in br.beh_list.shape],)

In [None]:
df[["data_in", "data_predicted", "n_steps_ahead", "mse"]][:]

In [None]:
%matplotlib inline
# One figure per predicted component (columns 0..8 of beh_error_list): the
# second-half mean error versus look-ahead, one curve per input configuration.
for i in range(9):
    plt.figure(figsize=(5,3))
    # NOTE(review): groups are matched by exact string equality, so a catalogue
    # entry spelled "[n, b]" (with a space) is not selected by "[n,b]".
    for group in ["[n,b]","n","b"]:
        sdf = df[df.data_in == group]
        mse_s = []
        stds = []
        for br in sdf.br:
            # Second half of the error series for component i only.
            es = br.beh_error_list[len(br.beh_error_list)//2:,i]
            mse_s.append(es.mean())
            stds.append(es.std())
        plt.errorbar((sdf.n_steps_ahead), mse_s, yerr=stds, label=group) 
        # NOTE(review): the label says "over all variables" but only component i is averaged here.
        plt.ylabel("last-half MSE over all variables")
        plt.xlabel("number of steps ahead predicted")
        plt.title(f"predicted component {i}")
    plt.legend()

In [None]:
mse_s

In [None]:
steps = df.loc[0,'br'].entropy_list.shape[0]
ttr = df.loc[0,'br'].time_to_run
print(f"total time to run: {ttr}")
print(f"{1000*ttr/steps} ms/it or {steps/ttr}it/s")

In [None]:
w_time = df.loc[0,'br'].time_spent_on_w

print(f"total time spent on w: {w_time}")
print(f"{1000*w_time/steps} ms/it")

In [None]:
# Table rows (index labels of `df`) of the runs to compare.
rows = [4,5,6]

# Re-open the data file the first selected run was trained on; the path stored
# on the run is resolved two directories up from the notebook.
dfile = np.load("../../" + df.loc[rows[0],"br"].file)



# Number of predictions produced by the first selected run.
n = df.loc[rows[0],"br"].beh_list.shape[0]
# Ground truth: first entry of "y" stacked column-wise with "x", cut to n rows.
# NOTE(review): this assumes beh_list row t is aligned with data row t — confirm for look-aheads > 0.
to_predict = np.hstack([dfile["y"][0], dfile["x"]])[:n]

# Human-readable names for the 16 predicted columns (4 + 2 + 10).
variable_labels = [f"neural data {x}" for x in range(4)] + ["run speed", "pupil size"] + [f"video svd[:,{x}]" for x in range(10)] 

# Per-variable mean squared error of each selected run, plus its input description.
l = []
names = []
for row in rows:
    l.append(((df.loc[row,"br"].beh_list - to_predict)**2).mean(axis=0))
    names.append(df.loc[row,"data_in"])

In [None]:
result = pd.DataFrame(np.array(l), index=names)
result

In [None]:
%matplotlib inline
normalized_result = (result - result.mean(axis=0))/result.std(axis=0)
plt.imshow(normalized_result)
plt.yticks(ticks=[0,1,2],labels=normalized_result.index,rotation=0);

In [None]:
%matplotlib inline
# Per-variable MSE of each selected run inside consecutive windows of `jump_size` steps.
jump_size = 2_500
edges = np.arange(0,n,jump_size)

# all_errors ends up with shape (windows, runs, variables).
all_errors = []
for i in range(len(edges)-1):
    s = slice(edges[i],edges[i+1])
    l = []
    for row in rows:
        l.append(((df.loc[row,"br"].beh_list[s,:] - to_predict[s,:])**2).mean(axis=0))
    all_errors.append(l)
all_errors = np.array(all_errors)
# v_number = 0
# One heat map (runs x windows) per predicted variable.
for v_number in range(16):
    plt.figure(figsize=(5,10))
    this_variable = all_errors[:,:,v_number].T
    plt.imshow(this_variable)
    plt.xlabel(f"time (steps/{jump_size})")
    plt.yticks(ticks=[0,1,2],labels=normalized_result.index,rotation=0);
    plt.title(f"{variable_labels[v_number]} MSE")
    # Annotate each run's row with mean/std over all but the first two windows.
    # The x position 7.6 is a hard-coded spot in data coordinates.
    for i in range(this_variable.shape[0]):
        plt.text(7.6,i-.3, f"~mean: {this_variable[i,2:].mean()}", fontfamily="monospace")
        plt.text(7.6,i   , f"~std:  {this_variable[i,2:].std(ddof=1)}", fontfamily="monospace")

In [None]:
%matplotlib qt
i = 1

plt.plot(to_predict[:,i], 'k')
for row in rows:
    plt.plot(df.loc[row, "br"].beh_list[:,i], label=df.loc[row, "data_in"])
plt.legend()


In [None]:
# NOTE(review): `all_pred` and the matching `jump_size` are only assigned in the
# FOLLOWING cell, so on a fresh top-to-bottom run this cell raises NameError
# (or plots with a stale `jump_size`); run the next cell first.
plt.plot(all_pred)
plt.legend(normalized_result.index)
plt.ylabel("mean log p.p.")
plt.xlabel(f"time (steps/{jump_size})")

In [None]:
# Mean of the first prediction column of each selected run inside consecutive
# windows of `jump_size` steps.
jump_size = 5_000
edges = np.arange(0,70_001,jump_size)

all_pred = []
for i in range(len(edges)-1):
    s = slice(edges[i],edges[i+1])
    l = []
    for row in rows:
        # `.mean()` without an axis yields one scalar per (window, run).
        l.append(df.loc[row,"br"].pred_list[s,0].mean())
    all_pred.append(l)
all_pred = np.array(all_pred)

# `all_pred` is therefore 2-D (windows, runs). The earlier per-variable loop
# indexed it as `all_pred[:,:,v_number]`, which always raised IndexError; a
# single runs-x-windows heat map is what the data supports.
plt.figure()
plt.imshow(all_pred.T)
plt.xlabel(f"time (steps/{jump_size})")

In [None]:
# Candidate targets; each assignment overrides the previous one, so only the
# last `to_predict` (set further down) is actually used.
# NOTE(review): `f` and `psvd_n` are not defined anywhere in the visible notebook.
to_predict = np.linalg.norm(f - f[0], axis=1)[:,None]
to_predict = f[:,2]
to_predict = psvd_n[:,0]




i = 0
to_predict = f[:,i]
plt.plot(to_predict)

to_compare = [0]
# to_compare = [30]
# NOTE(review): the loop variable `i` is used both as the row label into `df`
# and as the column of `beh_list`; that only coincides for row 0 — confirm
# before switching `to_compare`.
for i in to_compare:
    # Shift the x axis by the look-ahead so predictions line up with the target they refer to.
    x = np.arange(df.loc[i,"br"].beh_list.shape[0])+df.loc[i,"n_steps_ahead"]
    beh_pred = df.loc[i,"br"].beh_list[:,i]
    plt.plot(x,beh_pred)
    
half_width = 3000

# Uniform moving average of the target over 2*half_width samples ('valid' part only).
kernel = np.ones(half_width * 2)
kernel = kernel/kernel.sum()
mov_avg = np.convolve(np.squeeze(to_predict), kernel, mode='valid')
x = np.arange(mov_avg.shape[0])
# plt.plot(x + half_width*2, mov_avg)

In [None]:
n

In [None]:
# Compare the prediction error against a shuffled-prediction baseline.
n = beh_pred.shape[0]
perm = rng.permutation(np.arange(n))

cut_to_predict = to_predict[:n]
# MSE of always predicting the mean of the target ...
constant_mse = ((cut_to_predict - cut_to_predict.mean())**2).mean()
# ... immediately overridden, so the values printed below are raw, un-normalised MSEs.
constant_mse = 1

# First line: MSE of the actual predictions; second line: MSE after randomly permuting them in time.
print(((beh_pred - cut_to_predict)**2).mean()/constant_mse)
print(((beh_pred[perm] - cut_to_predict)**2).mean()/constant_mse)

In [None]:
to_predict.shape

In [None]:
plt.imshow(np.corrcoef(np.hstack([df.loc[i,"br"].beh_list]).T))

In [None]:
# For several window sizes, measure how close the model predictions are to a
# plain trailing moving average of the target.
ws = [1, 100, 500, 1000, 1500, 2000, 2500, 3000, 4000, 50000]
ds = []
for w in ws:
    half_width = w
    kernel = np.ones(half_width * 2)
    kernel = kernel/kernel.sum()
    mov_avg = np.convolve(np.squeeze(to_predict), kernel, mode='valid')

    # Drop the predictions made before the first full averaging window, after
    # accounting for the look-ahead, so aligned_beh[k] corresponds to mov_avg[k].
    aligned_beh = beh_pred[half_width*2-df.loc[i,"n_steps_ahead"]:]

    # Compare sample-by-sample over the common length. The previous code used
    # `mov_avg[aligned_beh.shape[0]]` (a single element, missing the slice
    # colon), which compared every prediction against one scalar.
    n_common = min(mov_avg.shape[0], aligned_beh.shape[0])
    ds.append(((mov_avg[:n_common] -  aligned_beh[:n_common])**2).mean())

In [None]:
plt.plot(ws,ds)

In [None]:
f.shape

In [None]:
plt.plot(df.loc[5,"br"].beh_error_list)

In [None]:
runs = [10,11,12,13,25,26,27,28]
plt.plot(df.loc[runs,"n_steps_ahead"], df.loc[runs,"mse"], '.-', label=df.loc[runs[0], "data_in"])

runs = range(14,24)
plt.plot(df.loc[runs,"n_steps_ahead"], df.loc[runs,"mse"], '.-', label=df.loc[runs[0], "data_in"])
plt.ylabel("mse")
plt.xlabel("number of steps ahead")
plt.legend()

In [None]:
df.loc[18,"br"].beh_list.shape

In [None]:
br.beh_list

# Old

## Smoothed Entropy/Prop plot

In [None]:
def one_sided_ewma(data, com=100):
    """Causal exponentially weighted moving average of a 1-D sequence.

    `com` is handed to pandas' `ewm` as the centre of mass. The result is a
    pandas Series named "data" with one smoothed value per input sample.
    """
    frame = pd.DataFrame({"data": data})
    smoothed = frame.ewm(com).mean()
    return smoothed["data"]

def two_sided_ewma(data, half_width=75):
    """Smooth `data` with a symmetric, exponentially peaked window.

    The window has 2*half_width taps: exp(linspace(0, 10, half_width)) followed
    by its mirror image, normalised to sum to one. Only the fully overlapping
    ('valid') part of the convolution is returned, so the output is shorter
    than the input by 2*half_width - 1 samples.
    """
    rising = np.exp(np.linspace(0, 10, half_width))
    window = np.concatenate((rising, rising[::-1]))
    window = window / window.sum()
    return np.convolve(data, window, mode='valid')

In [None]:
# calculates for the next cell

def _second_half_stats(pred_column):
    """Return (mean, sample std) over the second half of the NaN-free prefix.

    The usable length is the index of the first NaN, or the full length when
    the column has no NaN. The second half is `[length//2 : length]`.
    The previous slice `[-length//2 : length]` counted its start from the END
    of the whole column, which is only equivalent when nothing was truncated
    and yields an empty or wrong window otherwise.
    """
    nan_positions = np.nonzero(np.isnan(pred_column))[0]
    valid_length = nan_positions[0] if len(nan_positions) else pred_column.shape[0]
    second_half = pred_column[valid_length//2:valid_length]
    return np.mean(second_half), np.std(second_half, ddof=1)


T = new_way_br.pred_list.shape[0]
new_way_means = []
old_way_means = []
shuffled_means = []

new_way_stds = []
old_way_stds = []
shuffled_stds = []
# One entry per look-ahead step; column i of pred_list belongs to lookahead_steps[i].
for i, step in enumerate(new_way_br.bw_parameters["lookahead_steps"]):
    col_mean, col_std = _second_half_stats(new_way_br.pred_list[:,i])
    new_way_means.append(col_mean)
    new_way_stds.append(col_std)
    # print(f"new way {step:>2} step(s) ahead mean: {new_way_means[-1]}")

# The old-way run has no NaN handling: it uses the trailing half of the
# new-way run's length T.
for i, step in enumerate(old_way_br.bw_parameters["lookahead_steps"]):
    old_way_means.append(np.mean(old_way_br.pred_list[-T//2:,i]))
    old_way_stds.append(np.std(old_way_br.pred_list[-T//2:,i], ddof=1))
    # print(f"old way {step:>2} step(s) ahead mean: {old_way_means[-1]}")

if shuffled_br is not None:
    for i, step in enumerate(shuffled_br.bw_parameters["lookahead_steps"]):
        col_mean, col_std = _second_half_stats(shuffled_br.pred_list[:,i])
        shuffled_means.append(col_mean)
        shuffled_stds.append(col_std)
        # print(f"new way {step:>2} step(s) ahead mean: {new_way_means[-1]}")


new_way_means = np.array(new_way_means)
old_way_means = np.array(old_way_means)

new_way_stds = np.array(new_way_stds)
old_way_stds = np.array(old_way_stds)

if shuffled_br is not None:
    shuffled_stds = np.array(shuffled_stds)
    shuffled_means = np.array(shuffled_means)

### Over Time

#### Tune smoothing factor

In [None]:
# Switch to an interactive window only when inline plotting was not requested.
if not all_inline:
    %matplotlib qt


# Compare the raw first prediction column with both smoothers.
data = old_way_br.pred_list[:,0]
smoothed_data1 = one_sided_ewma(data,40)

plt.plot(data, alpha=.5)
plt.plot(smoothed_data1);

half_width = 100
smoothed_data2 = two_sided_ewma(data,half_width)
# The two-sided result is shorter ('valid' convolution); shift it by
# half_width so it lines up with the raw series.
plt.plot(np.arange(smoothed_data2.size) + half_width, smoothed_data2);

plt.legend(["1-step predictions", "one-sided smoothed", "two-sided smoothed"]);

#### Smoothed Predictions and alpha

In [None]:
# shows smoothed predictions over time

if not all_inline:
    %matplotlib qt
    
show_states_instead_of_alpha = True

smoothing_scale = 40
br = shuffled_br

fig, axs = plt.subplots(2,1, sharex=True)
steps = [1,2,3]
for si, step in enumerate(steps):
    # Column of pred_list that belongs to this look-ahead.
    i = br.bw_parameters["lookahead_steps"].index(step)
    # NOTE(review): "old" and "new" are read from the SAME run and column, so the
    # two curves differ only by the x shift applied below — confirm whether two
    # different runs were intended here.
    old_pred = br.pred_list[:,i]
    new_pred = br.pred_list[:,i]
    # plt.plot(pred)


    smoothed_old_pred = one_sided_ewma(old_pred, smoothing_scale)
    smoothed_new_pred = one_sided_ewma(new_pred, smoothing_scale)
    half_width = 0 # NOTE: set this if you use the two-sided ewma function
    
    axs[0].plot(np.arange(smoothed_old_pred.size) + half_width, smoothed_old_pred, color=f'C{si}', linestyle='dashed')
    # The "new" curve is shifted right by (step - 1) samples.
    x_correction = (step-1)*1
    axs[0].plot(np.arange(smoothed_new_pred.size) + half_width + x_correction,smoothed_new_pred, color=f'C{si}',linestyle='solid')

# Legend entries follow the plotting order: old then new, for each step.
legend = []
for step in steps:
    legend.append(f"old {step} step")
    legend.append(f"new {step} step")
axs[0].legend(legend)
# NOTE(review): `dataset` and `states` are not defined in the visible notebook.
axs[0].set_title(f"{dataset} smoothed prediction")
axs[0].set_xlabel("timestep")
axs[0].set_ylabel("log pred. prob.");



if states is not None and show_states_instead_of_alpha:
    axs[1].plot(states,'.')
else:
    alpha = br.alpha_list.T
    # `state_means` is only used by the commented-out filter below.
    state_means = alpha.mean(axis=1)
    # alpha = alpha[state_means > np.quantile(state_means, .75),:]
    # NOTE(review): subtracting a constant from the argsort result shifts the
    # row indices (negative ones wrap around) rather than re-ranking — confirm the intended ordering.
    alpha = alpha[np.argsort(-alpha.mean(axis=1)) - (alpha.shape[0]//2),:]
    axs[1].imshow((alpha), aspect="auto", interpolation="nearest")
# axs[0].set_xlim([500, 1000])

#### Smoothed predictions and entropy

In [None]:
# shows smoothed predictions and entropy
if not all_inline:
    %matplotlib qt

    
br = new_way_br

# (sic) spelling kept because it is a notebook-level name.
smooting_scale = 50

predictions = br.pred_list[:,0]
smoothed_predictions = one_sided_ewma(predictions,smooting_scale)

# Left axis (blue): raw predictions, faint, with the smoothed curve on top.
fig, ax1 = plt.subplots()
ax1.plot(predictions, alpha=0.25, color='blue')
ax1.plot(smoothed_predictions, color='blue', label = "prediction")
ax1.tick_params(axis='y',labelcolor='blue')
# NOTE(review): `dataset` is not defined in the visible notebook.
ax1.set_title(f"(Smoothed) Predictions and Entropy ({dataset})")


# Right axis (green): raw and smoothed entropy.
entropy = br.entropy_list[:,0]
smoothed_entropy = one_sided_ewma(entropy, smooting_scale)
ax2 = ax1.twinx()
ax2.plot(entropy, color='green', alpha=0.25)
ax2.plot(smoothed_entropy, color='green', label="entropy")
# Dashed reference line at log2(num), the entropy (in bits) of a uniform distribution over `num` outcomes.
max_entropy = np.log2(br.bw_parameters["num"])
ax2.plot([0, entropy.shape[0]], [max_entropy,]*2, 'g--')
ax2.tick_params(axis='y',labelcolor='green')
# Double the height of the entropy axis range so its curves occupy the lower half of the figure.
ymin, ymax = ax2.get_ylim()
ax2.set_ylim((ymin, 2*(ymax-ymin) + ymin))

fig.legend()

# Alpha analysis

In [None]:
plt.plot(br.alpha_list[:,br.dead_nodes].sum(axis=1))

# Exponentially weighted least squares

### Artificial Regression Problem

In [None]:
rng = np.random.default_rng()

In [None]:
# Synthetic regression problem: n samples, d features, known weights w_true.
d = 2
n = 100

w_true = np.array([-1,1]).reshape(-1,1)

# Draw the regressors one at a time (keeps the RNG call sequence) but stack
# them once at the end instead of re-allocating C on every iteration.
design_rows = [np.zeros(shape=(0,d))]
for i in range(n):
    alpha = rng.multivariate_normal([1,1],[[1,0],[0,1]]).reshape(-1,1)
    design_rows.append(alpha.T)
C = np.vstack(design_rows)

# Targets C @ w_true plus independent noise whose variance shrinks as 0.9**t.
# y is kept as a column of shape (n, 1), like the real-data cell does with
# `[:, None]`: the recursive updates further down (`Ct_y + y[i] * alpha`,
# `pre_V[:,None] * y`) broadcast incorrectly with a 1-D y.
y = rng.multivariate_normal(np.squeeze(C @ w_true), np.diag([.9**x for x in range(n)]))[:, None]

### Real regression Problem

In [None]:
# Regress on the alpha history of the current run.
C = br.alpha_list

# Repeat `obs` a whole number of times to build the targets and make them a column vector.
# NOTE(review): this assumes `obs` is 1-D and that C.shape[0] is an exact
# multiple of obs.shape[0]; otherwise y ends up with a different number of rows than C — confirm.
y = np.tile(obs,C.shape[0]//obs.shape[0])[:,None]
y.shape

### Vanilla

In [None]:
# Recursive least squares: start from a batch solution on the first w rows,
# then fold in the remaining rows one at a time.
w = 30
D = np.linalg.inv(C[:w,:].T @ C[:w,:])
Ct_y = C[:w,:].T @ y[:w]

# Iterate over every remaining row of C. The bound comes from C itself rather
# than the notebook-level `n`, which may be stale when C was loaded from a run;
# the verification cell compares D against inv(C.T @ C) over ALL rows.
for i in range(w,C.shape[0]):
    alpha = C[i,:,None]
    # Sherman-Morrison rank-one update of D = inv(C[:i+1].T @ C[:i+1]).
    D = D - D @ alpha @ alpha.T  @ D/(1 + alpha.T @ D @ alpha)
    Ct_y = Ct_y + y[i] * alpha
    

In [None]:
%matplotlib qt
# NOTE(review): `wD`, `wCt_y` and `pre_V` are only assigned in the
# "Weighted version" cells further down, so those cells must be run before this one.
fig, axs = plt.subplots(nrows=2,ncols=1)
# Top panel: weights from the iterative (recursive) solvers.
axs[0].plot(D@Ct_y, label="vanilla")
axs[0].plot(wD@wCt_y, label="weighted")

# Bottom panel: closed-form batch solutions; "normalized" adds an identity (ridge) term.
axs[1].plot(np.linalg.inv(C.T @ C)@C.T@y, label="vanilla")
axs[1].plot(np.linalg.inv(C.T @ C + np.eye(C.shape[1]))@C.T@y, label="normalized")

axs[1].plot(np.linalg.inv(C.T @ (pre_V[:,None] * C)) @ (C.T * pre_V)  @ y, label="weighted")
axs[1].plot(np.linalg.inv(C.T @ (pre_V[:,None] * C) + np.eye(C.shape[1])) @ (C.T * pre_V)  @ y, label="weighted normalized")


axs[0].set_title("iterative")
axs[1].set_title("batch")

axs[0].legend()
axs[1].legend()

### Vanilla with observation number-based reweighting

In [None]:
ww = br.beh_counts.sum(axis=1)

In [None]:
print(f"{np.allclose(np.linalg.inv(C.T @ C), D) = }")
print(f"{np.allclose(C.T @ y, Ct_y) = }")

### Weighted version

In [None]:
# Forgetting factor of the exponentially weighted least squares.
v = .997

# Weight of row i is v**(N-1-i) with N = number of rows of C: the newest row
# gets weight 1 and older rows decay geometrically. N is taken from C itself
# (previously the notebook-level `n`, which can differ from C.shape[0]).
pre_V = np.array([v**(C.shape[0]-(i+1)) for i in range(C.shape[0])])


In [None]:
# Exponentially weighted recursive least squares: batch start on the first w
# rows, then one discounted rank-one update per remaining row.
w = 30

sub_V = np.diag([v**(w-(i+1)) for i in range(w)])
wD = np.linalg.inv(C[:w,:].T @ sub_V @ C[:w,:])
wCt_y = C[:w,:].T@ sub_V @ y[:w]

# Iterate over every remaining row of C (not the possibly stale global `n`),
# matching the verification cell that compares against all rows of C.
for i in range(w,C.shape[0]):
    alpha = C[i,:,None]
    # Discount all past information by v before adding the new row.
    wD = wD/v
    wD = wD - wD @ alpha @ alpha.T  @ wD/(1 + alpha.T @ wD @ alpha)
    wCt_y = v*wCt_y + y[i] * alpha
    

In [None]:
print(f"{np.allclose(np.linalg.inv(C.T * pre_V @ C), wD) = }")
print(f"{np.allclose(C.T @ (pre_V[:,None] * y), wCt_y) = }")

In [None]:
# Squared distance to w_true of the ordinary minus that of the exponentially
# weighted least-squares solution (positive => the weighted fit is closer).
# Targets and weights are flattened to 1-D first: the previous expression
# broadcast `pre_V[:,None] * y` and `... - w_true.T` into matrices, for a 1-D
# as well as for a column-shaped y.
w_ols = np.linalg.inv(C.T @ C) @ C.T @ np.ravel(y)
w_wls = np.linalg.inv(C.T @ (pre_V[:,None] * C)) @ C.T @ (pre_V * np.ravel(y))
((w_ols - np.ravel(w_true))**2).sum() - ((w_wls - np.ravel(w_true))**2).sum()