In [None]:
import scipy
import numpy as np
import h5py
import matplotlib.pyplot as plt
import bubblewrap

# Monkey Reach (Indy)
[source](https://zenodo.org/record/3854034)

In [None]:
# Open the Indy reaching session read-only. The handle is kept open on purpose:
# the cells below dereference `fhan['spikes']`, `fhan['finger_pos']` and `fhan['t']` from it.
fhan = h5py.File(bubblewrap.config.CONFIG["data_path"] / 'indy_20160407_02.mat', 'r')

In [None]:
# First pass over the spike data: collect the spike-time vector of every
# non-empty unit so the first and last spikes of the session can be found.
l = []
for j in range(fhan['spikes'].shape[1]):
    for i in range(fhan['spikes'].shape[0]):
        # atleast_1d keeps a unit with a single entry indexable after squeeze
        v = np.atleast_1d(np.squeeze(fhan[fhan['spikes'][i,j]]))
        # empty channels have one entry very early while everything else is quiet
        if v.size and v[0] > 50:
            l.append(v)

# First and last spike times set the bin boundaries. Taking the per-unit
# extrema avoids flattening every spike into one Python list.
start = np.floor(min(unit.min() for unit in l))
stop = np.ceil(max(unit.max() for unit in l))

# Bin edges used to group spikes. The extra `bin_width` guarantees that the
# last edge lies at or beyond `stop`, so the final spikes are not dropped
# (np.histogram ignores values outside the outer edges).
bin_width = .03 # units are seconds
bins = np.arange(start, stop + bin_width, bin_width)
bin_centers = np.convolve([.5, .5], bins, "valid")

In [None]:
# Bin the spikes: columns of A are channels (units), rows are time bins.
A = np.zeros(shape=(bins.shape[0]-1,len(l)))
c = 0 # column counter; needed because empty channels are skipped
for j in range(fhan['spikes'].shape[1]):
    for i in range(fhan['spikes'].shape[0]):
        # Read each unit once and reuse it for both the test and the histogram.
        v = np.atleast_1d(np.squeeze(fhan[fhan['spikes'][i,j]]))
        # Same "non-empty channel" test as the first pass, so the columns line up with `l`.
        if v.size and v[0] > 50:
            A[:, c], _ = np.histogram(v, bins=bins)
            c += 1

# Every column allocated from len(l) must have been filled exactly once.
assert c == A.shape[1], "first and second pass disagree on the number of units"

In [None]:
np.squeeze(fhan[fhan['spikes'][1,1]]) # this is an array of spike times

In [None]:
# Read the finger-position traces (transposed so columns are coordinates)
# and the matching time stamps from the session file.
raw_behavior = fhan['finger_pos'][:].T
t = fhan["t"][0]

# Interpolate every behavior column onto the spike-bin centers so that
# behavior and binned spikes share one time base.
behavior = np.zeros((bin_centers.shape[0],raw_behavior.shape[1]))
for col, trace in enumerate(raw_behavior.T):
    behavior[:, col] = np.interp(bin_centers, t, trace)
    

In [None]:
# Keep only the bins after t = 70 s; behavior is near-constant before that.
mask = bin_centers > 70
A = A[mask]
behavior = behavior[mask]
bin_centers = bin_centers[mask]

In [None]:
# Save the binned spikes and resampled behavior. The path is relative, so the
# file lands in the current working directory. `y` gets a leading singleton
# axis, i.e. shape (1, n_bins, n_units); `x` is the behavior at the same bins.
np.savez("indy_full.npz", y=A[None,:,:], x=behavior)

In [None]:
# First 300 bins of the resampled behavior against time.
fig, ax = plt.subplots()
ax.plot(bin_centers[:300], behavior[:300])
ax.set_title("Hand position over time")
ax.set_xlabel("time (s)");

In [None]:
# Whole binned spike matrix as an image: rows are time bins, columns are units.
fig, ax = plt.subplots()
ax.imshow(A, aspect='auto', interpolation='nearest')
ax.set_title("Firing rate matrix")
ax.set_ylabel("time (bins)")
ax.set_xlabel("unit (neuron)");

In [None]:
# Run ProSVD on a 100-bin slice (rows 1100..1199) of the binned spike matrix and show the result.
# NOTE(review): the meaning of the two `20` arguments is not visible here — presumably the
# latent dimension and the initialization size; confirm against bubblewrap.input_sources.functional.
# NOTE(review): only `import bubblewrap` has run at this point; this relies on the package
# exposing `input_sources.functional` without an explicit submodule import — confirm.
plt.imshow(bubblewrap.input_sources.functional.prosvd_data(A[1100:1200], 20, 20), aspect='auto', interpolation='nearest');
plt.xlabel("latent");
plt.title("ProSVD of a slice of FR Matrix");
plt.ylabel("time (bins)");

In [None]:
# Resampled behavior as an image: rows are time bins, columns are behavior coordinates.
# Title and labels are added so the figure stands alone, and the trailing `;`
# suppresses the AxesImage repr.
plt.imshow(behavior, aspect='auto', interpolation='nearest')
plt.title("Resampled behavior")
plt.ylabel("time (bins)")
plt.xlabel("behavior column");

# Buzsáki Lab data
[source](https://crcns.org/data-sets/thalamus/th-1/about-th-1)

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# mv /home/jgould/Downloads/Mouse*.tar.gz /home/jgould/Documents/Bubblewrap/generated/datasets/buzaki/
# !tar -xvf Mouse24-131216.tar.gz

In [None]:
# NOTE(review): hardcoded, user-specific absolute path — this only works on the original
# author's machine. The data paths below are built from the configured "data_path", so this
# directory change may be unnecessary; confirm before removing.
cd /home/jgould/

In [None]:
ls

In [None]:
datasets = ["Mouse12-120806", "Mouse12-120807", "Mouse24-131216"]

In [None]:
def construct_buzaki_data(base, bin_size):
    """Build a binned spike-count matrix and the raw tracking data for one session.

    Reads the per-shank ``{base}.clu.N`` / ``{base}.res.N`` files and the
    ``{base}.whl`` file from ``<data_path>/buzaki/<base>/``.

    Parameters
    ----------
    base : str
        Session name; used both as the folder name and as the file-name prefix.
    bin_size : float
        Width of the spike-count bins, in seconds.

    Returns
    -------
    A : ndarray, shape (n_bins, n_units)
        Spike counts; rows are time bins, columns are units. Clusters 0 and 1
        of every shank are excluded.
    raw_behavior : ndarray, shape (n_samples, 4)
        First four columns of the ``.whl`` file with ``-1`` replaced by NaN.
    bin_centers : ndarray, shape (n_bins,)
        Midpoint of every bin, in seconds.
    t : ndarray, shape (n_samples,)
        Time of every tracking sample, assuming a 39.06 Hz rate starting at 0.

    Raises
    ------
    ValueError
        If a shank's ``.clu`` and ``.res`` files disagree in length, or if no
        spikes remain after discarding the ignored clusters.
    """
    # Imported locally: this section of the notebook never creates the `bw`
    # alias (it is only imported in a later section), so the lookup below
    # raised NameError on a fresh kernel.
    import bubblewrap as bw

    session_dir = bw.CONFIG["data_path"] / 'buzaki' / base

    def read_int_file(fname):
        """Return the integers stored one per line in `fname`, skipping blank lines."""
        with open(fname) as fhan:
            return [int(line) for line in fhan if line.strip()]

    # A shank is present when its .clu file exists; numbers >= 20 are not expected.
    has_clu = [os.path.isfile(session_dir / f"{base}.clu.{n}") for n in range(30)]
    assert not any(has_clu[20:])
    shanks = np.nonzero(has_clu)[0]

    sampling_rate = 20_000  # spike time stamps are divided by this to get seconds
    clusters_to_ignore = {0,1}

    shank_datas = []
    cluster_mapping = {}  # (shank, cluster) -> column of A, or nan for ignored clusters

    min_time = float("inf")
    max_time = 0
    used_columns = 0
    for shank in shanks:
        # The first entry of a .clu file is skipped (the original code named it
        # `n_clusters`); the remaining entries are one cluster label per spike.
        clusters = read_int_file(session_dir / f"{base}.clu.{shank}")[1:]

        # TODO: check if I should exclude the hash unit
        for cluster in np.unique(clusters):
            if cluster not in clusters_to_ignore:
                cluster_mapping[(shank, cluster)] = used_columns
                used_columns += 1
            else:
                cluster_mapping[(shank, cluster)] = np.nan

        columns = [cluster_mapping[(shank, c)] for c in clusters]
        times = read_int_file(session_dir / f"{base}.res.{shank}")
        if len(times) != len(columns):
            raise ValueError(
                f"shank {shank}: {len(times)} spike times but {len(columns)} cluster labels"
            )

        # Force float so the NaN markers and the in-place division below work
        # even when a shank happens to contain no ignored clusters.
        pairs = np.array([times, columns], dtype=float).T
        pairs = pairs[~np.isnan(pairs[:,1]),:]

        if len(pairs):
            pairs[:,0] /= sampling_rate

            min_time = min(min_time, pairs[:,0].min())
            max_time = max(max_time, pairs[:,0].max())

            shank_datas.append(pairs)

    if not shank_datas:
        raise ValueError(f"no usable spikes found for session {base!r} in {session_dir}")

    bins = np.arange(min_time, max_time + bin_size, bin_size)
    bin_centers = np.convolve([.5, .5], bins, "valid")
    A = np.zeros((len(bins)-1, used_columns ))

    for shank_data in shank_datas:
        spike_times = shank_data[:, 0]
        unit_columns = shank_data[:, 1].astype(int)
        # Same sanity check as the original per-spike loop: non-negative, sorted times.
        assert spike_times[0] >= 0 and np.all(np.diff(spike_times) >= 0)

        # A spike goes into the first bin whose right edge is >= its time, i.e.
        # bins are (left, right] except the first, which also contains its left
        # edge. The clip also absorbs float round-off at the very last edge.
        rows = np.searchsorted(bins, spike_times, side="left") - 1
        rows = np.clip(rows, 0, A.shape[0] - 1)
        # np.add.at accumulates correctly when the same (row, column) repeats.
        np.add.at(A, (rows, unit_columns), 1)

    with open(session_dir / f"{base}.whl", "r") as fhan:
        tracking_rows = []
        for line in fhan:
            if not line.strip():
                continue
            # float() tolerates the trailing newline, so nothing is sliced off:
            # the last line stays intact even without a final newline.
            fields = line.split("\t")
            tracking_rows.append([float(fields[i]) for i in range(4)])

    raw_behavior = np.array(tracking_rows, dtype=float).reshape(-1, 4)

    tracking_rate = 39.06  # tracking samples per second
    t = np.arange(raw_behavior.shape[0])/tracking_rate

    # -1 marks a missing tracking sample; convert it to NaN for downstream masking.
    raw_behavior[raw_behavior == -1] = np.nan

    return A, raw_behavior, bin_centers, t

In [None]:
def resample_behavior(raw_behavior,bin_centers,t):
    """Linearly interpolate the tracking data onto the spike-bin centers.

    Rows of `raw_behavior` that contain a NaN in any column are dropped before
    interpolating, so gaps are bridged from the neighbouring valid samples.

    Parameters
    ----------
    raw_behavior : ndarray, shape (n_samples, n_columns)
    bin_centers : ndarray, shape (n_bins,)
        Times at which to evaluate the behavior.
    t : ndarray, shape (n_samples,)
        Time of every row of `raw_behavior`.

    Returns
    -------
    ndarray, shape (n_bins, n_columns)
    """
    valid_rows = ~np.isnan(raw_behavior).any(axis=1)
    valid_t = t[valid_rows]
    valid_samples = raw_behavior[valid_rows]

    resampled = np.zeros((bin_centers.shape[0], raw_behavior.shape[1]))
    for col, trace in enumerate(valid_samples.T):
        resampled[:, col] = np.interp(bin_centers, valid_t, trace)
    return resampled

In [None]:
# Load the first session with 30 ms bins. Note that this rebinds `A`, `raw_behavior`,
# `bin_centers` and `t`, which the monkey-reach section above also used.
A, raw_behavior, bin_centers, t = construct_buzaki_data(datasets[0], 0.03)

In [None]:
rbehavior = resample_behavior(raw_behavior, bin_centers, t)

In [None]:
plt.imshow(A[:200], aspect='auto', interpolation='none')

In [None]:
# construct_buzaki_data has already replaced the tracker's -1 "missing" marker
# with NaN, so missing samples must be detected with isnan. Comparing against -1
# here always gave an all-False mask and a reported proportion of 0.
bad = np.any(np.isnan(raw_behavior), axis=1)
plt.plot(t,bad, '.')
plt.yticks([0,1], labels=["present", "missing"])
plt.title(f"Proportion of missing samples: {bad.mean():.3f}");

In [None]:
# Keep only the fully tracked samples. Missing samples are NaN by the time
# construct_buzaki_data returns, so the old `== -1` test removed nothing.
good_behavior = raw_behavior[~np.any(np.isnan(raw_behavior), axis=1)]
plt.plot(good_behavior[:,0], good_behavior[:,1])
plt.axis("equal");
plt.title("LED 1 position during good samples");

In [None]:
# Head direction from the vector between the two tracked points (columns 0,1 vs columns 2,3).
# The resampled data is stored in `rbehavior`; the name `resampled_behavior` used here before
# only exists inside resample_behavior, so it raised NameError.
# NOTE(review): the first arctan2 argument is the column-0 difference and the second the
# column-1 difference, so the angle is measured from the column-1 axis — confirm this is intended.
hd = np.arctan2(rbehavior[:,0] - rbehavior[:,2], rbehavior[:,1] - rbehavior[:,3])

## Testing

In [None]:
# Seeded so the randomly chosen neuron and the shuffle control below are
# reproducible under Restart & Run All.
rng = np.random.default_rng(0)

In [None]:
# Head-direction tuning curve of one randomly chosen unit, next to a shuffle control.
neuron = rng.integers(low=0, high=A.shape[1])
# previously hand-picked alternatives: neuron = 59, 6, 24

hd_bins = np.linspace(-np.pi, np.pi, 50)
hd_centers = np.convolve(hd_bins, [.5, .5], 'valid')
hd_occupancy = np.histogram(hd, hd_bins)[0]
visited = hd_occupancy > 0

# Spike counts per head-direction bin. A weighted histogram uses exactly the same
# bin-edge rules as `hd_occupancy`; the previous manual mask excluded samples with
# hd == pi from the spike sums while the occupancy counted them.
counts = np.histogram(hd, hd_bins, weights=A[:, neuron])[0]
# Mean spikes per visit; bins that were never visited stay NaN without a 0/0 warning.
counts = np.divide(counts, hd_occupancy, out=np.full(counts.shape, np.nan), where=visited)

# Shuffle control: pair every spike-count row with a random head direction.
permutation1 = rng.permutation(len(A))
permutation2 = rng.permutation(len(hd))
shuf_counts = np.histogram(hd[permutation2], hd_bins, weights=A[permutation1, neuron])[0]
shuf_counts = np.divide(shuf_counts, hd_occupancy, out=np.full(shuf_counts.shape, np.nan), where=visited)


fig, ax = plt.subplots(ncols=2,subplot_kw={'projection': 'polar'})

ax[0].plot(hd_centers, counts)
ax[0].grid(True)
ax[0].set_title(f"neuron {neuron} HD selectivity")
ax[0].set_rticks([]);

ax[1].plot(hd_centers, shuf_counts)
ax[1].grid(True)
ax[1].set_title(f"shuffled neuron {neuron} HD selectivity")
ax[1].set_rticks([]);

# Fly Data

In [None]:
import numpy as np
from pynwb import NWBHDF5IO
from IPython.display import display
import matplotlib.pyplot as plt
from nwbwidgets import nwb2widget
import glob
import bubblewrap as bw

In [None]:
pwd

In [None]:
base_path = bw.CONFIG["data_path"] / 'fly'

In [None]:
# Print "<n ROIs>\t<n behavior columns>\t<file name>" for every NWB file in the fly folder.
for file in sorted(base_path.glob("*.nwb")):
    with NWBHDF5IO(file, mode="r", load_namespaces=True) as fhan:
        nwbfile = fhan.read()
        processing = nwbfile.processing
        dff = processing["ophys"].data_interfaces["DfOverF"]
        behavioral_state = processing['behavioral state'].data_interfaces['behavioral state']
        n_roi = dff.roi_response_series['RoiResponseSeries'].data.shape[1]
        n_beh = behavioral_state.data.shape[1]
        print(f"{n_roi}\t{n_beh}\t{file.name}" )

In [None]:
# Print the names of the files that have 6 behavior columns and more than 200 ROIs.
for file in sorted(base_path.glob("*.nwb")):
    with NWBHDF5IO(file, mode="r", load_namespaces=True) as fhan:
        nwbfile = fhan.read()
        n_roi = nwbfile.processing["ophys"].data_interfaces["DfOverF"].roi_response_series['RoiResponseSeries'].data.shape[1]
        # The behavior array lives under the 'behavioral state' processing module
        # (same path as the listing cell above); the previous `nwbfile.shape[1]`
        # read a `shape` attribute from the file object itself, not from the behavior data.
        n_beh = nwbfile.processing['behavioral state'].data_interfaces['behavioral state'].data.shape[1]
        if n_beh == 6 and n_roi > 200:
            print(file.name)

In [None]:
# Fly recordings to use (presumably the output of the filter cell above — confirm).
# Note that this rebinds `datasets`, which earlier held the Buzsáki session names.
datasets = [
    "2019_06_28_fly2.nwb",
    "2019_07_01_fly2.nwb",
    "2019_08_07_fly2.nwb",
    "2019_08_14_fly1.nwb",
    "2019_08_14_fly2.nwb",
    "2019_08_14_fly3_2.nwb",
    "2019_08_20_fly2.nwb",
    "2019_08_20_fly3.nwb",
    "2019_10_02_fly2.nwb",
    "2019_10_10_fly3.nwb",
    "2019_10_14_fly2.nwb",
    "2019_10_14_fly3.nwb",
    "2019_10_14_fly4.nwb",
    "2019_10_18_fly2.nwb",
    "2019_10_18_fly3.nwb",
    "2019_10_21_fly1.nwb",
]
datasets

In [None]:
# (An incomplete `bw.` expression used to be here; it was a SyntaxError and has been removed.)

In [None]:
# Open one recording and browse it interactively.
# NOTE(review): `io` is never closed in the visible notebook — presumably because the widget
# reads from the file while it is displayed; call `io.close()` once done if that is the case.
io = NWBHDF5IO(base_path / '2019_08_20_fly2.nwb', mode="r", load_namespaces=True)
nwbfile = io.read()
nwb2widget(nwbfile)

In [None]:


# NOTE(review): no cell in this section assigns `A`; on a fresh run it is still the
# matrix built in the previous section. Presumably a cell extracting the fly data
# into `A` is missing — confirm.

# Z-score every column into a new array instead of normalizing `A` in place, so the
# original matrix survives, re-running the cell is harmless, and integer input works.
A_std = A.std(axis=0, ddof=1)
A_std = np.where(A_std > 0, A_std, 1.0)  # constant columns stay at zero instead of becoming NaN
A_z = (A - A.mean(axis=0)) / A_std
u, s, vh = np.linalg.svd(A_z, full_matrices=False)

# First three left singular vectors over time.
plt.plot(u[:,:3]);

In [None]:
# Overlay behavior (blue, left axis) and the first left singular vector (red, right axis).
# NOTE(review): `beh` is not assigned anywhere in the visible notebook, and `t` is left over
# from an earlier section — this cell depends on hidden kernel state; confirm where both
# should come from.
fig, ax2 = plt.subplots()

color = 'tab:blue'
ax2.plot(t, beh, color=color)
ax2.tick_params(axis='y', labelcolor=color)

# Second y-axis sharing the same x-axis
ax1 = ax2.twinx()


color = 'tab:red'
ax1.plot(t, u[:,0], color=color)
ax1.tick_params(axis='y', labelcolor=color)

fig.tight_layout() 

# Mouse Video

In [None]:
import h5py
import bubblewrap
import numpy as np
import os
import matplotlib.pyplot as plt
import urllib.request
from bubblewrap import CONFIG

### Widefield

In [None]:
# https://labshare.cshl.edu/shares/library/repository/38599/Widefield/mSM30/10-Oct-2017/mSM30_SpatialDisc_Oct10_2017_Session1.mat

In [None]:
# Download the widefield SVD file once and cache it.
base = "https://labshare.cshl.edu/shares/library/repository/38599/Widefield/mSM30/10-Oct-2017/"

fname = "Vc.mat"
# Store it in the folder the rest of this section reads from (data_path/musal/their_data),
# like the other download cells in this notebook, instead of the current directory.
local_path = CONFIG["data_path"] / "musal" / "their_data" / fname
if not os.path.exists(local_path):
    os.makedirs(os.path.dirname(local_path), exist_ok=True)
    # `base` already ends with "/", so no extra separator is inserted.
    urllib.request.urlretrieve(f"{base}{fname}", local_path)

In [None]:
# The file name was missing from this path, which made the cell a SyntaxError.
# NOTE(review): "Vc.mat" is assumed because it is the file downloaded in this section and
# the cells below read the "U" and "Vc" datasets from this handle — confirm.
fhan = h5py.File(CONFIG["data_path"] / "musal" / "their_data" / "Vc.mat", 'r')

In [None]:
for key in fhan:
    if hasattr(fhan[key], 'shape'):
        print(f"{key} {fhan[key].shape}")

In [None]:
U = fhan["U"][:]
Vc = fhan["Vc"][:]

In [None]:
plt.imshow(U[40,:,:],aspect='auto')

In [None]:
UU = U.reshape(U.shape[0], -1)
plt.imshow(UU, aspect='auto', interpolation='none')

In [None]:
VVc = Vc.reshape(-1, Vc.shape[-1])

In [None]:
plt.imshow(VVc, aspect='auto', interpolation='none')

In [None]:
U.shape

In [None]:
VVc.shape, UU.shape

In [None]:
# Rebuild three frames from the low-rank factors: the first three rows of VVc times the
# flattened U, then reshaped back to the last two dimensions of U.
# NOTE(review): this assumes VVc's last axis matches U's first axis (the component axis) and
# that rows of VVc are time points — the on-disk axis order is not visible here; confirm.
test_frames = VVc[:3,:] @ UU
test_frames = test_frames.reshape([-1,U.shape[-2], U.shape[-1]])
plt.imshow(test_frames[0,:,:])

### 2p

In [None]:
from scipy.io import loadmat
import bubblewrap
import numpy as np
import os
import matplotlib.pyplot as plt
import urllib
from bubblewrap import CONFIG
import h5py
from skimage.transform import resize
from tqdm import tqdm
import bubblewrap.input_sources.functional as fin

In [None]:
ca, vid, t_ca, t_vid = fin.generate_musal_dataset(cam=2, video_target_dim=100, resize_factor=1, prosvd_init_size=100)

In [None]:
ca, vid, t_ca, t_vid = fin.generate_musal_dataset(cam=1, video_target_dim=10, resize_factor=1, prosvd_init_size=100)

In [None]:
# NOTE(review): `trial_edges` is not assigned anywhere in the visible notebook; this cell (and
# the later ones using it) depends on hidden kernel state — confirm where it should come from.
trial_edges.astype(int)

In [None]:
trial_edges.shape

In [None]:
# Load the dFOF entry of the two-photon session's data.mat.
data_dir = CONFIG["data_path"] / "musal/their_data/2pData/Animals/mSM49/SpatialDisc/30-Jul-2018/"
variables = loadmat(data_dir/"data.mat",  squeeze_me=True, simplify_cells=True)
A = variables["data"]['dFOF']

# Stack the entries along the first axis of the transposed array into one 2-D matrix.
# NOTE(review): presumably this concatenates trials so rows become time points — the layout
# of dFOF is not visible here; confirm. This also rebinds `A` from earlier sections.
A = np.vstack(A.T)

In [None]:
A.shape

In [None]:
Data.shape

In [None]:
trial_edges.shape

In [None]:
A.shape

In [None]:
# Ten evenly spaced values between the start and end of trial `i`.
# NOTE(review): `i` here is whatever loop index an earlier cell left behind, and `trial_edges`
# is not assigned in the visible notebook — hidden state; confirm.
np.linspace(*trial_edges[i],10)

In [None]:
# Length of every trial (second column minus first column of trial_edges), one dot per trial.
%matplotlib inline
plt.plot(trial_edges[:,1] - trial_edges[:,0], '.')
plt.ylabel("trial_end - trial_start")
plt.xlabel("trial");

In [None]:
# (A stray IPython help query `?` used to be here; it was leftover debugging and has been removed.)

In [None]:
# Scratch check on the first ten entries of `Data`.
# NOTE(review): `Data` is not assigned anywhere in the visible notebook — hidden state; confirm.
d = Data[:10]

# Flatten every entry to one row
e = d.reshape(d.shape[0], -1)

# Add a per-row offset of 30 * row index, wrap the values modulo 255, and restore the shape
e =e + np.arange(e.shape[0])[:,None] * 30
e %= 255
e = e.reshape(d.shape)
# Show the eighth entry
plt.imshow(e[7], cmap='gray')

### Behavior

In [None]:
# Session folder of mouse mSM60. This assignment used to be commented out, so on a fresh run
# the cell silently reused `base` from the Widefield section (a different mouse's folder)
# and built the wrong download URL.
base = "https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/mSM60/SpatialDisc/06-Sep-2018/"


fname = 'mSM60_SpatialDisc_Sep06_2018_Session1.mat'
eventual_path = bubblewrap.config.CONFIG["data_path"] / "musal" /  fname
# Download once; later runs reuse the cached copy.
if not os.path.exists(eventual_path):
    urllib.request.urlretrieve(f"{base}{fname}", eventual_path)

In [None]:
# Download the behavior-video SVD file from the session's BehaviorVideo sub-folder (cached
# under data_path/musal; skipped when the file already exists).
base = "https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/mSM60/SpatialDisc/06-Sep-2018/"
base = base + "BehaviorVideo/"
# https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/mSM60/SpatialDisc/06-Sep-2018/
fname = 'SVD_CombinedSegments.mat'
# `eventual_path` is reused by the loadmat cell that follows.
eventual_path = bubblewrap.config.CONFIG["data_path"] / "musal" /  fname
if not os.path.exists(eventual_path):
    urllib.request.urlretrieve(f"{base}{fname}", eventual_path)

In [None]:
fhan = loadmat(eventual_path, squeeze_me=True, simplify_cells=True)

In [None]:
fhan['allTimes'][0].shape

In [None]:
fhan['allTimes'][1].shape

In [None]:
plt.plot(fhan['allTimes'][1][:121473], '.')
plt.plot(fhan['allTimes'][0][:121473], '.')

In [None]:
for key in fhan:
    if hasattr(fhan[key], 'shape'):
        print(f"{key} {fhan[key].shape}")

In [None]:
vidV = fhan['vidV']

In [None]:
plt.plot(vidV[:3000,0:3])

In [None]:
vidV.shape

In [None]:
fhan["vidU"].shape

In [None]:
1600

In [None]:
# Rebuild the first video frame from its SVD factors (first row of vidV times vidU) and show
# it as a 160 x 100 image.
# NOTE(review): the 160 x 100 shape assumes vidU has 16000 columns laid out in that order — confirm.
plt.imshow((vidV[0,:] @ fhan['vidU']).reshape(160,100), interpolation='none')

In [None]:
16000 == 4*4*5*2*10*10