In [1]:

import glob
import os

import sys
import glob
from pathlib import Path, PurePath
# Extend the import search path: first the `neuroformer` directory located two
# levels above the current working directory, then three relative fallbacks.
path = Path.cwd()
parent_path = path.parents[1]
sys.path.extend([
    str(parent_path / 'neuroformer'),
    'neuroformer',
    '.',
    '../',
])

import pandas as pd
import numpy as np

import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt

from torch.utils.data.dataloader import DataLoader

import math

from neuroformer.model_neuroformer import GPT, GPTConfig
from neuroformer.trainer import Trainer, TrainerConfig
from neuroformer.utils_2 import (set_seed, update_object, 
                                 check_common_attrs, running_jupyter, 
                                 all_device, load_config, update_config, 
                                 dict_to_object, object_to_dict, recursive_print)
from neuroformer.visualize import set_plot_params
from neuroformer.SpikeVidUtils import make_intervals, round_n, SpikeTimeVidData2
import gdown

# Rebind parent_path to a plain string: the directory two levels above the
# current working directory, with a trailing "/". This replaces the
# pathlib.Path value assigned to the same name earlier in this cell.
parent_path = os.path.dirname(os.path.dirname(os.getcwd())) + "/"

import argparse
from neuroformer.SpikeVidUtils import round_n

# Configure the root logger for the notebook: INFO level and above, with each
# record showing a timestamp, the level, the logger name and the message.
import logging

logging.basicConfig(
    level=logging.INFO,
    datefmt="%m/%d/%Y %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
)

In [2]:
# Key of the recording to analyse; the data-loading cell maps it to a
# directory ("first"/"visnav", "medial" and "lateral" are handled there).
DATASET = "medial"
# Bin width passed to resample_spikes and get_frame_idx in the cells below
# (presumably seconds — TODO confirm the unit).
DT = 0.05

In [3]:
""" 

-- DATA --
neuroformer/data/OneCombo3_V1AL/
df = response
video_stack = stimulus
DOWNLOAD DATA URL = https://drive.google.com/drive/folders/1jNvA4f-epdpRmeG9s2E-2Sfo-pwYbjeY?usp=sharing


"""

from neuroformer.prepare_data import DataLinks
from neuroformer.DataUtils import round_n, get_frame_idx
from neuroformer.DataUtils import resample_spikes


# Resolve the directory that holds the files for the selected DATASET.
if DATASET in ["first", "visnav"]:
    data_path = "./data/VisNav_VR_Expt"
elif DATASET == "medial":
    data_path = "./data/VisNav_VR_Expt/MedialVRDataset/"
elif DATASET == "lateral":
    data_path = "./data/VisNav_VR_Expt/LateralVRDataset"
else:
    # Fail here with a clear message instead of a NameError on `data_path`
    # a few lines further down.
    raise ValueError(
        f"Unknown DATASET {DATASET!r}; expected one of "
        "'first', 'visnav', 'medial', 'lateral'."
    )


# Table whose 'Interval' column is used later to pick the training bins.
train_data = pd.read_csv(os.path.join(data_path, "train_data.csv"))

spikes_path = f"{data_path}/NF_1.5/spikerates_dt_0.01.npy"
# NOTE(review): the speed file is pinned to dt_0.05 while the spikes are
# resampled to DT; if DT is changed the two arrays will presumably no longer
# line up — confirm before using another DT.
speed_path = f"{data_path}/NF_1.5/behavior_speed_dt_0.05.npy"
stim_path = f"{data_path}/NF_1.5/stimulus.npy"

# resample_spikes is called with 0.01 and DT (presumably the stored and the
# target bin width — confirm); the result is transposed so that the first
# axis matches the first axis of `speed` (see the shapes printed below).
spikes = resample_spikes(np.load(spikes_path), 0.01, DT).transpose()
# Speed values are rounded to three decimals.
speed = np.round(np.load(speed_path), 3).transpose()
stimulus = np.load(stim_path)

frame_feats = None
print(f"spikes: {spikes.shape}, speed: {speed.shape}")

spikes: (32614, 1906), speed: (32614,)


In [4]:
# Bins referenced by train_data['Interval'] form the training split; every
# other bin of the recording goes to the test split.
# NOTE(review): the recorded output shows only 293 training rows versus
# 32321 test rows — confirm that this split is intended.
train_indexes = set(get_frame_idx(value, DT) for value in train_data['Interval'])
test_indexes = set(range(len(speed))) - train_indexes
len_test = len(test_indexes)

# Index with sorted lists so that rows are in ascending bin order instead of
# whatever order iterating over a set happens to yield. The same list is
# used for spikes and speed, so the two stay aligned row by row.
train_idx = sorted(train_indexes)
test_idx = sorted(test_indexes)

spikes_train = spikes[train_idx]
spikes_test = spikes[test_idx]

speed_train = speed[train_idx]
speed_test = speed[test_idx]

print(f"spikes_train: {spikes_train.shape}, spikes_test: {spikes_test.shape}")

spikes_train: (293, 1906), spikes_test: (32321, 1906)


In [5]:

# Candidate training lengths: 100 up to (but excluding) 10000 in steps of 900.
# The original author notes that the default is 5000.
max_iterations = np.arange(start=100, stop=10000, step=900)
# Candidate embedding sizes, kept as a variable for hypothesis testing below.
output_dimension = [2, 3, 8, 16, 32]

print(f"max_iterations: {max_iterations}, output_dimension: {output_dimension}")

max_iterations: [ 100 1000 1900 2800 3700 4600 5500 6400 7300 8200 9100], output_dimension: [2, 3, 8, 16, 32]


In [6]:
import pickle
import matplotlib.pyplot as plt
import cebra
from cebra import CEBRA
from sklearn.model_selection import train_test_split
from scipy.stats import pearsonr

# Hyper-parameter grid; overrides the values from the previous cell (step is
# 1900 here). Neither list is used further down in this cell, which runs a
# single configuration (max_iter / out_dim below).
max_iterations = np.arange(100, 10000, 1900) #default is 5000.
output_dimension = [2, 3, 8, 16, 32] #here, we set as a variable for hypothesis testing below.
OFFSET = 1

# Store results (not filled in by this cell)
results = []
MAX_CORR = 0

max_iter = 5000
out_dim = 16

print(f"max_iterations: {max_iter}, output_dimension: {out_dim}")

cebra_model = CEBRA(model_architecture=f'offset{OFFSET}-model',
                    batch_size=16,
                    # Was `e-4`, which refers to an undefined name `e` and
                    # raises NameError on a fresh kernel.
                    # NOTE(review): 1e-4 is the assumed intent — confirm.
                    learning_rate=1e-4,
                    temperature=1,
                    output_dimension=out_dim,
                    max_iterations=max_iter,
                    distance='cosine',
                    device='cuda_if_available',
                    verbose=True)

# 1. Train the CEBRA model on the training split. speed_train is passed as a
#    second argument, so the fit is presumably label-guided rather than
#    time-only — confirm against the CEBRA documentation.
cebra_model.fit(spikes_train, speed_train)

print("finished training")

# 2. Embed both splits with the trained model
embedding = cebra_model.transform(spikes_train)
print("finished embedding")
embedding_test = cebra_model.transform(spikes_test)
print("finished embedding test")

# 3. Train the decoder on the training set
decoder = cebra.KNNDecoder()
# decoder = cebra.L1LinearRegressor()
decoder.fit(embedding, speed_train)

# 4. Test the decoder on the test set
speed_pred = decoder.predict(embedding_test)

# 5. Compute the correlation between the predicted and true speed
#    (pearsonr returns the coefficient first; only that value is kept)
corr = pearsonr(speed_pred, speed_test)[0]

print(f"corr: {corr}")

max_iterations: 5000, output_dimension: 16


pos:  0.0722 neg:  2.6367 total:  2.7089 temperature:  1.0000: 100%|██████████| 5000/5000 [00:42<00:00, 118.10it/s]


finished training
finished embedding
finished embedding test
corr: 0.3563641032748791


: 