In [43]:
import sys
import numpy as np
import matplotlib.pyplot as plt
import cebra
from PIL import Image
import cv2
import os
import torch
import torch.nn.functional as F
from torch import nn
import itertools
from torch.utils.tensorboard import SummaryWriter
import random
import gc
from cebra_utils import *
from vit_pytorch import ViT
import pandas as pd

data_path = '/mnt/teams/TM_Lab/Tony/water_reaching/Data/rig1_data/processed/GRL3_2023-07-13_1'

In [44]:
pose_data = load_pose_data(data_path, 0)
## convert pose data to a numpy array size rows x columns
pose_data.shape

(288, 187)

In [45]:
## Given the path to a tif file, return that as a 3d numpy array
# @param path: path to tif file
# @return: 3d numpy array, first array is time dimension
def load_tif(path):
    img = cv2.imreadmulti(path, flags=(cv2.IMREAD_GRAYSCALE | cv2.IMREAD_ANYDEPTH))[1]
    img = np.array(img)
    return img

## Loads the brain data from a given trial
def load_brain_data(parent_directory, trial_num, type='gcamp'):
    # Load the data
    data_path = os.path.join(parent_directory, 'trial_' + str(trial_num) + '/brain/' + type + '.tif')
    data = load_tif(data_path)
    return data

def load_pose_data(parent_directory, trial_num):
    # Load the data
    data_path = os.path.join(parent_directory, 'trial_' + str(trial_num) + '/anipose/videos/pose-3d/vid.csv')
    data = pd.read_csv(data_path)
    data = data.to_numpy()
    return data

## Go through all trials and load the brain data for each trial
def load_all_brain_data_trials(parent_directory, type='gcamp'):
    # Get the number of trials
    num_trials = len([x for x in os.listdir(parent_directory) if 'trial_' in x])
    # Load the data
    return np.array([load_brain_data(parent_directory, trial_num, type) for trial_num in range(num_trials)])
    
## Takes a numpy array in and returns a memory mapped numpy array
# @param arr: numpy array to be memory mapped
# @param path: path to save the memory mapped array to
# @return: memory mapped numpy array
def memmap(arr, path):
    # Save the array
    np.save(path, arr)
    # Load the array
    return np.load(path, mmap_mode='r')

In [46]:
brain_images = load_brain_data(data_path, 1)
brain_images = np.array([cv2.resize(x, (256, 256)) for x in brain_images])
# convert each image to rgb (3 channels)
brain_images = np.array([np.stack((x,)*3, axis=0) for x in brain_images])
# to torch
brain_images = torch.from_numpy(brain_images).float()
print(brain_images.shape)

torch.Size([288, 3, 256, 256])


In [47]:
## Creates a CEBRA multisession Data Loader with the given data, feature data and brain data must share first 2 dimensions
def init_dataloader(brain_data, feature_data, num_steps, time_offset, conditional, batch_size=1, cebra_offset=None ):
    datasets = []
    print('loading data')
    for session in zip(brain_data, feature_data):
        brain_data_tensor  = torch.FloatTensor(session[0]).unsqueeze(1)
        feature_data_tensor = torch.FloatTensor(session[1])
        datasets.append(cebra.data.datasets.TensorDataset(brain_data_tensor, continuous=feature_data_tensor, offset=cebra_offset))
    dataset_collection = cebra.data.datasets.DatasetCollection(*datasets)
    return cebra.data.multi_session.ContinuousMultiSessionDataLoader(
        dataset=dataset_collection,
        batch_size=batch_size,
        num_steps=num_steps,
        time_offset=time_offset,
        conditional=conditional,
    ).to('cuda')

## initialize a single session dataloader
def init_single_session_dataloader(brain_data, feature_data, num_steps, time_offset, conditional, batch_size=1, cebra_offset=None ):
    brain_data_tensor  = torch.FloatTensor(brain_data).unsqueeze(1)
    feature_data_tensor = torch.FloatTensor(feature_data)
    dataset = cebra.data.datasets.TensorDataset(brain_data_tensor, continuous=feature_data_tensor, offset=cebra_offset)
    return cebra.data.single_session.ContinuousDataLoader(
        dataset=dataset,
        batch_size=batch_size,
        num_steps=num_steps,
        time_offset=time_offset,
        conditional=conditional,
    ).to('cuda')

## Creat and train the model in partial batches of data
def train_model(brain_data, feature_data, num_steps, time_offset, conditional, batch_size, cebra_offset, input_size, hidden_units, output_dimension, model_name, device, output_model_path, saved_model = None):
    ## Load dataloader for first slice of data
    print('Loading data')
    dataloader= init_single_session_dataloader(brain_data, feature_data, num_steps, time_offset, conditional, batch_size, cebra_offset)
    print('Creating model')
    ## create list of models
    model = torch.nn.ModuleList([
    cebra.models.init(model_name, input_size,
                        hidden_units, output_dimension, True)
    for _ in range(len(list(dataloader.dataset.iter_sessions())))
    ]).to(device)
    if saved_model is not None:
        model.__setstate__(saved_model)

    ## Load criterion
    criterion = cebra.models.criterions.LearnableCosineInfoNCE(temperature=1, min_temperature=0.1).to(device)
    start_state = criterion.state_dict()
    ## Load optimizer
    optimizer = torch.optim.Adam(list(model.parameters()) + list(criterion.parameters()), lr=0.001)

    print('Loading solver')
    ## Load solver and train on first slice of data
    solver = cebra.solver.MultiSessionSolver(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        tqdm_on=True,
    ).to(device)
    print('Training on slice 1')
    solver.fit(dataloader.to(device),
                save_frequency=500,
                logdir='runs',)
 
    print('Training complete, saving model')
    torch.save(solver, output_model_path)
    return solver



In [48]:
brain_data = np.array([load_brain_data(data_path, x) for x in range(1)])
feature_data = np.array([load_pose_data(data_path, x) for x in range(1)])


In [49]:
# reshape all images in brain data to be 256 x 256
# 20 x 288 x 253 x 190 -> 20 x 288 x 256 x 256
brain_data = np.array([np.array([cv2.resize(img, (256, 256)) for img in trial]) for trial in brain_data])
brain_images = np.array([np.stack((x,)*3, axis=1) for x in brain_data])


In [50]:
brain_images.shape

(1, 288, 3, 256, 256)

In [51]:
# For ViT model we need to reshape the data to be 256 x 256 x 3 as the model expects 3 channels, so we use a 1,2 offset
model = train_model(
    brain_data=brain_data,
    feature_data=feature_data,
    num_steps=100,
    time_offset=10,
    conditional='time_delta',
    batch_size=2,
    cebra_offset=cebra.data.datatypes.Offset(1,2),
    input_size=256,
    hidden_units=3,
    output_dimension=16,
    model_name='ViT-16-v1',
    device='cuda',
    output_model_path='model.pth',
)

Loading data
Creating model


AttributeError: 'TensorDataset' object has no attribute 'iter_sessions'