In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

from retina_to_connectome import get_activation_tensor, get_batch_voronoi_averages


from flyvis.examples.flyvision_ans import DECODING_CELLS

# Frame index handed to get_activation_tensor as `last_frame` when the
# per-cell-type activation tensors are built further down.
# NOTE(review): presumably frames after this one are unusable — confirm why 8.
last_good_frame = 8

  _C._set_default_tensor_type(t)


In [2]:
# get data
activations_dir = "flyvis/parsed_objects"
activations_path = os.path.join(activations_dir, "decoding_activations.npy")
# NOTE: allow_pickle=True unpickles arbitrary objects; only load files you trust.
activations = np.load(activations_path, allow_pickle=True)

# read the classification table, keeping a single row per root_id
classification = pd.read_csv("adult_data/classification.csv").drop_duplicates(subset='root_id')

In [3]:
# Voronoi-averaged activations per decoding cell type, keyed by cell type.
avgs_dict = {}
for cell_type in tqdm(DECODING_CELLS):
    # How many classified neurons carry this cell type
    number_of_cells = int((classification["cell_type"] == cell_type).sum())
    if number_of_cells == 0:
        # Cell type absent from the classification table: nothing to average
        continue

    # NOTE(review): the division by 255 presumably rescales 8-bit values to [0, 1] — confirm.
    activation_tensor = get_activation_tensor(activations, cell_type, last_frame=last_good_frame) / 255
    # One Voronoi centre per classified neuron of this type
    avgs_dict[cell_type] = get_batch_voronoi_averages(activation_tensor, n_centers=number_of_cells)

100%|██████████| 34/34 [00:10<00:00,  3.12it/s]


In [4]:
def voronoi_averages_to_df(dict_with_voronoi_averages):
    """Stack per-key matrices into one long DataFrame.

    Each value is transposed and turned into a DataFrame, tagged with its
    dictionary key in an ``index_name`` column, and the pieces are
    concatenated row-wise under a fresh 0..n-1 index.

    Args:
        dict_with_voronoi_averages: mapping from a label to a 2-D object
            exposing ``transpose()`` (e.g. a numpy array).

    Returns:
        A DataFrame whose last column, ``index_name``, holds the label of the
        matrix each row came from.
    """
    labelled_frames = [
        pd.DataFrame(values.transpose()).assign(index_name=label)
        for label, values in dict_with_voronoi_averages.items()
    ]
    return pd.concat(labelled_frames, axis=0, ignore_index=True)
     

In [5]:
# Flatten the per-cell-type averages into a single DataFrame; each row is
# labelled with its cell type in the `index_name` column.
result_df = voronoi_averages_to_df(avgs_dict)

In [6]:
# Name of the column (written by voronoi_averages_to_df) holding each row's cell type.
CELL_TYPE_COLUMN = "index_name"
# Fixed seed so the shuffled root_id assignment is identical after Restart & Run All.
ROOT_ID_SHUFFLE_SEED = 0

# Extract cell types and activations by column NAME rather than by position, so this
# cell still works when re-run after 'root_id' has been appended to result_df.
# The activation columns get their own name: rebinding `activations` here would
# clobber the raw array loaded earlier, which the Voronoi-averaging cell reads.
cell_types = result_df[CELL_TYPE_COLUMN]
voronoi_activations = result_df.drop(columns=[CELL_TYPE_COLUMN, "root_id"], errors="ignore")

# Create a dictionary to hold shuffled root_ids for each cell type
root_id_mapping = {}

# One generator shared by all groups: every cell type gets its own permutation,
# yet the whole assignment is reproducible.
shuffle_rng = np.random.RandomState(ROOT_ID_SHUFFLE_SEED)
for cell_type, group in classification.groupby("cell_type"):
    # Shuffle the root_ids within each group
    root_id_mapping[cell_type] = group['root_id'].sample(frac=1, random_state=shuffle_rng).values

# Running position of every row within its own cell type (0, 1, 2, ... per type).
# This is the "count of occurrences of this cell type so far" and, unlike the
# global row index, does not depend on rows of one type being contiguous.
position_in_type = result_df.groupby(CELL_TYPE_COLUMN).cumcount()


def assign_root_ids(row):
    """Return the root_id assigned to one row of ``result_df``.

    The row's cell type selects the shuffled root_id list in
    ``root_id_mapping``; the row's running position within that cell type
    (taken from ``position_in_type`` via ``row.name``) selects the entry,
    wrapping around with a modulo so the index never exceeds the list length.
    """
    root_ids = root_id_mapping[row[CELL_TYPE_COLUMN]]
    return root_ids[position_in_type.loc[row.name] % len(root_ids)]


# Attach the root_ids as the last column (overwriting in place on a re-run), then
# remove duplicated root_ids, which can only appear if a cell type has more rows
# than available root_ids and the modulo wrapped around.
result_df = (
    result_df
    .assign(root_id=result_df.apply(assign_root_ids, axis=1))
    .drop_duplicates(subset='root_id')
)


In [7]:
# Classification table without the annotation columns that are not needed downstream
classification_core = classification.drop(
    columns=[
        "flow", "super_class", "class", "sub_class",
        "hemibrain_type", "hemilineage", "side", "nerve",
    ]
)

# Voronoi activations keyed by root_id; the second-to-last column of result_df is dropped
voronoi_by_root_id = result_df.drop(columns=[result_df.columns[-2]])

# Left join keeps every classified neuron; neurons without activations get zeros
activation_df = pd.merge(
    left=classification_core,
    right=voronoi_by_root_id,
    on='root_id',
    how='left',
).fillna(0)

In [8]:
from adult_models_helpers import get_synapse_df
# Synapse table from the project helper; the cells below read its
# `pre_root_id`, `post_root_id` and `syn_count` columns.
synapse_df = get_synapse_df()

In [9]:
# Step 1: find the neurons shared by the activation table and the synapse table
# Unique root_ids that have a row in activation_df
neurons_merged = pd.unique(activation_df['root_id'])

# Unique root_ids appearing on either end of a synapse
neurons_synapse_pre = pd.unique(synapse_df['pre_root_id'])
neurons_synapse_post = pd.unique(synapse_df['post_root_id'])
both_ends = np.concatenate([neurons_synapse_pre, neurons_synapse_post])
neurons_synapse = np.unique(both_ends)

# Sorted array of root_ids present in both tables
common_neurons = np.intersect1d(neurons_merged, neurons_synapse)

# Step 2: restrict the synapses to the shared neurons and build the matrix
from scipy.sparse import coo_matrix

# A synapse survives only if BOTH its pre and post neuron are shared
pre_is_common = synapse_df['pre_root_id'].isin(common_neurons)
post_is_common = synapse_df['post_root_id'].isin(common_neurons)
filtered_synapse_df = synapse_df[pre_is_common & post_is_common]

# root_id -> row/column position in the matrix (position within common_neurons)
root_id_to_index = dict(zip(common_neurons, range(len(common_neurons))))

# Translate both synapse endpoints into matrix positions
pre_indices = filtered_synapse_df['pre_root_id'].map(root_id_to_index).values
post_indices = filtered_synapse_df['post_root_id'].map(root_id_to_index).values

# syn_count supplies the value stored at each (pre, post) position
data = filtered_synapse_df['syn_count'].values

# Square COO matrix: rows are presynaptic neurons, columns are postsynaptic neurons
matrix_side = len(common_neurons)
synaptic_matrix_sparse = coo_matrix(
    (data, (pre_indices, post_indices)),
    shape=(matrix_side, matrix_side),
    dtype=np.int64  # or np.float32/np.float64 if memory issue persists
)

In [10]:
# root_ids that have a row in activation_df
neurons_merged = set(activation_df['root_id'])

# root_ids indexed by synaptic_matrix_sparse (it was built from common_neurons)
neurons_synaptic = set(common_neurons)

# In activation_df but absent from the synaptic matrix
missing_in_synaptic = neurons_merged.difference(neurons_synaptic)

# In the synaptic matrix but absent from activation_df
missing_in_merged = neurons_synaptic.difference(neurons_merged)

In [11]:
from typing import Dict, Union
import torch
import torch.nn as nn

from adult_models_helpers import get_synapse_df


class AdultConnectomeNetwork(nn.Module):
    """Stack of identical layers whose connectivity comes from a connectome.

    All layers share one sparse weight tensor (one random weight per stored
    entry of the adjacency matrix) and one bias vector with an entry per
    neuron. The number of layers is read from
    ``general_config["CONNECTOME_LAYER_NUMBER"]``.

    Args:
        adjacency_matrix: scipy sparse matrix (any format providing
            ``tocoo()``) holding the connectome.
        neuron_count: side length used for the weight tensor and the length
            of the bias vector.
        general_config: configuration dictionary; only
            ``"CONNECTOME_LAYER_NUMBER"`` is read here.
    """

    def __init__(
        self,
        adjacency_matrix,
        neuron_count: int,
        general_config: Dict[str, Union[int, float, str, bool]],
    ):
        super(AdultConnectomeNetwork, self).__init__()

        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Convert the adjacency matrix to a PyTorch sparse tensor once in the initialization
        self.adjacency_matrix_coo = adjacency_matrix.tocoo()
        self.adj_matrix_sparse = torch.sparse_coo_tensor(
            self._coo_indices(self.adjacency_matrix_coo),
            torch.tensor(self.adjacency_matrix_coo.data, dtype=torch.float32),
            torch.Size(self.adjacency_matrix_coo.shape),
            device=self.device,
        )

        self.connectome_layer_number = general_config["CONNECTOME_LAYER_NUMBER"]

        # Initialize the shared weights for the connectome layers. The COO view is
        # passed so that `.row` / `.col` exist whatever format the caller supplied.
        self.shared_weights = self.initialize_sparse_weights(
            self.adjacency_matrix_coo, neuron_count
        )
        self.shared_bias = nn.Parameter(torch.ones(neuron_count))

    @staticmethod
    def _coo_indices(coo) -> torch.Tensor:
        """Stack the row/col arrays of a scipy COO matrix into a (2, nnz) int64 tensor."""
        # Converting each ndarray on its own avoids torch's slow
        # "tensor from a list of numpy.ndarrays" path and its UserWarning.
        return torch.stack(
            [
                torch.tensor(coo.row, dtype=torch.long),
                torch.tensor(coo.col, dtype=torch.long),
            ]
        )

    def initialize_sparse_weights(self, adjacency_matrix, neuron_count):
        """Create the trainable sparse weight tensor.

        Args:
            adjacency_matrix: scipy sparse matrix; converted to COO here so
                non-COO inputs are accepted too.
            neuron_count: side length of the square weight tensor.

        Returns:
            ``nn.Parameter`` wrapping a ``(neuron_count, neuron_count)`` sparse
            COO tensor with one uniform-random weight per stored entry of
            ``adjacency_matrix``.
        """
        coo = adjacency_matrix.tocoo()

        # Generate random weights for existing connections, on the model's device
        weights = torch.rand(len(coo.data), device=self.device)

        # Indices must live on the same device as the values
        indices = self._coo_indices(coo).to(self.device)
        sparse_weights = torch.sparse_coo_tensor(
            indices, weights, (neuron_count, neuron_count), device=self.device
        )

        return nn.Parameter(sparse_weights)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Propagate a batch of activations through the shared connectome layers.

        Args:
            x: 2-D tensor shaped ``(samples, neuron_count)``; this is the layout
                the bias is broadcast against (one bias entry per column).

        Returns:
            Tensor with the same shape as ``x``.
        """
        if self.connectome_layer_number <= 0:
            # No layers to apply; skip building the (potentially large) weight product
            return x

        # Use the pre-converted adjacency matrix in sparse format
        adj_matrix = self.adj_matrix_sparse.to(x.device)

        # The effective weights do not depend on x, so build them once instead of
        # once per layer.
        # NOTE(review): this is a sparse MATRIX PRODUCT of adjacency and weights, not an
        # elementwise mask; its result can be far denser than the adjacency itself
        # (a likely source of the CUDA OOM). If a mask was intended, an elementwise
        # product would be the fix — confirm intent before changing the math.
        masked_weights = torch.sparse.mm(adj_matrix, self.shared_weights).to(x.device)

        bias = self.shared_bias.unsqueeze(0)  # (1, neuron_count), broadcast over samples
        for _ in range(self.connectome_layer_number):
            # masked_weights is (neurons, neurons) and x is (samples, neurons):
            # multiply against x transposed and transpose back so the sample
            # dimension stays first and the bias lines up with the neuron axis.
            x = torch.sparse.mm(masked_weights, x.t()).t() + bias

        return x

In [12]:
# Keep only neurons that have a position in the synaptic matrix AND order the rows by
# that position. root_id_to_index is the row/column numbering of synaptic_matrix_sparse,
# so row i of activation_df must describe the neuron at matrix index i; filtering alone
# would leave the rows in classification order and misalign activations with the matrix.
matrix_position = activation_df['root_id'].map(root_id_to_index).to_numpy(dtype=float)
has_position = ~np.isnan(matrix_position)  # NaN marks root_ids absent from the matrix
row_order = np.flatnonzero(has_position)[
    np.argsort(matrix_position[has_position], kind="stable")
]
activation_df = activation_df.iloc[row_order]

In [13]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Everything except the identifier columns is activation data
activation_data = activation_df.drop(columns=["root_id", "cell_type"])

# DataFrame -> numpy array -> float32 tensor on `device`, then transposed so the
# sample dimension comes first (sample_size x neuron_count)
activation_tensor = (
    torch.tensor(activation_data.values, dtype=torch.float32).to(device).t()
)

sample_count, neuron_count = activation_tensor.shape
general_config = dict(CONNECTOME_LAYER_NUMBER=2)

# Build the network from the synaptic matrix and move its parameters to `device`
adult_connectome_network = AdultConnectomeNetwork(
    synaptic_matrix_sparse, neuron_count, general_config
).to(device)

# Feed the entire batch of samples through the network in one call
output_activations = adult_connectome_network(activation_tensor)


  torch.tensor(


OutOfMemoryError: CUDA out of memory. Tried to allocate 808.00 MiB. GPU 0 has a total capacty of 7.58 GiB of which 476.00 MiB is free. Including non-PyTorch memory, this process has 6.95 GiB memory in use. Of the allocated memory 6.74 GiB is allocated by PyTorch, and 53.78 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [145]:
from utils import flush_cuda_memory

# Project helper imported from utils.
# NOTE(review): presumably releases cached CUDA memory after the out-of-memory
# failure above — confirm what it actually frees.
flush_cuda_memory()