In [1]:
# The purpose of this code is to build a dataframe indexed by cell_label that holds
# brain_section_label, average_correlation_score, coordinates and parcellation annotations.

In [2]:
import os
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import SimpleITK as sitk
import pathlib

from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache

In [3]:
# Local directory handed to the S3-backed ABC Atlas project cache.
download_base = Path('C:/programming_data/abc_download_root')
abc_cache = AbcProjectCache.from_s3_cache(download_base)

# Alternative: build the cache from a local copy instead of S3.
# download_base = Path('../../data/abc_atlas') # Path to the already downloaded data or s3fs-fuse mount.
# abc_cache = AbcProjectCache.from_local_cache(download_base)

# Last expression of the cell: display the manifest the cache is currently using.
abc_cache.current_manifest

'releases/20240330/manifest.json'

In [30]:
##### generating dataframe of needs #####
# Per-cell MERFISH metadata, indexed by cell_label.
# cell_label is read as str here, exactly as for the two coordinate tables below,
# so that the index-based joins compare labels of the same dtype.
cell = (
    abc_cache.get_metadata_dataframe(
        directory='MERFISH-C57BL6J-638850',
        file_name='cell_metadata_with_cluster_annotation',
        dtype={"cell_label": str}
    )
    .rename(columns={'x': 'x_section',
                     'y': 'y_section',
                     'z': 'z_section'})
    .set_index('cell_label')
)

# extract your needs from cell metadata
cell_extract = cell.loc[:, ['brain_section_label',
                            'cluster_alias',
                            'average_correlation_score',
                            'x_section',
                            'y_section',
                            'z_section']]

# reconstructed coordinates (this table also supplies the parcellation_index column)
reconstructed_coords = (
    abc_cache.get_metadata_dataframe(
        directory='MERFISH-C57BL6J-638850-CCF',
        file_name='reconstructed_coordinates',
        dtype={"cell_label": str}
    )
    .rename(columns={'x': 'x_reconstructed',
                     'y': 'y_reconstructed',
                     'z': 'z_reconstructed'})
    .set_index('cell_label')
)
# inner join: keep only cells present in both tables
cell_joined = cell_extract.join(reconstructed_coords, how='inner')

# ccf coordinates
# parcellation_index is dropped here because cell_joined already carries that
# column from reconstructed_coords; keeping both would clash in the join.
ccf_coords = (
    abc_cache.get_metadata_dataframe(
        directory='MERFISH-C57BL6J-638850-CCF',
        file_name='ccf_coordinates',
        dtype={"cell_label": str}
    )
    .rename(columns={'x': 'x_ccf',
                     'y': 'y_ccf',
                     'z': 'z_ccf'})
    .drop(columns=['parcellation_index'])
    .set_index('cell_label')
)
cell_joined = cell_joined.join(ccf_coords, how='inner')

# parcellation annotation
# Indexed by parcellation_index; every column gets a 'parcellation_' prefix and
# only the division / structure / substructure levels are kept.
parcellation_annotation = (
    abc_cache.get_metadata_dataframe(
        directory='Allen-CCF-2020',
        file_name='parcellation_to_parcellation_term_membership_acronym'
    )
    .set_index('parcellation_index')
    .rename(columns=lambda name: 'parcellation_%s' % name)
    .loc[:, ['parcellation_division',
             'parcellation_structure',
             'parcellation_substructure']]
)
# look up each cell's annotation through its parcellation_index column
cell_joined = cell_joined.join(parcellation_annotation, on='parcellation_index')

cell_joined.head(5)

Unnamed: 0_level_0,brain_section_label,cluster_alias,average_correlation_score,x_section,y_section,z_section,x_reconstructed,y_reconstructed,z_reconstructed,parcellation_index,x_ccf,y_ccf,z_ccf,parcellation_division,parcellation_structure,parcellation_substructure
cell_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1019171907102340387-1,C57BL6J-638850.37,1408,0.596276,7.226245,4.148963,6.6,7.255606,4.00768,6.6,1160,7.495417,2.445872,7.455066,HPF,DG,DG-po
1104095349101460194-1,C57BL6J-638850.26,4218,0.64118,5.064889,7.309543,4.2,5.036436,7.264429,4.2,564,9.227966,6.133693,5.225024,P,TRN,TRN
1017092617101450577,C57BL6J-638850.25,4218,0.763531,5.792921,8.189973,4.0,5.78427,8.007646,4.0,761,9.344912,6.989939,6.002664,P,P-unassigned,P-unassigned
1018093344101130233,C57BL6J-638850.13,4218,0.558073,3.19595,5.868655,2.4,3.161528,5.719814,2.4,718,10.977068,4.398568,3.305223,cbf,arb,arb
1019171912201610094,C57BL6J-638850.27,4218,0.591009,5.635732,7.995842,4.4,5.618763,7.847877,4.4,761,8.997138,6.798329,5.827197,P,P-unassigned,P-unassigned


In [61]:
##### filtering by substructure #####
struct = 'LH'
# boolean mask of the cells whose parcellation_substructure equals `struct`
pred = cell_joined['parcellation_substructure'].eq(struct)

# work on an independent copy so that columns added later do not raise
# SettingWithCopyWarning
filtered = cell_joined.loc[pred].copy()
print("number of cells:", filtered.shape[0])
filtered.head(5)

number of cells: 2599


Unnamed: 0_level_0,brain_section_label,cluster_alias,average_correlation_score,x_section,y_section,z_section,x_reconstructed,y_reconstructed,z_reconstructed,parcellation_index,x_ccf,y_ccf,z_ccf,parcellation_division,parcellation_structure,parcellation_substructure
cell_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1017092617101800268,C57BL6J-638850.38,5283,0.582496,5.995759,4.715432,6.8,6.226971,4.699358,6.8,179,7.207646,3.195783,6.413064,TH,LH,LH
1017092617101780513-1,C57BL6J-638850.38,5283,0.555154,4.753934,5.018805,6.8,4.92227,4.845414,6.8,179,7.155061,3.329147,5.083562,TH,LH,LH
1017092617101800358-1,C57BL6J-638850.38,5283,0.516931,5.717248,4.819773,6.8,5.794362,4.844641,6.8,179,7.179035,3.349707,5.970946,TH,LH,LH
1019171906102380019,C57BL6J-638850.42,5283,0.542683,6.115045,4.638671,7.6,6.065471,4.932627,7.6,179,6.454495,3.432822,6.244846,TH,LH,LH
1017092617101570581,C57BL6J-638850.38,5283,0.550196,6.072982,4.476326,6.8,6.176254,4.516339,6.8,179,7.226975,2.98912,6.356838,TH,LH,LH


In [77]:
##### extracting near cells #####
# folder (with trailing slash) that holds the input CSV and receives the output CSVs
address = 'C:/programming_data/abc_atlas_files/'
example_cfos = pd.read_csv(f'{address}example_cfos_coords.csv')

# function for judging near cells
def is_cell_near(cx, cy, cz, xx, yy, zz, max_distance=0.5):
    """Tell whether the point (cx, cy, cz) lies close to the point (xx, yy, zz).

    Parameters
    ----------
    cx, cy, cz : numeric
        Coordinates of the first point.
    xx, yy, zz : numeric
        Coordinates of the second point.
    max_distance : numeric, optional
        Inclusive cutoff on the Euclidean distance between the two points,
        in the same units as the coordinates. Defaults to 0.5, the value
        that used to be hard-coded; change it to SET a CUSTOM distance range.

    Returns
    -------
    int
        1 when the distance is <= max_distance, otherwise 0.
    """
    dis = ((cx - xx)**2 + (cy - yy)**2 + (cz - zz)**2)**0.5
    return 1 if dis <= max_distance else 0

# using reconstructed coords
# For every cell in `filtered`, count how many points of example_cfos it is
# near, as judged by is_cell_near on the reconstructed coordinates.
cell_xyz = filtered[['x_reconstructed', 'y_reconstructed', 'z_reconstructed']].to_numpy()

# Start from explicit zeros: the 'cfos' column does not exist before this cell
# has run, so it cannot be initialised from itself on a fresh kernel.
cfos_counts = np.zeros(len(cell_xyz), dtype=np.int64)
for point in example_cfos.itertuples():
    # one 0/1 flag per cell for this cfos point, added onto the running count
    cfos_counts += np.fromiter(
        (is_cell_near(cx, cy, cz, point.x, point.y, point.z) for cx, cy, cz in cell_xyz),
        dtype=np.int64,
        count=len(cell_xyz),
    )

# cfos_counts is in the same row order as `filtered`, so assign it positionally
filtered['cfos'] = cfos_counts
filtered.head(5)

Unnamed: 0_level_0,brain_section_label,cluster_alias,average_correlation_score,x_section,y_section,z_section,x_reconstructed,y_reconstructed,z_reconstructed,parcellation_index,x_ccf,y_ccf,z_ccf,parcellation_division,parcellation_structure,parcellation_substructure,cfos
cell_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1017092617101800268,C57BL6J-638850.38,5283,0.582496,5.995759,4.715432,6.8,6.226971,4.699358,6.8,179,7.207646,3.195783,6.413064,TH,LH,LH,2
1017092617101780513-1,C57BL6J-638850.38,5283,0.555154,4.753934,5.018805,6.8,4.92227,4.845414,6.8,179,7.155061,3.329147,5.083562,TH,LH,LH,1
1017092617101800358-1,C57BL6J-638850.38,5283,0.516931,5.717248,4.819773,6.8,5.794362,4.844641,6.8,179,7.179035,3.349707,5.970946,TH,LH,LH,0
1019171906102380019,C57BL6J-638850.42,5283,0.542683,6.115045,4.638671,7.6,6.065471,4.932627,7.6,179,6.454495,3.432822,6.244846,TH,LH,LH,1
1017092617101570581,C57BL6J-638850.38,5283,0.550196,6.072982,4.476326,6.8,6.176254,4.516339,6.8,179,7.226975,2.98912,6.356838,TH,LH,LH,2


In [75]:
# write the filtered cells, including their cfos count, to a CSV named after the substructure
filtered.to_csv(f'{address}cfos_{struct}.csv')

In [78]:
##### extracting cluster info #####
# columns that describe each filtered cell's cluster and parcellation
cluster_columns = [
    'cluster_alias',
    'parcellation_index',
    'parcellation_division',
    'parcellation_structure',
    'parcellation_substructure',
]
cluster = filtered.loc[:, cluster_columns]

In [79]:
# write the filtered metadata and the cluster subset to CSV files named after the substructure
for prefix, frame in (('metadata_', filtered), ('cluster_', cluster)):
    frame.to_csv(f'{address}{prefix}{struct}.csv')

In [28]:
# number of filtered cells per cluster_alias (missing aliases are counted too)
alias_counts = cluster['cluster_alias'].value_counts(dropna=False)
cluster_count = alias_counts.to_frame()
cluster_count

Unnamed: 0_level_0,count
cluster_alias,Unnamed: 1_level_1
5231,315
14956,279
2907,241
5254,157
2898,135
...,...
3434,1
4684,1
5058,1
5062,1
