In [1]:
from pathlib import Path
from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache

# Using the cache

Set up the cache to download from S3. 

See the abc_atlas_access getting-started guide: https://alleninstitute.github.io/abc_atlas_access/notebooks/getting_started.html

In [2]:
# Local cache directory for downloaded files. The path is relative, so it
# resolves against the working directory the notebook kernel was started in.
download_base = Path('./data/abc_atlas')
abc_cache = AbcProjectCache.from_cache_dir(download_base)

# Bare last expression so the notebook displays the manifest currently in use.
abc_cache.current_manifest

'releases/20250331/manifest.json'

List all the different releases available

In [3]:
# Accessed as an attribute (no parentheses); displays one manifest path per release.
abc_cache.list_manifest_file_names

['releases/20230630/manifest.json',
 'releases/20230830/manifest.json',
 'releases/20231215/manifest.json',
 'releases/20240330/manifest.json',
 'releases/20240831/manifest.json',
 'releases/20241115/manifest.json',
 'releases/20241130/manifest.json',
 'releases/20250131/manifest.json',
 'releases/20250331/manifest.json']

Load a manifest

In [4]:
# Select the release explicitly by its manifest path. This is the same manifest
# that `current_manifest` reported after the cache was created.
abc_cache.load_manifest('releases/20250331/manifest.json')

List all available directories

In [5]:
# Accessed as an attribute (no parentheses); displays the directory names that
# the other cache calls in this notebook take as their argument.
abc_cache.list_directories

['ASAP-PMDBS-10X',
 'ASAP-PMDBS-taxonomy',
 'Allen-CCF-2020',
 'MERFISH-C57BL6J-638850',
 'MERFISH-C57BL6J-638850-CCF',
 'MERFISH-C57BL6J-638850-imputed',
 'MERFISH-C57BL6J-638850-sections',
 'SEAAD',
 'SEAAD-taxonomy',
 'WHB-10Xv3',
 'WHB-taxonomy',
 'WMB-10X',
 'WMB-10XMulti',
 'WMB-10Xv2',
 'WMB-10Xv3',
 'WMB-neighborhoods',
 'WMB-taxonomy',
 'Zeng-Aging-Mouse-10Xv3',
 'Zeng-Aging-Mouse-WMB-taxonomy',
 'Zhuang-ABCA-1',
 'Zhuang-ABCA-1-CCF',
 'Zhuang-ABCA-2',
 'Zhuang-ABCA-2-CCF',
 'Zhuang-ABCA-3',
 'Zhuang-ABCA-3-CCF',
 'Zhuang-ABCA-4',
 'Zhuang-ABCA-4-CCF']

The following is the data we're interested in.

N.B. C57BL6J (C57BL/6J) is the mouse model.

In [6]:
# List the data files available in the imputed MERFISH directory.
abc_cache.list_data_files('MERFISH-C57BL6J-638850-imputed')

['C57BL6J-638850-imputed/log2']

In [7]:
# List the metadata files available in the imputed MERFISH directory.
abc_cache.list_metadata_files('MERFISH-C57BL6J-638850-imputed')

['gene']

Check how much total data is in a directory for data files and metadata files

In [8]:
# Total size of the directory's data files, returned as a human-readable string.
# NOTE(review): the download progress bar for this directory reports 50.2G for
# what is displayed here as 46.76 GB, so this figure is presumably in binary
# units (GiB) -- confirm before budgeting disk space.
abc_cache.get_directory_data_size('MERFISH-C57BL6J-638850-imputed')

'46.76 GB'

In [9]:
# Total size of the directory's metadata files, returned as a human-readable string.
abc_cache.get_directory_metadata_size('MERFISH-C57BL6J-638850-imputed')

'0.61 MB'

# Download the data

In [10]:
# Fetch the metadata files for the imputed MERFISH directory; the call returns
# a list of local file paths, which is printed below.
# NOTE(review): despite the `allen_ccf_` prefix, this variable holds MERFISH
# metadata paths. The name is left unchanged because other cells may use it.
allen_ccf_metadata = abc_cache.get_directory_metadata('MERFISH-C57BL6J-638850-imputed')
print("MERFISH data metadata files:\n\t", allen_ccf_metadata)

gene.csv: 100%|██████████████████████████████████████████████████████████████| 639k/639k [00:00<00:00, 645kMB/s]

MERFISH data metadata files:
	 [PosixPath('/Users/rosalind/WSU/Project/2025-human-brain/data/abc_atlas/metadata/MERFISH-C57BL6J-638850-imputed/20240831/gene.csv')]





In [11]:
# Fetch the data files for the imputed MERFISH directory. This is a large
# download (reported as 46.76 GB by get_directory_data_size), so expect this
# cell to run for a long time on first execution.
# NOTE(review): despite the `allen_ccf_` prefix, this variable holds MERFISH
# data file paths. The name is left unchanged because other cells may use it.
allen_ccf_list = abc_cache.get_directory_data('MERFISH-C57BL6J-638850-imputed')
print("MERFISH data files:\n\t", allen_ccf_list)


	Total directory size = 46.76 GB


C57BL6J-638850-imputed-log2.h5ad:  34%|███████████▋                       | 16.8G/50.2G [23:55<50:20, 11.0MMB/s]

Check that the files downloaded successfully

In [15]:
# Ask the cache again for the local paths of the imputed MERFISH files.
# Data and metadata paths are kept under separate names (the same names the
# download cells bind), so `allen_ccf_list` is not silently overwritten with
# metadata paths and re-running this cell leaves both variables consistent.
allen_ccf_list = abc_cache.get_directory_data('MERFISH-C57BL6J-638850-imputed')
print("MERFISH data files:\n\t", allen_ccf_list, "\n\n")
allen_ccf_metadata = abc_cache.get_directory_metadata('MERFISH-C57BL6J-638850-imputed')
print("MERFISH metadata files:\n\t", allen_ccf_metadata)

# The getters return path objects (shown as PosixPath in the recorded outputs);
# fail loudly if any of them is not actually present on disk.
missing_files = [path for path in [*allen_ccf_list, *allen_ccf_metadata] if not path.exists()]
assert not missing_files, f"Missing downloaded files: {missing_files}"

## The original measured data

We might need the original measured data, so let's have a look at it.

In [3]:
# List the data files available in the measured (non-imputed) MERFISH directory.
abc_cache.list_data_files('MERFISH-C57BL6J-638850')

['C57BL6J-638850/log2', 'C57BL6J-638850/raw']

In [4]:
# List the metadata files available in the measured (non-imputed) MERFISH directory.
abc_cache.list_metadata_files('MERFISH-C57BL6J-638850')

['cell_metadata',
 'cell_metadata_with_cluster_annotation',
 'example_genes_all_cells_expression',
 'gene']

Check how much total data is in a directory for data files and metadata files

In [5]:
# Total size of the directory's data files, returned as a human-readable string.
# NOTE(review): the two files later download as 7.63G each (15.26G in total)
# against the 14.21 GB displayed here, so this figure is presumably in binary
# units (GiB) -- confirm.
abc_cache.get_directory_data_size('MERFISH-C57BL6J-638850')

'14.21 GB'

In [6]:
# Total size of the directory's metadata files, returned as a human-readable string.
abc_cache.get_directory_metadata_size('MERFISH-C57BL6J-638850')

'1.94 GB'

In [7]:
# Fetch the metadata files for the measured MERFISH directory; the call returns
# a list of local file paths, which is printed below.
# NOTE(review): despite its name, `allen_ccf` holds MERFISH metadata paths.
# The name is left unchanged because other cells may use it.
allen_ccf = abc_cache.get_directory_metadata('MERFISH-C57BL6J-638850')
print("MERFISH data metadata files:\n\t", allen_ccf)

example_genes_all_cells_expression.csv: 100%|███████████████████████████████| 360M/360M [00:31<00:00, 11.4MMB/s]
gene.csv: 100%|████████████████████████████████████████████████████████████| 48.4k/48.4k [00:00<00:00, 205kMB/s]

MERFISH data metadata files:
	 [PosixPath('/Users/rosalind/WSU/Project/2025-human-brain/data/abc_atlas/metadata/MERFISH-C57BL6J-638850/20241115/cell_metadata.csv'), PosixPath('/Users/rosalind/WSU/Project/2025-human-brain/data/abc_atlas/metadata/MERFISH-C57BL6J-638850/20241115/views/cell_metadata_with_cluster_annotation.csv'), PosixPath('/Users/rosalind/WSU/Project/2025-human-brain/data/abc_atlas/metadata/MERFISH-C57BL6J-638850/20241115/views/example_genes_all_cells_expression.csv'), PosixPath('/Users/rosalind/WSU/Project/2025-human-brain/data/abc_atlas/metadata/MERFISH-C57BL6J-638850/20241115/gene.csv')]





In [8]:
# Fetch the data files for the measured MERFISH directory (a log2 and a raw
# .h5ad file, per the recorded output) and print their local paths.
# This rebinds `allen_ccf_list`, so any value it held for another directory is
# replaced from this point on.
allen_ccf_list = abc_cache.get_directory_data('MERFISH-C57BL6J-638850')
print("MERFISH data files:\n\t", allen_ccf_list)


	Total directory size = 14.21 GB


C57BL6J-638850-log2.h5ad: 100%|███████████████████████████████████████████| 7.63G/7.63G [10:07<00:00, 12.5MMB/s]
C57BL6J-638850-raw.h5ad: 100%|████████████████████████████████████████████| 7.63G/7.63G [10:04<00:00, 12.6MMB/s]

MERFISH data files:
	 [PosixPath('/Users/rosalind/WSU/Project/2025-human-brain/data/abc_atlas/expression_matrices/MERFISH-C57BL6J-638850/20230830/C57BL6J-638850-log2.h5ad'), PosixPath('/Users/rosalind/WSU/Project/2025-human-brain/data/abc_atlas/expression_matrices/MERFISH-C57BL6J-638850/20230830/C57BL6J-638850-raw.h5ad')]



