Allen Institute resources for API access:
- http://help.brain-map.org/display/api/Allen+Brain+Atlas+API
- http://help.brain-map.org/display/api/Downloading+3-D+Expression+Grid+Data
- http://help.brain-map.org/display/api/Example+Queries+for+Experiment+Metadata
- http://help.brain-map.org/pages/viewpage.action?pageId=5308449

XML parsing:
- https://www.datacamp.com/community/tutorials/python-xml-elementtree
- https://www.kite.com/python/answers/how-to-download-a-csv-file-from-a-url-in-python

Python parallel processing:
- https://www.machinelearningplus.com/python/parallel-processing-python


In [1]:
import os
import argparse
import numpy as np
import pandas as pd
import requests
from pyminc.volumes.factory import *
from zipfile import ZipFile

In [2]:
def fetch_metadata(dataset = 'coronal', outdir='./'):

    """ """

    metadata_file = outdir+'AMBA_metadata_{}.csv'.format(dataset)

    if os.path.isfile(metadata_file):
        metadata = pd.read_csv(metadata_file, index_col = None)
    else:
        abi_query_metadata = "http://api.brain-map.org/api/v2/data/SectionDataSet/query.csv?"+\
"criteria=[failed$eqfalse],plane_of_section[name$eq{}],products[abbreviation$eqMouse],treatments[name$eqISH],genes&".format(dataset)+\
"tabular=data_sets.id+as+experiment_id,data_sets.section_thickness,data_sets.specimen_id,"+\
"plane_of_sections.name+as+plane,"+\
"genes.acronym+as+gene,genes.name+as+gene_name,genes.chromosome_id,genes.entrez_id,genes.genomic_reference_update_id,genes.homologene_id,genes.organism_id&"+\
"start_row=0&num_rows=all"

        metadata = pd.read_csv(abi_query_metadata)
        metadata.to_csv(metadata_file, index=False)

    return metadata

In [37]:
def fetch_expression(experiment_id, outdir = './tmp/'):

    """ """

    if not os.path.exists(outdir):
        os.mkdir(outdir)

    abi_query_expr = 'http://api.brain-map.org/grid_data/download/{}'.format(experiment_id)

    amba_request = requests.get(abi_query_expr)

    tmpfile = outdir+str(experiment_id)+'.zip'
    with open(tmpfile, 'wb') as file:
        file.write(amba_request.content)

    with ZipFile(tmpfile, 'r') as file:
        try:
            file.extract('energy.raw', path = outdir)
            os.rename(outdir+'energy.raw', outdir+str(experiment_id)+'.raw')
            success = 1
        except KeyError as err:
            print('Error for experiment {}: {}'.format(experiment_id, err))
            success = 0
            
    os.remove(tmpfile)
 
    return success

In [20]:
def transform_space(infile, outfile, voxel_orientation = 'RAS', world_space = 'MICe', expansion_factor = 1.0, volume_type = None, data_type = None, labels = False):

    def reorient_to_standard(dat):
        dat = np.rot90(dat, k=1, axes=(0, 2))
        dat = np.rot90(dat, k=1, axes=(0, 1))

        shape = dat.shape
        dat = np.ravel(dat)
        dat = np.reshape(dat, shape)

        return(dat)

    def do_nothing(dat):
        return(dat)

    # %% Coordinate definitions

    # Centers are listed as x,y,z; reverse these when writing out
    # Centers listed in um in CCFv3 coordinates
    centers_RAS = {"MICe"   :   [5700, 7900, 2700],
                   "CCFv3"  :   [0, 13200, 8000]}
    centers_PIR = {"MICe"   :   [5300, 5300, 5700],
                   "CCFv3"  :   [0, 0, 0]}

    # Direction cosines
    direction_cosines_RAS = {"MICe"     :   [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
                             "CCFv3"    :   [[0, 0, 1], [-1, 0, 0], [0, -1, 0]]}

    direction_cosines_PIR = {"MICe"     :   [[0, -1, 0], [0, 0, -1], [1, 0, 0]],
                             "CCFv3"    :   [[1, 0, 0], [0, 1, 0], [0, 0, 1]]}

    # Map arguments to functions/dicts/values
    map_voxel_orientations = {"RAS" :   reorient_to_standard,
                              "PIR":    do_nothing}

    map_centers = {"RAS"    :   centers_RAS,
                   "PIR"    :   centers_PIR}

    map_dir_cosines = {"RAS"    :   direction_cosines_RAS,
                       "PIR"    :   direction_cosines_PIR}

    size_10 = 1320*800*1140
    size_25 = 528*320*456
    size_50 = 264*160*228
    size_100 = 132*80*114
    size_200 = 58*41*67

    map_resolutions = {size_10: 10,
                       size_25: 25,
                       size_50: 50,
                       size_100: 100,
                       size_200: 200}

    vol = volumeFromFile(infile)

    res = map_resolutions[vol.data.size]

    # Voxel orientation
    if voxel_orientation in map_voxel_orientations:
        new_data = map_voxel_orientations[voxel_orientation](vol.data)
    else:
        print("Invalid voxel orientation")
        sys.exit(1)

    # World coordinate system
    centers = [expansion_factor*c/(1000) 
               for c in map_centers[voxel_orientation][world_space]]
    steps = [expansion_factor*res/1000] * 3
    xdc = map_dir_cosines[voxel_orientation][world_space][0]
    ydc = map_dir_cosines[voxel_orientation][world_space][1]
    zdc = map_dir_cosines[voxel_orientation][world_space][2]

    # Types
    vtype = vol.volumeType if volume_type is None else volume_type
    dtype = vol.dtype if data_type is None else data_type
    labels = vol.labels if labels is None else labels

    outvol = volumeFromDescription(outputFilename=outfile,
                                   dimnames=["zspace", "yspace", "xspace"],
                                   sizes=new_data.shape,
                                   starts=[-c for c in reversed(centers)],
                                   steps=[s for s in reversed(steps)],
                                   x_dir_cosines=xdc,
                                   y_dir_cosines=ydc,
                                   z_dir_cosines=zdc,
                                   volumeType=vtype,
                                   dtype=dtype,
                                   labels=labels)

    outvol.data = new_data
    outvol.writeFile()
    outvol.closeVolume()

In [21]:
dataset = 'sagittal'
datadir = 'data/expression/'

dfMetadata = fetch_metadata(dataset = dataset,
                           outdir = datadir)

In [22]:
dfTemp = dfMetadata.loc[:5]
for i, row in dfTemp.iterrows():

    experiment_id = row['experiment_id']

    outdir = datadir+'{}/'.format(dataset)
    success = fetch_expression(experiment_id = experiment_id, 
                               outdir = outdir)


    infile = outdir+'{}.raw'.format(experiment_id)
    outfile = outdir+'{}_tmp.mnc'.format(experiment_id)

    cmd = 'cat {} | rawtominc {} -signed -float -ounsigned -oshort -xstep 0.2 -ystep 0.2 -zstep 0.2 -clobber 58 41 67'.format(infile, outfile)

    os.system(cmd)
    os.remove(infile)

    infile = outfile
    outfile = outdir+'{}_tmp2.mnc'.format(experiment_id)
    transform_space(infile = infile,
                    outfile = outfile,
                    voxel_orientation = 'RAS',
                    world_space = 'MICe',
                    expansion_factor = 1.0)
    os.remove(infile)


    gene_id = row['gene']

    infile = outfile
    outfile = outdir+'{}_{}.mnc'.format(gene_id, experiment_id)
    os.rename(infile, outfile)

Error for experiment 375: "There is no item named 'energy.raw' in the archive"


cat: data/expression/sagittal/375.raw: No such file or directory
rawtominc: Premature end of file.


FileNotFoundError: [Errno 2] No such file or directory: 'data/expression/sagittal/375.raw'

In [38]:
experiment_id = dfMetadata.loc[0,'experiment_id']

outdir = datadir+'{}/'.format(dataset)
success = fetch_expression(experiment_id = experiment_id, 
                 outdir = outdir)

if success == 1:

    infile = outdir+'{}.raw'.format(experiment_id)
    outfile = outdir+'{}_tmp.mnc'.format(experiment_id)

    cmd = 'cat {} | rawtominc {} -signed -float -ounsigned -oshort -xstep 0.2 -ystep 0.2 -zstep 0.2 -clobber 58 41 67'.format(infile, outfile)

    os.system(cmd)
    os.remove(infile)

    infile = outfile
    outfile = outdir+'{}_tmp2.mnc'.format(experiment_id)
    transform_space(infile = infile,
                    outfile = outfile,
                    voxel_orientation = 'RAS',
                    world_space = 'MICe',
                    expansion_factor = 1.0)
    os.remove(infile)


    gene_id = dfMetadata.loc[0,'gene']

    infile = outfile
    outfile = outdir+'{}_{}.mnc'.format(gene_id, experiment_id)
    os.rename(infile, outfile)

Error for experiment 375: "There is no item named 'energy.raw' in the archive"


In [35]:
dfMetadata['success'] = 0

In [36]:
dfMetadata.head()

Unnamed: 0,experiment_id,section_thickness,specimen_id,plane,gene,gene_name,chromosome_id,entrez_id,genomic_reference_update_id,homologene_id,organism_id,success
0,375,25,710147,sagittal,Ebf1,early B cell factor 1,36.0,13591.0,491928275.0,7297.0,2,0
1,386,25,710148,sagittal,Efnb1,ephrin B1,57.0,13641.0,491928275.0,3263.0,2,0
2,81600550,25,6510,sagittal,Tmem30a,transmembrane protein 30A,54.0,69981.0,491928275.0,110703.0,2,0
3,68269598,25,67501035,sagittal,Csrp1,cysteine and glycine-rich protein 1,34.0,13007.0,491928275.0,37874.0,2,0
4,68844361,25,68151384,sagittal,Slc4a1ap,"solute carrier family 4 (anion exchanger), mem...",50.0,20534.0,491928275.0,7543.0,2,0


In [39]:
success

0

In [40]:
bool(success)

False