Allen Institute resources for API access:
- http://help.brain-map.org/display/api/Allen+Brain+Atlas+API
- http://help.brain-map.org/display/api/Downloading+3-D+Expression+Grid+Data
- http://help.brain-map.org/display/api/Example+Queries+for+Experiment+Metadata
- http://help.brain-map.org/pages/viewpage.action?pageId=5308449

XML parsing:
- https://www.datacamp.com/community/tutorials/python-xml-elementtree
- https://www.kite.com/python/answers/how-to-download-a-csv-file-from-a-url-in-python

Python parallel processing:
- https://www.machinelearningplus.com/python/parallel-processing-python


In [65]:
import os
import pandas as pd
import requests
# import xml.etree.ElementTree as ET

In [66]:
# Show the working directory: `data_dir` and the relative 'data/expression/'
# paths used in later cells are all resolved against it.
os.getcwd()

'/projects/abeauchamp/Projects/MouseHumanMapping/Paper_TranscriptomicSimilarity/AllenMouseBrainAtlas'

In [67]:
# Folder holding cached downloads: "<current working directory>/data/".
# The trailing slash lets later cells append file names directly.
data_dir = '{}/data/'.format(os.getcwd())

In [68]:
# Plane of section to work with. The value is inserted into both the cached
# metadata file name and the Allen API query built in the next cell.
# Alternative value: uncomment the line below and comment out the other.
# plane = 'coronal'
plane = 'sagittal'

In [69]:
# Load the experiment metadata for the selected plane of section. The Allen
# Brain Atlas API is queried only when no cached CSV exists under data_dir.
metadata_file = 'AMBA_metadata_{}.csv'.format(plane)
metadata_path = os.path.join(data_dir, metadata_file)

if os.path.isfile(metadata_path):
    dfMetadata = pd.read_csv(metadata_path)
    # Caches written by the earlier version of this cell included the row
    # index, which pandas reads back as an extra 'Unnamed: 0' column. Drop it
    # so cached and freshly downloaded tables have identical columns.
    dfMetadata = dfMetadata.drop(columns='Unnamed: 0', errors='ignore')
else:
    # Query: non-failed mouse ISH section data sets in the requested plane,
    # joined with their genes, returned as one CSV table with all rows.
    abi_query_metadata = (
        "http://api.brain-map.org/api/v2/data/SectionDataSet/query.csv?"
        "criteria=[failed$eqfalse],plane_of_section[name$eq{}],products[abbreviation$eqMouse],treatments[name$eqISH],genes&"
        "tabular=data_sets.id+as+experiment_id,data_sets.section_thickness,data_sets.specimen_id,"
        "plane_of_sections.name+as+plane,"
        "genes.acronym+as+gene,genes.name+as+gene_name,genes.chromosome_id,genes.entrez_id,genes.genomic_reference_update_id,genes.homologene_id,genes.organism_id&"
        "start_row=0&num_rows=all"
    ).format(plane)

    dfMetadata = pd.read_csv(abi_query_metadata)

    # Make sure the cache folder exists, and write without the row index so
    # that reloading the file reproduces exactly this table.
    os.makedirs(data_dir, exist_ok=True)
    dfMetadata.to_csv(metadata_path, index=False)

In [70]:
# Sanity check: (number of rows, number of columns) of the metadata table.
dfMetadata.shape

(21734, 11)

In [71]:
# Preview the first rows to confirm the columns requested in the query came through.
dfMetadata.head()

Unnamed: 0,experiment_id,section_thickness,specimen_id,plane,gene,gene_name,chromosome_id,entrez_id,genomic_reference_update_id,homologene_id,organism_id
0,375,25,710147,sagittal,Ebf1,early B cell factor 1,36.0,13591.0,491928275.0,7297.0,2
1,386,25,710148,sagittal,Efnb1,ephrin B1,57.0,13641.0,491928275.0,3263.0,2
2,81600550,25,6510,sagittal,Tmem30a,transmembrane protein 30A,54.0,69981.0,491928275.0,110703.0,2
3,68269598,25,67501035,sagittal,Csrp1,cysteine and glycine-rich protein 1,34.0,13007.0,491928275.0,37874.0,2
4,68844361,25,68151384,sagittal,Slc4a1ap,"solute carrier family 4 (anion exchanger), mem...",50.0,20534.0,491928275.0,7543.0,2


In [72]:
# Take the experiment id in the row with index label 0 as a single test case
# for the download and conversion steps in the following cells.
temp_id = dfMetadata.loc[0,'experiment_id']

In [73]:
# Grid-data download address for the selected experiment; the response is
# saved as a zip archive in a later cell.
url = 'http://api.brain-map.org/grid_data/download/{}'.format(temp_id)
url

'http://api.brain-map.org/grid_data/download/375'

In [9]:
# Fetch the expression grid archive for the selected experiment.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() fails here on an HTTP error instead of letting an error
# page be written to disk as if it were the zip archive.
r = requests.get(url, timeout=60)
r.raise_for_status()


In [10]:
# Save the downloaded archive to disk. The target folder is created first so
# the write does not fail when data/expression/ does not exist yet.
outfile = 'data/expression/temp.zip'
os.makedirs(os.path.dirname(outfile), exist_ok=True)
with open(outfile, 'wb') as file:
    file.write(r.content)

In [11]:
from zipfile import ZipFile
# Folder that holds the downloaded archive and the files derived from it.
expression_dir = 'data/expression/'
# Name of the archive member that is extracted in the next cell.
raw_file = 'energy.raw'
# Name of the MINC file that the rawtominc command writes from the raw data.
minc_file = 'energy.mnc'

In [12]:
# Unpack only the raw grid file from the downloaded archive into the
# expression folder; every other archive member is left untouched.
with ZipFile(expression_dir + 'temp.zip', 'r') as archive:
    archive.extract(member=raw_file, path=expression_dir)

In [13]:
# Shell pipeline that streams the raw grid into rawtominc to produce the MINC
# file. The trailing "58 41 67" are the grid dimensions whose product is the
# voxel count stored as size_200 in a later cell.
cmd = (
    'cat {} | rawtominc {} -signed -float -ounsigned -oshort '
    '-xstep 0.2 -ystep 0.2 -zstep 0.2 -clobber 58 41 67'
).format(expression_dir + raw_file, expression_dir + minc_file)
cmd

'cat data/expression/energy.raw | rawtominc data/expression/energy.mnc -signed -float -ounsigned -oshort -xstep 0.2 -ystep 0.2 -zstep 0.2 -clobber 58 41 67'

In [14]:
# Run the conversion and stop on failure: the following cells read energy.mnc,
# which could otherwise be a stale file left over from an earlier run.
exit_status = os.system(cmd)
if exit_status != 0:
    raise RuntimeError(
        'rawtominc conversion failed (exit status {}): {}'.format(exit_status, cmd)
    )
exit_status

0

In [25]:
import numpy as np
from pyminc.volumes.factory import *

In [26]:
# Open the MINC file produced by the rawtominc command as a pyminc volume.
infile = expression_dir+minc_file
vol = volumeFromFile(infile)

In [27]:
# Total voxel counts of the known expression grids. The suffix of each name is
# the resolution value that the lookup table below associates with that count
# (the value is divided by 1000 later to obtain the step size).
size_10 = 1320 * 800 * 1140
size_25 = 528 * 320 * 456
size_50 = 264 * 160 * 228
size_100 = 132 * 80 * 114
size_200 = 58 * 41 * 67

# Lookup table: total voxel count -> resolution.
map_resolutions = dict(zip(
    (size_10, size_25, size_50, size_100, size_200),
    (10, 25, 50, 100, 200),
))

In [28]:
# Infer the grid resolution from the total number of voxels in the volume;
# a voxel count that is not in map_resolutions raises KeyError.
res = map_resolutions[vol.data.size]

In [29]:
def reorient_to_standard(dat):
    """Rotate a 3-D array into the orientation used for the "RAS" option.

    The array is rotated by 90 degrees in the plane of axes (0, 2) and then by
    90 degrees in the plane of axes (0, 1).

    Parameters
    ----------
    dat : array_like
        Three-dimensional input data.

    Returns
    -------
    numpy.ndarray
        The rotated data, flattened and reshaped back to the rotated shape.
    """
    rotated = np.rot90(np.rot90(dat, k=1, axes=(0, 2)), k=1, axes=(0, 1))

    # Flatten and restore the shape; presumably done so the result is laid out
    # contiguously in the new axis order rather than being a strided view.
    return rotated.ravel().reshape(rotated.shape)

def do_nothing(dat):
    """Identity transform: hand back `dat` unchanged (used for the "PIR" option)."""
    return dat

In [30]:
# Map each supported voxel orientation to the function that converts the
# loaded volume data into that orientation.
map_voxel_orientations = {"RAS": reorient_to_standard,
                          "PIR": do_nothing}

voxel_orientation = 'RAS'

# Fail with a regular exception on an unknown orientation. The previous
# sys.exit(1) relied on `sys`, which is not imported in the visible cells, and
# exiting the interpreter is not appropriate inside a notebook kernel.
if voxel_orientation not in map_voxel_orientations:
    raise ValueError(
        "Invalid voxel orientation: {!r} (expected one of {})".format(
            voxel_orientation, sorted(map_voxel_orientations))
    )

new_data = map_voxel_orientations[voxel_orientation](vol.data)

In [31]:
# %% Coordinate definitions

# Centers are listed as x,y,z; reverse these when writing out
# Centers listed in um in CCFv3 coordinates
centers_RAS = {"MICe":  [5700, 7900, 2700],
               "CCFv3": [0, 13200, 8000]}
centers_PIR = {"MICe":  [5300, 5300, 5700],
               "CCFv3": [0, 0, 0]}

# Direction cosines, one [x, y, z] triple per axis, for each world space
direction_cosines_RAS = {"MICe":  [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
                         "CCFv3": [[0, 0, 1], [-1, 0, 0], [0, -1, 0]]}

direction_cosines_PIR = {"MICe":  [[0, -1, 0], [0, 0, -1], [1, 0, 0]],
                         "CCFv3": [[1, 0, 0], [0, 1, 0], [0, 0, 1]]}

# Map the voxel orientation to the matching tables of centers and direction
# cosines. map_voxel_orientations is defined once, in the cell that applies
# the orientation; the identical second definition that used to sit here only
# shadowed it and has been removed.
map_centers = {"RAS": centers_RAS,
               "PIR": centers_PIR}

map_dir_cosines = {"RAS": direction_cosines_RAS,
                   "PIR": direction_cosines_PIR}

In [32]:
expansion_factor = 1.0
world_space = "MICe"

# World coordinate system: scale the centers and the step size by the
# expansion factor and divide by 1000, then pick the direction cosines that
# belong to the chosen voxel orientation and world space.
centers = [expansion_factor * center / 1000
           for center in map_centers[voxel_orientation][world_space]]
steps = [expansion_factor * res / 1000 for _ in range(3)]
xdc, ydc, zdc = map_dir_cosines[voxel_orientation][world_space]

In [34]:
volume_type = None
data_type = None
labels = False

# Types: an explicit setting wins; None means "inherit from the input volume".
# NOTE(review): labels starts as False rather than None, so the fallback to
# vol.labels is never taken here - confirm that this is intended.
vtype = volume_type if volume_type is not None else vol.volumeType
dtype = data_type if data_type is not None else vol.dtype
labels = labels if labels is not None else vol.labels

In [62]:
outfile = 'data/expression/test.mnc'

def is_minc(infile):
    """Return True when `infile` has the ".mnc" extension, False otherwise."""
    extension = os.path.splitext(infile)[1]
    return extension == ".mnc"

is_minc(outfile)

True

In [63]:
# Show the output path that the next cell writes the reoriented volume to.
outfile

'data/expression/test.mnc'

In [64]:
# Create the output MINC volume. The dimensions are named z, y, x, so the
# x,y,z-ordered centers and steps are reversed; starts are the negated centers.
outvol = volumeFromDescription(outputFilename=outfile,
                               dimnames=["zspace", "yspace", "xspace"],
                               sizes=new_data.shape,
                               starts=[-c for c in reversed(centers)],
                               steps=list(reversed(steps)),
                               x_dir_cosines=xdc,
                               y_dir_cosines=ydc,
                               z_dir_cosines=zdc,
                               volumeType=vtype,
                               dtype=dtype,
                               labels=labels)

# Always close the volume, even when assigning the data or writing the file
# raises, so the volume is not left open after a failed cell.
try:
    outvol.data = new_data
    outvol.writeFile()
finally:
    outvol.closeVolume()

In [74]:
# Load a separate gene data table for comparison with the metadata above.
# NOTE(review): this is an absolute path outside this project's data folder,
# so the cell only runs where that path is readable.
dfTemp = pd.read_csv("/projects/yyee/tools/Allen_brain/data/gene_data.csv")

In [76]:
# (number of rows, number of columns) of the comparison table.
dfTemp.shape

(28848, 39)

In [84]:
# Look up the rows whose 'msg.id' equals 375, the experiment id that the
# download URL above was built for.
# NOTE(review): the id is hard-coded here rather than taken from temp_id.
dfTemp.loc[dfTemp['msg.id'] == 375]

Unnamed: 0,success,id,start_row,num_rows,total_rows,msg.blue_channel,msg.delegate,msg.expression,msg.failed,msg.failed_facet,...,msg.genes.homologene_id,msg.genes.id,msg.genes.legacy_ensembl_gene_id,msg.genes.name,msg.genes.organism_id,msg.genes.original_name,msg.genes.original_symbol,msg.genes.reference_genome_id,msg.genes.sphinx_id,msg.genes.version_status
17633,True,0,0,28848,28850,,False,False,False,734881840,...,7297.0,13369,,early B cell factor 1,2,early B cell factor 1,Ebf1,,98953,no change
