# Data Mining Example with AllenSDK

Based on the AllenSDK mouse connectivity example notebook: https://allensdk.readthedocs.io/en/latest/_static/examples/nb/mouse_connectivity.html

In [2]:
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

In [3]:
# The manifest file is a simple JSON file that keeps track of all of
# the data that has already been downloaded onto the hard drives.
# If you supply a relative path, it is assumed to be relative to your
# current working directory.
# NOTE(review): no manifest_file argument is passed below, so the cache
# presumably falls back to the SDK's default manifest location -- confirm
# against the MouseConnectivityCache documentation.
mcc = MouseConnectivityCache()

# open up a table of all of the experiments (dataframe=True returns a
# DataFrame; the recorded output shows it is indexed by experiment id)
all_experiments = mcc.get_experiments(dataframe=True)
print("%d total experiments" % len(all_experiments))

# take a look at what we know about a single experiment; per the recorded
# output this one is an injection into the primary visual area (VISp)
all_experiments.loc[307558646]

2917 total experiments


gender                                                                   M
id                                                               307558646
injection-coordinates                                   [8740, 1590, 8140]
injection-structures     [{'id': 385, 'abbreviation': 'VISp', 'name': '...
product-id                                                               5
strain                                                            C57BL/6J
structure-abbrev                                                      VISp
structure-color                                                     08858c
structure-id                                                           385
structure-name                                         Primary visual area
transgenic-line                                                           
Name: 307558646, dtype: object

In [5]:
# the same experiment again, this time selected with a boolean mask on the
# 'id' column so the result is a one-row table rather than a Series
all_experiments.loc[all_experiments['id'] == 307558646]

Unnamed: 0_level_0,gender,id,injection-coordinates,injection-structures,product-id,strain,structure-abbrev,structure-color,structure-id,structure-name,transgenic-line
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
307558646,M,307558646,"[8740, 1590, 8140]","[{'id': 385, 'abbreviation': 'VISp', 'name': '...",5,C57BL/6J,VISp,08858c,385,Primary visual area,


In [8]:
# pandas for nice tables
import pandas as pd

# ask the cache for its StructureTree instance
structure_tree = mcc.get_structure_tree()

# look up two structures by their full names and tabulate the records
structures = structure_tree.get_structures_by_name(
    ['Primary visual area', 'Hypothalamus']
)
pd.DataFrame(structures)

Unnamed: 0,acronym,graph_id,graph_order,id,name,rgb_triplet,structure_id_path,structure_set_ids
0,VISp,1,185,385,Primary visual area,"[8, 133, 140]","[997, 8, 567, 688, 695, 315, 669, 385]","[396673091, 112905828, 688152357, 691663206, 6..."
1,HY,1,715,1097,Hypothalamus,"[230, 68, 56]","[997, 8, 343, 1129, 1097]","[2, 112905828, 691663206, 12, 184527634, 11290..."


In [9]:
from allensdk.api.queries.ontologies_api import OntologiesApi

# instantiate the ontologies API client
oapi = OntologiesApi()

# collect the id of every structure set referenced by the tree
structure_set_ids = structure_tree.get_structure_sets()

# look those ids up through the API, then show the records as a table
structure_set_records = oapi.get_structure_sets(structure_set_ids)
pd.DataFrame(structure_set_records)

  self.node())))


Unnamed: 0,description,id,name
0,List of structures in Isocortex layer 5,667481446,Isocortex layer 5
1,List of structures in Isocortex layer 6b,667481450,Isocortex layer 6b
2,Summary structures of the cerebellum,688152368,Cerebellum
3,List of structures for ABA Differential Search,12,ABA - Differential Search
4,List of valid structures for projection target...,184527634,Mouse Connectivity - Target Search
5,Structures whose surfaces are represented by a...,691663206,Mouse Brain - Has Surface Mesh
6,Summary structures of the midbrain,688152365,Midbrain
7,Summary structures of the medulla,688152367,Medulla
8,Summary structures of the striatum,688152361,Striatum
9,Structures representing subdivisions of the mo...,687527945,Mouse Connectivity - Summary


In [10]:
# look up the Isocortex structure; its id is the injection-site filter below
isocortex = structure_tree.get_structures_by_name(['Isocortex'])[0]
isocortex_id = isocortex['id']

# experiments in cre-positive mice with an injection in the isocortex
cre_cortical_experiments = mcc.get_experiments(
    cre=True,
    injection_structure_ids=[isocortex_id],
)
print("%d cre cortical experiments" % len(cre_cortical_experiments))

# same injection filter, but restricted to a single cre line
rbp4_cortical_experiments = mcc.get_experiments(
    cre=['Rbp4-Cre_KL100'],
    injection_structure_ids=[isocortex_id],
)
print("%d Rbp4 cortical experiments" % len(rbp4_cortical_experiments))

1211 cre cortical experiments
105 Rbp4 cortical experiments


In [11]:
# wild-type (cre=False) experiments with an injection in primary visual area
visp = structure_tree.get_structures_by_acronym(['VISp'])[0]
visp_experiments = mcc.get_experiments(
    cre=False,
    injection_structure_ids=[visp['id']],
)
print("%d VISp experiments" % len(visp_experiments))

# structure unionizes for those experiments, restricted to non-injection
# records in the isocortex and its descendant structures
visp_experiment_ids = [experiment['id'] for experiment in visp_experiments]
structure_unionizes = mcc.get_structure_unionizes(
    visp_experiment_ids,
    is_injection=False,
    structure_ids=[isocortex['id']],
    include_descendants=True,
)
print("%d VISp non-injection, cortical structure unionizes" % len(structure_unionizes))

33 VISp experiments
29204 VISp non-injection, cortical structure unionizes


In [12]:
# preview the first five unionize records
structure_unionizes.head(n=5)

Unnamed: 0,hemisphere_id,id,is_injection,max_voxel_density,max_voxel_x,max_voxel_y,max_voxel_z,normalized_projection_volume,projection_density,projection_energy,projection_intensity,projection_volume,experiment_id,structure_id,sum_pixel_intensity,sum_pixels,sum_projection_pixel_intensity,sum_projection_pixels,volume
0,1,630240332,False,0.275452,6500,1820,3330,4.1e-05,0.000205,0.123399,603.286011,3.3e-05,180296424,182305709,27584470000.0,132363900.0,16333550.0,27074.31,0.162146
1,2,630239860,False,0.494745,5800,3040,9380,0.00016,0.000294,0.114581,390.172089,0.00013,180296424,889,78973170000.0,362163600.0,41497120.0,106356.0,0.44365
2,2,630239504,False,0.586985,6920,1140,7950,0.020999,0.022122,12.03862,544.199328,0.017093,180296424,361,155204200000.0,630764100.0,7593530000.0,13953580.0,0.772686
3,1,630240910,False,0.285612,4710,4860,2650,0.000121,4.8e-05,0.016843,351.533434,9.8e-05,180296424,104,350934300000.0,1676093000.0,28230800.0,80307.57,2.053213
4,2,630239786,False,0.615703,4670,4620,8800,0.003526,0.00666,3.059985,459.459167,0.00287,180296424,783,66540450000.0,351775800.0,1076429000.0,2342816.0,0.430925


In [13]:
# first keep the unionizes whose projection density is above 0.5 ...
is_dense = structure_unionizes['projection_density'] > .5
dense_unionizes = structure_unionizes[is_dense]

# ... then, of those, the ones whose volume is above 0.5
is_large = dense_unionizes['volume'] > .5
large_unionizes = dense_unionizes[is_large]

# structure records for the remaining rows
# NOTE(review): the recorded output lists repeated names, so the second
# count printed below is per unionize row, not per distinct structure
large_structures = pd.DataFrame(structure_tree.nodes(large_unionizes['structure_id']))

summary_counts = (len(large_unionizes), len(large_structures))
print("%d large, dense, cortical, non-injection unionizes, %d structures" % summary_counts)

print(large_structures['name'])

large_unionizes

18 large, dense, cortical, non-injection unionizes, 18 structures
0                   Postrhinal area
1                      Visual areas
2               Lateral visual area
3     Primary visual area, layer 6a
4                   Postrhinal area
5         Rostrolateral visual area
6               Primary visual area
7               Lateral visual area
8         Rostrolateral visual area
9               Lateral visual area
10              Lateral visual area
11              Lateral visual area
12              Lateral visual area
13              Lateral visual area
14     Primary visual area, layer 1
15              Primary visual area
16              Lateral visual area
17              Lateral visual area
Name: name, dtype: object


Unnamed: 0,hemisphere_id,id,is_injection,max_voxel_density,max_voxel_x,max_voxel_y,max_voxel_z,normalized_projection_volume,projection_density,projection_energy,projection_intensity,projection_volume,experiment_id,structure_id,sum_pixel_intensity,sum_pixels,sum_projection_pixel_intensity,sum_projection_pixels,volume
71,2,630240820,False,1.0,9310,3180,9690,0.626614,0.744479,4059.92143,5453.373345,0.510067,180296424,312782628,2350927000000.0,559292300.0,2270683000000.0,416381300.0,0.685133
536,2,630239708,False,1.0,9170,2300,9690,4.24116,0.522139,2128.869392,4077.206061,3.452329,180296424,669,12798200000000.0,5397464000.0,11490500000000.0,2818228000.0,6.611893
706,3,630241279,False,1.0,9440,2350,2440,0.712232,0.532626,2363.397654,4437.253859,0.579761,180296424,409,2262464000000.0,888567600.0,2100039000000.0,473274400.0,1.088495
778,2,630239270,False,1.0,9200,1740,8390,0.411229,0.636154,2327.454346,3658.632568,0.334743,180296424,33,1103850000000.0,429549200.0,999756300000.0,273259600.0,0.526198
2399,2,636156255,False,1.0,9210,2680,9390,0.399948,0.612732,4104.780068,6699.147569,0.433155,309004492,312782628,2587937000000.0,577081800.0,2368794000000.0,353596300.0,0.706925
2896,2,634218630,False,1.0,8250,1770,8770,0.303568,0.500234,1285.347746,2569.495412,0.27588,309372716,417,679342600000.0,450205800.0,578671000000.0,225208000.0,0.551502
4086,2,636151215,False,1.0,9500,1260,8460,1.379541,0.508111,2117.836336,4168.060426,1.460212,307558646,385,5716203000000.0,2345965000.0,4968369000000.0,1192010000.0,2.873807
4284,2,636151229,False,1.0,8720,1520,9270,0.375017,0.593851,2496.28254,4203.54989,0.396947,307558646,409,1524682000000.0,545655900.0,1362111000000.0,324038300.0,0.668428
5078,2,633272766,False,1.0,7830,1500,8340,0.836458,0.627888,1960.408377,3122.223785,0.341144,113887162,417,962024800000.0,443526200.0,869492500000.0,278485000.0,0.54332
5563,3,636150146,False,1.0,8780,1980,2480,1.05937,0.506598,2611.704169,5155.378858,0.602429,307743253,409,2724830000000.0,970748100.0,2535307000000.0,491778900.0,1.189166
