In [1]:
import os

In [2]:
import re

In [3]:
import zipfile

In [4]:
import glob

In [None]:
import itertools

In [5]:
import numpy as np

In [6]:
import pandas as pd

In [7]:
import xarray as xr

In [8]:
import matplotlib.pyplot as plt

In [9]:
import mkgu

In [10]:
from mkgu.knownfile import KnownFile as kf

In [11]:
from mkgu.knownfile import FileRecord, Sighting

In [12]:
from mkgu.lookup import pwdb

In [13]:
from mkgu.assemblies import AssemblyModel, AssemblyStoreMap, AssemblyStoreModel

In [14]:
from mkgu.stimuli import ImageModel, AttributeModel, ImageMetaModel, StimulusSetModel, ImageStoreModel, \
    StimulusSetImageMap, ImageStoreMap

## Process .nc files

In [15]:
v2_base_path = "/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1"

In [16]:
nc_files = glob.glob(os.path.join(v2_base_path, "*/*/*.nc"), recursive=True)
sorted(nc_files)

['/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data10/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data11/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data12/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data13/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data14/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data15/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data16/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data17/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data18/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data19/NatRev/data.nc',
 '/braintree/data2/acti

In [17]:
gd_arrays = {}
for f in nc_files:
    gd_arrays[f] = xr.open_dataarray(f)
gd_arrays

{'/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/data.nc': <xarray.DataArray (image_file_name: 96000, neuroid: 10)>
 [960000 values with dtype=float64]
 Coordinates:
   * image_file_name  (image_file_name) object '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/stimuli//1b2a86f3f332f098b9eb2567fe8f04cd.jpg' ...
     category_name    (image_file_name) float64 ...
     stimulusRepeats  (image_file_name) int64 ...
   * neuroid          (neuroid) object 'e0022' 'e0026' 'e0031' 'e0033' ...
     region           (neuroid) object ...
     animal           (neuroid) object ...,
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data10/NatRev/data.nc': <xarray.DataArray (image_file_name: 83600, neuroid: 10)>
 [836000 values with dtype=float64]
 Coordinates:
   * image_file_name  (image_file_name) object '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data10/NatRev/stimuli//0c816a8354b91506395116bd8145f96b.jpg' 

In [18]:
for gd_array_key in gd_arrays:
    gd_array = gd_arrays[gd_array_key]
    gd_array = gd_array.T.rename({"image_file_name": "presentation"})
    gd_array.coords["presentation_id"] = ("presentation", range(gd_array.shape[1]))
    gd_array.coords["neuroid_id"] = ("neuroid", gd_array["neuroid"].values)
    gd_arrays[gd_array_key] = gd_array
gd_arrays

{'/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/data.nc': <xarray.DataArray (neuroid: 10, presentation: 96000)>
 array([[ 1.,  0.,  1., ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]])
 Coordinates:
   * presentation     (presentation) object '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/stimuli//1b2a86f3f332f098b9eb2567fe8f04cd.jpg' ...
     category_name    (presentation) float64 ...
     stimulusRepeats  (presentation) int64 ...
   * neuroid          (neuroid) object 'e0022' 'e0026' 'e0031' 'e0033' ...
     region           (neuroid) object ...
     animal           (neuroid) object ...
     presentation_id  (presentation) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ...
     neuroid_id       (neuroid) object 'e0022' 'e00

In [19]:
def massage_file_name(file_name):
    split = re.split("\\\\|/", file_name)
    split = [t for t in split if t]
    relative_path = os.path.join(*split[-5:])
    full_path = os.path.join("/", *split)
    basename = split[-1]
    exists = os.path.exists(full_path)
    sha1 = kf(full_path).sha1
    result = {
        "image_file_path_original": relative_path,
        "image_id": sha1
    }
    return result

In [None]:
for gd_array_key in gd_arrays:
    print(gd_array_key)
    gd_array = gd_arrays[gd_array_key]
    df_massage = pd.DataFrame(list(map(massage_file_name, gd_array["presentation"].values)))
    for column in df_massage.columns:
        gd_array.coords[column] = ("presentation", df_massage[column])
    gd_array.reset_index(["neuroid", "presentation"], drop=True, inplace=True)
gd_arrays

#### Combine arrays

In [32]:
neuroid_sum, presentation_sum = (0, 0)
for k in gd_arrays:
    neuroid_sum = neuroid_sum + gd_arrays[k].shape[0]
    presentation_sum = presentation_sum + gd_arrays[k].shape[1]
(neuroid_sum, presentation_sum)

(195, 3494103)

In [None]:
for gd_array_key in gd_arrays:
    gd_array = gd_arrays[gd_array_key]
    mkgu.assemblies.gather_indexes(gd_array)
gd_arrays

In [36]:
gd_arrays

{'/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/data.nc': <xarray.DataArray (neuroid: 10, presentation: 96000)>
 array([[ 1.,  0.,  1., ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]])
 Coordinates:
   * neuroid                   (neuroid) MultiIndex
   - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
   - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
   - neuroid_id                (neuroid) object 'e0022' 'e0026' 'e0031' ...
   * presentation              (presentation) MultiIndex
   - category_name             (presentation) float64 nan nan nan nan nan nan ...
   - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
   - presentation_id           (presentation) int6

In [67]:
gd_arrays[gd_arrays_keys[0]]["category_name"]

<xarray.DataArray 'category_name' (presentation: 158600)>
array([nan, nan, nan, ..., nan, nan, nan])
Coordinates:
  * presentation              (presentation) MultiIndex
  - category_name             (presentation) float64 nan nan nan nan nan nan ...
  - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
  - presentation_id           (presentation) int64 0 1 2 3 4 5 6 7 8 9 10 11 ...
  - image_file_path_original  (presentation) object 'v2-1/V2Data9/NatRev/stimuli/194cc7b0a35c9631d326864532eea21e.jpg' ...
  - image_id                  (presentation) object 'd74c5c31ea2625c047c980025127cbb7fcaabcac' ...

In [68]:
gd_arrays[gd_arrays_keys[0]]["category_name"].dtype

dtype('float64')

In [66]:
np.nonzero(~np.isnan(gd_arrays[gd_arrays_keys[0]]["category_name"].values))

(array([], dtype=int64),)

In [69]:
for da in gd_arrays.values():
    da.reset_index("category_name", drop=True, inplace=True)

In [43]:
np.nonzero(~np.isnan(list(gd_arrays.values())[0]))

<xarray.DataArray (neuroid: 2, presentation: 4312)>
array([[     0,      0,      0, ...,      9,      9,      9],
       [     0,      1,      2, ..., 158146, 158147, 158148]])
Coordinates:
  * neuroid                   (neuroid) MultiIndex
  - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
  - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
  - neuroid_id                (neuroid) object 'e0037' 'e0039' 'e0040' ...
  * presentation              (presentation) MultiIndex
  - category_name             (presentation) float64 nan nan nan nan nan nan ...
  - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
  - presentation_id           (presentation) int64 0 1 2 3 4 5 6 7 8 9 10 11 ...
  - image_file_path_original  (presentation) object 'v2-1/V2Data9/NatRev/stimuli/194cc7b0a35c9631d326864532eea21e.jpg' ...
  - image_id                  (presentation) object 'd74c5c31ea2625c047c980025127cbb7fcaabcac' ...

In [48]:
np.nonzero(~np.isnan(list(gd_arrays.values())[0].values))

(array([0, 0, 0, ..., 9, 9, 9]),
 array([     0,      1,      2, ..., 158146, 158147, 158148]))

In [50]:
gd_arrays_keys = list(gd_arrays.keys())
gd_arrays_keys

['/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data9/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data18/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data2/DGrat/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data3/DGrat/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data4/DGrat/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data5/DGrat/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data6/DGrat/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data7/Nat/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data7/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data8/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkg

In [70]:
align_test = xr.align(gd_arrays[gd_arrays_keys[0]], gd_arrays[gd_arrays_keys[5]], join="outer")
align_test

  if np.issubdtype(dtype, float):


(<xarray.DataArray (neuroid: 20, presentation: 464600)>
 array([[nan,  0., nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]])
 Coordinates:
   * neuroid                   (neuroid) MultiIndex
   - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
   - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
   - neuroid_id                (neuroid) object 'e0037' 'e0039' 'e0040' ...
   * presentation              (presentation) MultiIndex
   - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
   - presentation_id           (presentation) int64 0 0 1 1 2 2 3 3 4 4 5 5 6 ...
   - image_file_path_original  (presentation) object 'v2-1/V2Data4/DGrat/stimuli/eeac392eaf7590b19733c34f31b4322e.jpg' ...
   - image_i

In [71]:
[np.isnan(a).all() for a in align_test]

[<xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False)]

In [60]:
align_test[0].data.dtype

dtype('float64')

In [61]:
align_test[0]

<xarray.DataArray (neuroid: 20, presentation: 464600)>
array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])
Coordinates:
  * neuroid                   (neuroid) MultiIndex
  - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
  - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
  - neuroid_id                (neuroid) object 'e0037' 'e0039' 'e0040' ...
  * presentation              (presentation) MultiIndex
  - category_name             (presentation) object nan nan nan nan nan nan ...
  - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
  - presentation_id           (presentation) int64 0 1 2 3 4 5 6 7 8 9 10 11 ...
  - image_file_path_original  (presentation) object 'v2-1/V2Data9/NatRe

In [54]:
(gd_arrays[gd_arrays_keys[0]], gd_arrays[gd_arrays_keys[5]])

(<xarray.DataArray (neuroid: 10, presentation: 158600)>
 array([[ 0.,  0.,  0., ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]])
 Coordinates:
   * neuroid                   (neuroid) MultiIndex
   - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
   - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
   - neuroid_id                (neuroid) object 'e0037' 'e0039' 'e0040' ...
   * presentation              (presentation) MultiIndex
   - category_name             (presentation) float64 nan nan nan nan nan nan ...
   - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
   - presentation_id           (presentation) int64 0 1 2 3 4 5 6 7 8 9 10 11 ...
   - image_file_path_original  (presentation) object 

In [57]:
[(k, len(np.unique(gd_arrays[k]["image_id"])), np.nonzero(~np.isnan(gd_arrays[k]))) for k in gd_arrays_keys]

[('/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data9/NatRev/data.nc',
  50399,
  <xarray.DataArray (neuroid: 2, presentation: 4312)>
  array([[     0,      0,      0, ...,      9,      9,      9],
         [     0,      1,      2, ..., 158146, 158147, 158148]])
  Coordinates:
    * neuroid                   (neuroid) MultiIndex
    - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
    - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
    - neuroid_id                (neuroid) object 'e0037' 'e0039' 'e0040' ...
    * presentation              (presentation) MultiIndex
    - category_name             (presentation) float64 nan nan nan nan nan nan ...
    - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
    - presentation_id           (presentation) int64 0 1 2 3 4 5 6 7 8 9 10 11 ...
    - image_file_path_original  (presentation) object 'v2-1/V2Data9/NatRev/stimuli/194cc7b0a35c9631d326

In [72]:
aligned = xr.align(*list(gd_arrays.values()), join="outer")
aligned

  if np.issubdtype(dtype, float):


(<xarray.DataArray (neuroid: 135, presentation: 3494103)>
 array([[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]])
 Coordinates:
   * neuroid                   (neuroid) MultiIndex
   - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
   - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
   - neuroid_id                (neuroid) object 'e0017' 'e0018' 'e0021' ...
   * presentation              (presentation) MultiIndex
   - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
   - presentation_id           (presentation) int64 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
   - image_file_path_original  (presentation) object 'v2-1/V2Data1/DGrat/stimuli/1b2a86f3f332f098b9eb2567fe8f04cd.jpg' ...
   - image

In [73]:
aligned[0].shape

(135, 3494103)

In [74]:
~np.isnan(aligned[0])

<xarray.DataArray (neuroid: 135, presentation: 3494103)>
array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])
Coordinates:
  * neuroid                   (neuroid) MultiIndex
  - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
  - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
  - neuroid_id                (neuroid) object 'e0017' 'e0018' 'e0021' ...
  * presentation              (presentation) MultiIndex
  - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
  - presentation_id           (presentation) int64 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
  - image_file_path_original  (presentation) object 'v2-1/V2Data1/DGrat/stimu

In [75]:
[(~np.isnan(da)).any() for da in aligned]

[<xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True), <xarray.DataArray ()>
 array(True)]

In [76]:
np.nonzero(~np.isnan(aligned[0].values))

(array([15, 15, 15, ..., 24, 24, 24]),
 array([     18,      37,      56, ..., 3469049, 3469050, 3469051]))

In [77]:
non_nan_indices = []
for da in aligned:
    non_nan_indices.append(np.flatnonzero(~np.isnan(da.values))) 
non_nan_indices

[array([52411563, 52411582, 52411601, ..., 87327521, 87327522, 87327523]),
 array([380857236, 380857255, 380857274, ..., 422741751, 422741752,
        422741753]),
 array([ 10482309,  10482328,  10482347, ..., 137925857, 137925873,
        137925889]),
 array([139764130, 139764149, 139764168, ..., 228509519, 228509538,
        228509557]),
 array([237599016, 237599035, 237599054, ..., 334931465, 334931470,
        334931475]),
 array([339102204, 339102223, 339102242, ..., 370033691, 370033694,
        370033697]),
 array([370374932, 370374951, 370374970, ..., 408808251, 408808252,
        408808253]),
 array([408810256, 408810275, 408810294, ..., 470028830, 470028844,
        470028858]),
 array([ 13932012,  13932013,  13932014,  13932015,  13932016,  13932017,
         13932018,  34897230,  34897231,  34897232,  34897233,  34897234,
         34897235,  34897236,  34897237,  34897238,  34897239,  34897240,
         34897241,  34897242,  34897243,  34897244,  34897245,  34897246,
      

In [78]:
# should all be False
for a, b in itertools.combinations(non_nan_indices, 2):
    print(np.in1d(a, b).any())

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
Fals

In [79]:
blank = np.full_like(aligned[0], np.nan)
blank

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])

In [80]:
da_result = xr.DataArray(blank, coords=aligned[0].coords, dims=aligned[0].dims)
da_result

<xarray.DataArray (neuroid: 135, presentation: 3494103)>
array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])
Coordinates:
  * neuroid                   (neuroid) MultiIndex
  - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
  - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
  - neuroid_id                (neuroid) object 'e0017' 'e0018' 'e0021' ...
  * presentation              (presentation) MultiIndex
  - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
  - presentation_id           (presentation) int64 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
  - image_file_path_original  (presentation) object 'v2-1/V2Data1/DGrat/stimuli/1b2a86f3f332f098b9eb2567fe8f04cd.jpg' ...
  - image_id               

In [81]:
for da in aligned:
    da_result = da_result.combine_first(da)
da_result

<xarray.DataArray (neuroid: 135, presentation: 3494103)>
array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])
Coordinates:
  * neuroid                   (neuroid) MultiIndex
  - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
  - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
  - neuroid_id                (neuroid) object 'e0017' 'e0018' 'e0021' ...
  * presentation              (presentation) MultiIndex
  - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
  - presentation_id           (presentation) int64 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
  - image_file_path_original  (presentation) object 'v2-1/V2Data1/DGrat/stimuli/1b2a86f3f332f098b9eb2567fe8f04cd.jpg' ...
  - image_id               

In [82]:
np.nonzero(~np.isnan(da_result))

<xarray.DataArray (neuroid: 2, presentation: 74740)>
array([[      0,       0,       0, ...,     134,     134,     134],
       [     16,      35,      54, ..., 3481545, 3481546, 3481547]])
Coordinates:
  * neuroid                   (neuroid) MultiIndex
  - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
  - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
  - neuroid_id                (neuroid) object 'e0017' 'e0018' 'e0021' ...
  * presentation              (presentation) MultiIndex
  - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
  - presentation_id           (presentation) int64 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
  - image_file_path_original  (presentation) object 'v2-1/V2Data1/DGrat/stimuli/1b2a86f3f332f098b9eb2567fe8f04cd.jpg' ...
  - image_id                  (presentation) object '437fcb15ba2caa46c4a2aea77796c8316b8a98cf' ...

In [84]:
sum([len(n) for n in non_nan_indices])

74740

In [85]:
def levels_for_index(xr_data, index):
    return xr_data.indexes[index].names

def all_index_levels(xr_data):
    nested = [levels_for_index(xr_data, index) for index in xr_data.indexes]
    return [x for inner in nested for x in inner]

In [86]:
da_result.reset_index(all_index_levels(da_result), inplace=True)

In [87]:
da_result.to_netcdf("/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/crcns_v2-1_neuronal.nc")

In [88]:
!ls -hal /braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1

total 4.2G
drwxr-xr-x 23 jjpr dicarlo 4.0K Jun  5 15:52 .
drwxr-xr-x  4 jjpr dicarlo 4.0K May 16 14:58 ..
-rw-r--r--  1 jjpr dicarlo 4.2G Jun  5 15:54 crcns_v2-1_neuronal.nc
drwxr-xr-x  2 jjpr dicarlo 4.0K Sep 13  2010 functions
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data1
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data10
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data11
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data12
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data13
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data14
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data15
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data16
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data17
drwxr-xr-x  3 jjpr dicarlo 4.0K Aug 25  2010 V2Data18
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data19
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data2
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data20
drwxrwxrwx  3 jjpr dicarlo 4.0K Jun 17  2010 V2Data3
drwxrwxrwx  3 jjpr