In [1]:
import os

In [2]:
import re

In [3]:
import zipfile

In [4]:
import glob

In [None]:
import itertools

In [5]:
import numpy as np

In [6]:
import pandas as pd

In [7]:
import xarray as xr

In [8]:
import matplotlib.pyplot as plt

In [9]:
import mkgu

In [10]:
from mkgu.knownfile import KnownFile as kf

In [11]:
from mkgu.knownfile import FileRecord, Sighting

In [12]:
from mkgu.lookup import pwdb

In [13]:
from mkgu.assemblies import AssemblyModel, AssemblyStoreMap, AssemblyStoreModel

In [14]:
from mkgu.stimuli import ImageModel, AttributeModel, ImageMetaModel, StimulusSetModel, ImageStoreModel, \
    StimulusSetImageMap, ImageStoreMap

## Process .nc files

In [15]:
v2_base_path = "/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1"

In [16]:
nc_files = glob.glob(os.path.join(v2_base_path, "*/*/*.nc"), recursive=True)
sorted(nc_files)

['/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data10/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data11/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data12/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data13/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data14/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data15/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data16/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data17/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data18/NatRev/data.nc',
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data19/NatRev/data.nc',
 '/braintree/data2/acti

In [17]:
gd_arrays = {}
for f in nc_files:
    gd_arrays[f] = xr.open_dataarray(f)
gd_arrays

{'/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/data.nc': <xarray.DataArray (image_file_name: 96000, neuroid: 10)>
 [960000 values with dtype=float64]
 Coordinates:
   * image_file_name  (image_file_name) object '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/stimuli//1b2a86f3f332f098b9eb2567fe8f04cd.jpg' ...
     category_name    (image_file_name) float64 ...
     stimulusRepeats  (image_file_name) int64 ...
   * neuroid          (neuroid) object 'e0022' 'e0026' 'e0031' 'e0033' ...
     region           (neuroid) object ...
     animal           (neuroid) object ...,
 '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data10/NatRev/data.nc': <xarray.DataArray (image_file_name: 83600, neuroid: 10)>
 [836000 values with dtype=float64]
 Coordinates:
   * image_file_name  (image_file_name) object '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data10/NatRev/stimuli//0c816a8354b91506395116bd8145f96b.jpg' 

In [18]:
for gd_array_key in gd_arrays:
    gd_array = gd_arrays[gd_array_key]
    gd_array = gd_array.T.rename({"image_file_name": "presentation"})
    gd_array.coords["presentation_id"] = ("presentation", range(gd_array.shape[1]))
    gd_array.coords["neuroid_id"] = ("neuroid", gd_array["neuroid"].values)
    gd_arrays[gd_array_key] = gd_array
gd_arrays

{'/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/data.nc': <xarray.DataArray (neuroid: 10, presentation: 96000)>
 array([[ 1.,  0.,  1., ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]])
 Coordinates:
   * presentation     (presentation) object '/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/stimuli//1b2a86f3f332f098b9eb2567fe8f04cd.jpg' ...
     category_name    (presentation) float64 ...
     stimulusRepeats  (presentation) int64 ...
   * neuroid          (neuroid) object 'e0022' 'e0026' 'e0031' 'e0033' ...
     region           (neuroid) object ...
     animal           (neuroid) object ...
     presentation_id  (presentation) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ...
     neuroid_id       (neuroid) object 'e0022' 'e00

In [19]:
def massage_file_name(file_name):
    split = re.split("\\\\|/", file_name)
    split = [t for t in split if t]
    relative_path = os.path.join(*split[-5:])
    full_path = os.path.join("/", *split)
    basename = split[-1]
    exists = os.path.exists(full_path)
    sha1 = kf(full_path).sha1
    result = {
        "image_file_path_original": relative_path,
        "image_id": sha1
    }
    return result

In [None]:
for gd_array_key in gd_arrays:
    print(gd_array_key)
    gd_array = gd_arrays[gd_array_key]
    df_massage = pd.DataFrame(list(map(massage_file_name, gd_array["presentation"].values)))
    for column in df_massage.columns:
        gd_array.coords[column] = ("presentation", df_massage[column])
    gd_array.reset_index(["neuroid", "presentation"], drop=True, inplace=True)
gd_arrays

#### Combine arrays

In [32]:
neuroid_sum, presentation_sum = (0, 0)
for k in gd_arrays:
    neuroid_sum = neuroid_sum + gd_arrays[k].shape[0]
    presentation_sum = presentation_sum + gd_arrays[k].shape[1]
(neuroid_sum, presentation_sum)

(195, 3494103)

In [None]:
for gd_array_key in gd_arrays:
    gd_array = gd_arrays[gd_array_key]
    mkgu.assemblies.gather_indexes(gd_array)
gd_arrays

In [36]:
gd_arrays

{'/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/V2Data1/DGrat/data.nc': <xarray.DataArray (neuroid: 10, presentation: 96000)>
 array([[ 1.,  0.,  1., ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]])
 Coordinates:
   * neuroid                   (neuroid) MultiIndex
   - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
   - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
   - neuroid_id                (neuroid) object 'e0022' 'e0026' 'e0031' ...
   * presentation              (presentation) MultiIndex
   - category_name             (presentation) float64 nan nan nan nan nan nan ...
   - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
   - presentation_id           (presentation) int6

In [43]:
np.nonzero(~np.isnan(list(gd_arrays.values())[0]))

<xarray.DataArray (neuroid: 2, presentation: 4312)>
array([[     0,      0,      0, ...,      9,      9,      9],
       [     0,      1,      2, ..., 158146, 158147, 158148]])
Coordinates:
  * neuroid                   (neuroid) MultiIndex
  - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
  - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
  - neuroid_id                (neuroid) object 'e0037' 'e0039' 'e0040' ...
  * presentation              (presentation) MultiIndex
  - category_name             (presentation) float64 nan nan nan nan nan nan ...
  - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
  - presentation_id           (presentation) int64 0 1 2 3 4 5 6 7 8 9 10 11 ...
  - image_file_path_original  (presentation) object 'v2-1/V2Data9/NatRev/stimuli/194cc7b0a35c9631d326864532eea21e.jpg' ...
  - image_id                  (presentation) object 'd74c5c31ea2625c047c980025127cbb7fcaabcac' ...

In [48]:
np.nonzero(~np.isnan(list(gd_arrays.values())[0].values))

(array([0, 0, 0, ..., 9, 9, 9]),
 array([     0,      1,      2, ..., 158146, 158147, 158148]))

In [44]:
aligned = xr.align(*list(gd_arrays.values()), join="outer")
aligned

  if np.issubdtype(dtype, float):


(<xarray.DataArray (neuroid: 135, presentation: 3494103)>
 array([[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]])
 Coordinates:
   * neuroid                   (neuroid) MultiIndex
   - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
   - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
   - neuroid_id                (neuroid) object 'e0017' 'e0018' 'e0021' ...
   * presentation              (presentation) MultiIndex
   - category_name             (presentation) object nan nan nan nan nan nan ...
   - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
   - presentation_id           (presentation) int64 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
   - image_file_path_original  (presentation) object

In [28]:
aligned[0].shape

(135, 3494103)

In [33]:
~np.isnan(aligned[0])

<xarray.DataArray (neuroid: 135, presentation: 3494103)>
array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])
Coordinates:
  * neuroid                   (neuroid) MultiIndex
  - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
  - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
  - neuroid_id                (neuroid) object 'e0017' 'e0018' 'e0021' ...
  * presentation              (presentation) MultiIndex
  - category_name             (presentation) object nan nan nan nan nan nan ...
  - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
  - presentation_id           (presentation) int64 0 0 0 0 0 0 0 0 0 0 0 0 0 .

In [47]:
[(~np.isnan(da)).any() for da in aligned]

[<xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False), <xarray.DataArray ()>
 array(False)]

In [45]:
np.nonzero(~np.isnan(aligned[0].values))

(array([], dtype=int64), array([], dtype=int64))

In [31]:
non_nan_indices = []
for da in aligned:
    non_nan_indices.append(np.flatnonzero(~np.isnan(da.values))) 
non_nan_indices

[array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64)]

In [None]:
# should all be False
for a, b in itertools.combinations(non_nan_indices, 2):
    print(np.in1d(a, b).any())

In [63]:
blank = np.full_like(aligned[0], np.nan)
blank

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])

In [64]:
da_result = xr.DataArray(blank, coords=aligned[0].coords, dims=aligned[0].dims)
da_result

<xarray.DataArray (neuroid: 135, presentation: 3494103)>
array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])
Coordinates:
  * neuroid                   (neuroid) MultiIndex
  - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
  - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
  - neuroid_id                (neuroid) object 'e0017' 'e0018' 'e0021' ...
  * presentation              (presentation) MultiIndex
  - category_name             (presentation) object nan nan nan nan nan nan ...
  - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
  - presentation_id           (presentation) int64 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
  - image_file_path_original  (presentation) object 'v2-1/V2Data1/DGr

In [65]:
for da in aligned:
    da_result = da_result.combine_first(da)
da_result

<xarray.DataArray (neuroid: 135, presentation: 3494103)>
array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])
Coordinates:
  * neuroid                   (neuroid) MultiIndex
  - region                    (neuroid) object 'V2' 'V2' 'V2' 'V2' 'V2' 'V2' ...
  - animal                    (neuroid) object 'e' 'e' 'e' 'e' 'e' 'e' 'e' ...
  - neuroid_id                (neuroid) object 'e0017' 'e0018' 'e0021' ...
  * presentation              (presentation) MultiIndex
  - category_name             (presentation) object nan nan nan nan nan nan ...
  - stimulusRepeats           (presentation) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
  - presentation_id           (presentation) int64 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
  - image_file_path_original  (presentation) object 'v2-1/V2Data1/DGr

In [1]:
def levels_for_index(xr_data, index):
    return xr_data.indexes[index].names

def all_index_levels(xr_data):
    nested = [levels_for_index(xr_data, index) for index in xr_data.indexes]
    return [x for inner in nested for x in inner]

In [2]:
da_result.reset_index(all_index_levels(da_blank), inplace=True)

NameError: name 'da_blank' is not defined

In [None]:
da_result.to_netcdf("/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1/crcns_v2-1_neuronal.nc")

In [26]:
dir()

['AssemblyModel',
 'AssemblyStoreMap',
 'AssemblyStoreModel',
 'AttributeModel',
 'FileRecord',
 'ImageMetaModel',
 'ImageModel',
 'ImageStoreMap',
 'ImageStoreModel',
 'In',
 'Out',
 'Sighting',
 'StimulusSetImageMap',
 'StimulusSetModel',
 '_',
 '_16',
 '_17',
 '_18',
 '_20',
 '_21',
 '_22',
 '_23',
 '_24',
 '__',
 '___',
 '__builtin__',
 '__builtins__',
 '__doc__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_dh',
 '_i',
 '_i1',
 '_i10',
 '_i11',
 '_i12',
 '_i13',
 '_i14',
 '_i15',
 '_i16',
 '_i17',
 '_i18',
 '_i19',
 '_i2',
 '_i20',
 '_i21',
 '_i22',
 '_i23',
 '_i24',
 '_i25',
 '_i26',
 '_i3',
 '_i4',
 '_i5',
 '_i6',
 '_i7',
 '_i8',
 '_i9',
 '_ih',
 '_ii',
 '_iii',
 '_oh',
 'aligned',
 'column',
 'df_massage',
 'exit',
 'f',
 'gd_array',
 'gd_array_key',
 'gd_arrays',
 'get_ipython',
 'glob',
 'itertools',
 'k',
 'kf',
 'massage_file_name',
 'mkgu',
 'nc_files',
 'neuroid_sum',
 'np',
 'os',
 'pd',
 'plt',
 'presentation_sum',
 'pwdb',
 'quit',
 're',
 'v2_base_path'

## Make the image zip file

In [29]:
df_image_meta = pd.DataFrame({"image_id": np.unique(da_result["image_id"].values)})

In [30]:
def first_dupe(sha1):
    fr = FileRecord.get(sha1=sha1)
    return fr.sightings[0].location
# order is not guaranteed, so on subsequent runs test that you got the same result, see below

In [31]:
df_image_meta["first_dupe"] = list(map(first_dupe, df_image_meta["image_id"]))
df_image_meta

Unnamed: 0,image_id,first_dupe
0,000035942277141353c41e43868fed37718f3264,/Users/jjpr/dev/scratch/gallant_data/gallant-V...
1,0000365716935db4f6c4e8fe01784c0f6986f5a3,/Users/jjpr/dev/scratch/gallant_data/gallant-V...
2,00003a9921d3b3db0e76898191289c7fe8150975,/Users/jjpr/dev/scratch/gallant_data/gallant-V...
3,0000a8aa19d0a68f278f240b716d5bbc8f0c6691,/Users/jjpr/dev/scratch/gallant_data/gallant-V...
4,00010bb8a48e7a0997cb31c21662d826603ee4a8,/Users/jjpr/dev/scratch/gallant_data/gallant-V...
5,0001df478512ce4b526bb9f3649bd604d417ace2,/Users/jjpr/dev/scratch/gallant_data/gallant-V...
6,00030a3bd87f598ce692026432462f552a545520,/Users/jjpr/dev/scratch/gallant_data/gallant-V...
7,000539371e6d73f77563dc34e58cb6d361bebf00,/Users/jjpr/dev/scratch/gallant_data/gallant-V...
8,00053e951981d431da7e75ac529e627adbceb054,/Users/jjpr/dev/scratch/gallant_data/gallant-V...
9,00058e96f1df6e5249a5f7ee6e9705e64a25c9c4,/Users/jjpr/dev/scratch/gallant_data/gallant-V...


In [51]:
def get_relative(path, base):
    split_path = path.split("/")
    split_base = base.split("/")
    target_path = "/".join(split_path[len(split_base):])
    return target_path

In [52]:
df_image_meta["relative_path"] = list(map(lambda x: get_relative(x, file_base), df_image_meta["first_dupe"]))
df_image_meta

Unnamed: 0,image_id,first_dupe,copied,relative_path
0,000035942277141353c41e43868fed37718f3264,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0210A/test.natrev.24_Hz.1.imsm/3795.jpg
1,0000365716935db4f6c4e8fe01784c0f6986f5a3,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0170A/test.natrev.size.imsm/1182.jpg
2,00003a9921d3b3db0e76898191289c7fe8150975,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0148A/NatRev.MountLake.40.150.imsm/4229.jpg
3,0000a8aa19d0a68f278f240b716d5bbc8f0c6691,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0210A/test.natrev.1.imsm/2280.jpg
4,00010bb8a48e7a0997cb31c21662d826603ee4a8,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0168B/test.natrev.clown.24sm.imsm/5085.jpg
5,0001df478512ce4b526bb9f3649bd604d417ace2,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0217B/test.natrev.forestferns.20_pix.imsm/485...
6,00030a3bd87f598ce692026432462f552a545520,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0210A/test.natrev.24_Hz.1.imsm/5517.jpg
7,000539371e6d73f77563dc34e58cb6d361bebf00,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0169B/test.natrev.mountlake.imsm/1028.jpg
8,00053e951981d431da7e75ac529e627adbceb054,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0158A/test.natrev.mountlake.20sm.imsm/6297.jpg
9,00058e96f1df6e5249a5f7ee6e9705e64a25c9c4,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0217B/test.natrev.andros.20_pix.imsm/2320.jpg


In [40]:
target_zip_path = "/Users/jjpr/.mkgu/data/gallant.David2004/gallant_crcns_v1_stimuli.zip"

In [53]:
with zipfile.ZipFile(target_zip_path, 'w') as target_zip:
    for image in df_image_meta.itertuples():
        target_zip.write(image.first_dupe, arcname=image.relative_path)

In [54]:
containing_dir = os.path.dirname(target_zip_path)
with zipfile.ZipFile(target_zip_path, 'r') as new_zip:
    new_zip.extractall(containing_dir)

In [55]:
def copied(source):
    split = source.split("/")
    target = os.path.join(containing_dir, "/".join(split[8:]))
    return os.path.exists(target)

In [56]:
df_image_meta["copied"] = list(map(copied, df_image_meta["first_dupe"]))
df_image_meta

Unnamed: 0,image_id,first_dupe,copied,relative_path
0,000035942277141353c41e43868fed37718f3264,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0210A/test.natrev.24_Hz.1.imsm/3795.jpg
1,0000365716935db4f6c4e8fe01784c0f6986f5a3,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0170A/test.natrev.size.imsm/1182.jpg
2,00003a9921d3b3db0e76898191289c7fe8150975,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0148A/NatRev.MountLake.40.150.imsm/4229.jpg
3,0000a8aa19d0a68f278f240b716d5bbc8f0c6691,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0210A/test.natrev.1.imsm/2280.jpg
4,00010bb8a48e7a0997cb31c21662d826603ee4a8,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0168B/test.natrev.clown.24sm.imsm/5085.jpg
5,0001df478512ce4b526bb9f3649bd604d417ace2,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0217B/test.natrev.forestferns.20_pix.imsm/485...
6,00030a3bd87f598ce692026432462f552a545520,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0210A/test.natrev.24_Hz.1.imsm/5517.jpg
7,000539371e6d73f77563dc34e58cb6d361bebf00,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0169B/test.natrev.mountlake.imsm/1028.jpg
8,00053e951981d431da7e75ac529e627adbceb054,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0158A/test.natrev.mountlake.20sm.imsm/6297.jpg
9,00058e96f1df6e5249a5f7ee6e9705e64a25c9c4,/Users/jjpr/dev/scratch/gallant_data/gallant-V...,True,r0217B/test.natrev.andros.20_pix.imsm/2320.jpg


In [57]:
all(df_image_meta["copied"])

True

In [58]:
ls $target_base

[34mr0148A[m[m/ [34mr0156A[m[m/ [34mr0162B[m[m/ [34mr0168B[m[m/ [34mr0170A[m[m/ [34mr0208D[m[m/ [34mr0211A[m[m/ [34mr0215B[m[m/
[34mr0154B[m[m/ [34mr0158A[m[m/ [34mr0164C[m[m/ [34mr0169B[m[m/ [34mr0206B[m[m/ [34mr0210A[m[m/ [34mr0212B[m[m/ [34mr0217B[m[m/


## Make the StimulusSet lookup meta

In [23]:
pwdb.connect(reuse_if_open=True)

True

In [28]:
pwdb.create_tables(models=[ImageModel, AttributeModel, ImageMetaModel, StimulusSetModel, ImageStoreModel, StimulusSetImageMap, ImageStoreMap])

In [59]:
gallant_v1_images, created = StimulusSetModel.get_or_create(name="gallant.David2004")

In [60]:
gallant_v1_image_store, created = ImageStoreModel.get_or_create(location_type="S3", store_type="zip",
                                  location="https://mkgu-gallant-crcns.s3.amazonaws.com/gallant_crcns_v1_stimuli.zip")

In [61]:
eav_image_file_sha1 = AttributeModel(name="image_file_sha1", type="str")
eav_image_file_path_unique = AttributeModel(name="image_file_path_unique", type="str")

eav_image_file_sha1.save()
eav_image_file_path_unique.save()

1

In [62]:
for image in df_image_meta.itertuples():
    pw_image = ImageModel(image_id=image.image_id)
    pw_stimulus_set_image_map = StimulusSetImageMap(stimulus_set=gallant_v1_images, image=pw_image)
    pw_image_image_store_map = ImageStoreMap(image=pw_image, image_store=gallant_v1_image_store,
                                             path=image.relative_path)
    pw_image.save()
    pw_stimulus_set_image_map.save()
    pw_image_image_store_map.save()
    ImageMetaModel(image=pw_image, attribute=eav_image_file_sha1, value=str(image.image_id)).save()
    ImageMetaModel(image=pw_image, attribute=eav_image_file_path_unique, value=str(image.relative_path)).save()

In [63]:
gallant_v1_stimulus_set = mkgu.get_stimulus_set("gallant.David2004")
gallant_v1_stimulus_set

Unnamed: 0,id,image_id,image_file_sha1,image_file_path_unique
0,5761,000035942277141353c41e43868fed37718f3264,000035942277141353c41e43868fed37718f3264,r0210A/test.natrev.24_Hz.1.imsm/3795.jpg
1,5762,0000365716935db4f6c4e8fe01784c0f6986f5a3,0000365716935db4f6c4e8fe01784c0f6986f5a3,r0170A/test.natrev.size.imsm/1182.jpg
2,5763,00003a9921d3b3db0e76898191289c7fe8150975,00003a9921d3b3db0e76898191289c7fe8150975,r0148A/NatRev.MountLake.40.150.imsm/4229.jpg
3,5764,0000a8aa19d0a68f278f240b716d5bbc8f0c6691,0000a8aa19d0a68f278f240b716d5bbc8f0c6691,r0210A/test.natrev.1.imsm/2280.jpg
4,5765,00010bb8a48e7a0997cb31c21662d826603ee4a8,00010bb8a48e7a0997cb31c21662d826603ee4a8,r0168B/test.natrev.clown.24sm.imsm/5085.jpg
5,5766,0001df478512ce4b526bb9f3649bd604d417ace2,0001df478512ce4b526bb9f3649bd604d417ace2,r0217B/test.natrev.forestferns.20_pix.imsm/485...
6,5767,00030a3bd87f598ce692026432462f552a545520,00030a3bd87f598ce692026432462f552a545520,r0210A/test.natrev.24_Hz.1.imsm/5517.jpg
7,5768,000539371e6d73f77563dc34e58cb6d361bebf00,000539371e6d73f77563dc34e58cb6d361bebf00,r0169B/test.natrev.mountlake.imsm/1028.jpg
8,5769,00053e951981d431da7e75ac529e627adbceb054,00053e951981d431da7e75ac529e627adbceb054,r0158A/test.natrev.mountlake.20sm.imsm/6297.jpg
9,5770,00058e96f1df6e5249a5f7ee6e9705e64a25c9c4,00058e96f1df6e5249a5f7ee6e9705e64a25c9c4,r0217B/test.natrev.andros.20_pix.imsm/2320.jpg


## Make the DataAssembly lookup meta

In [64]:
pwdb.create_tables(models=[AssemblyModel, AssemblyStoreMap, AssemblyStoreModel])

In [65]:
assy = AssemblyModel(name="gallant.David2004", assembly_class="NeuronRecordingAssembly",
                     stimulus_set=gallant_v1_images)
assy.save()

1

In [66]:
store = AssemblyStoreModel(assembly_type="netCDF",
                           location_type="S3",
                           location="https://mkgu-gallant-crcns.s3.amazonaws.com/gallant_v1_single_electrode.nc")
store.save()

1

In [67]:
assy_store_map = AssemblyStoreMap(assembly_model=assy, assembly_store_model=store, role="gallant.David2004")
assy_store_map.save()

1

In [68]:
gallant_v1 = mkgu.get_assembly("gallant.David2004")
gallant_v1

<xarray.NeuronRecordingAssembly (neuroid: 23, presentation: 189305)>
array([[  0.,   0.,   0., ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       ..., 
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,   0.,   0.,   0.]])
Coordinates:
  * neuroid                   (neuroid) object 'r0148A' 'r0150B' 'r0154B' ...
  * presentation              (presentation) MultiIndex
  - presentation_id           (presentation) int64 0 1 2 3 4 5 6 7 8 9 10 11 ...
  - image_file_path_original  (presentation) object 'r0148A/NatRev.MountLake.40.150.imsm/1.jpg' ...
  - image_id                  (presentation) object '33613a99bdc22fe498864bf32b798e018a961ed7' ...
  - id                        (presentation) int64 29882 71211 63228 49398 ...
  - image_file_sha1           (presentation) object '33613a99bdc22fe498864bf32b798e018a961ed7' ...
  - image_fi

In [69]:
len(np.unique(gallant_v1["image_file_path_original"].values))

189305

In [70]:
len(np.unique(gallant_v1["image_file_path_unique"].values))

120017