In [1]:
import os
import numpy as np
import pandas as pd
import xarray as xr
import scipy.io as sio
import matplotlib.pyplot as plt
import mkgu

In [2]:
dims_original = ["images", "reps", "time", "units"] # per Darren Seibert, email 2017-10-17
dims = ["stimulus", "repetition", "time_bin", "neuroid"]

## Paths

In [3]:
for_jonas_dir = "/braintree/data2/active/common/for_jonas"

In [4]:
for_jjpr_dir = "/braintree/data2/active/common/for_jjpr"

In [5]:
data_dir = "/braintree/home/jjpr/dev/scratch/mkgu_scratch/data"

## Load .mat files

In [6]:
for_jonas_dir_ls = [os.path.join(for_jonas_dir, x) for x in os.listdir(for_jonas_dir)]
for_jonas_dir_ls

['/braintree/data2/active/common/for_jonas/Chabo_IT_A_HVM0.mat',
 '/braintree/data2/active/common/for_jonas/Chabo_IT_A_HVM3.mat',
 '/braintree/data2/active/common/for_jonas/Tito_IT_A_HVM0.mat',
 '/braintree/data2/active/common/for_jonas/Tito_IT_A_HVM3.mat',
 '/braintree/data2/active/common/for_jonas/Chabo_IT_A_HVM6.mat',
 '/braintree/data2/active/common/for_jonas/Tito_IT_A_HVM6.mat',
 '/braintree/data2/active/common/for_jonas/Chabo_IT_M_HVM0.mat',
 '/braintree/data2/active/common/for_jonas/Chabo_IT_M_HVM3.mat',
 '/braintree/data2/active/common/for_jonas/Tito_IT_M_HVM0.mat',
 '/braintree/data2/active/common/for_jonas/Chabo_IT_M_HVM6.mat',
 '/braintree/data2/active/common/for_jonas/Tito_IT_M_HVM3.mat',
 '/braintree/data2/active/common/for_jonas/Tito_IT_M_HVM6.mat',
 '/braintree/data2/active/common/for_jonas/TitoR_IT_A_HVM0.mat',
 '/braintree/data2/active/common/for_jonas/Chabo_V4_HVM0.mat',
 '/braintree/data2/active/common/for_jonas/Chabo_V4_HVM3.mat',
 '/braintree/data2/active/common/fo

In [7]:
mats = {os.path.basename(mat_file)[:-4]: sio.loadmat(mat_file) for mat_file in for_jonas_dir_ls if mat_file.endswith(".mat")}

In [8]:
mats

{'Chabo_IT_A_HVM0': {'__globals__': [],
  '__header__': b'MATLAB 5.0 MAT-file Platform: posix, Created on: Fri Jan  6 17:04:41 2017',
  '__version__': '1.0',
  'bins': array([[[[1, 1, 0, ..., 1, 2, 1],
           [0, 1, 1, ..., 2, 1, 0],
           [1, 1, 1, ..., 0, 0, 0],
           ...,
           [0, 0, 2, ..., 2, 0, 1],
           [1, 0, 1, ..., 1, 1, 0],
           [0, 0, 1, ..., 0, 0, 1]],
  
          [[1, 1, 0, ..., 1, 0, 2],
           [0, 0, 1, ..., 0, 1, 0],
           [2, 0, 1, ..., 0, 1, 1],
           ...,
           [2, 1, 2, ..., 1, 0, 2],
           [0, 0, 1, ..., 1, 0, 0],
           [1, 0, 0, ..., 1, 0, 1]],
  
          [[1, 1, 1, ..., 2, 0, 3],
           [1, 2, 0, ..., 1, 0, 0],
           [1, 0, 1, ..., 2, 0, 2],
           ...,
           [0, 0, 0, ..., 0, 1, 1],
           [2, 0, 1, ..., 2, 0, 0],
           [1, 0, 0, ..., 1, 0, 0]],
  
          ...,
  
          [[0, 0, 1, ..., 0, 2, 0],
           [0, 0, 1, ..., 1, 0, 0],
           [0, 0, 0, ..., 1, 0, 0],


In [25]:
sorted([(mat, dict(zip(dims_original, mats[mat]["bins"].shape)), mats[mat].get("orig_elecs", np.array([])).squeeze()) for mat in mats])

[('Chabo_IT_A_HVM0',
  {'images': 650, 'reps': 109, 'time': 50, 'units': 40},
  array([16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 40, 41,
         42, 44, 48, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
         64, 87, 88, 91, 92, 94])),
 ('Chabo_IT_A_HVM3',
  {'images': 2570, 'reps': 45, 'time': 50, 'units': 40},
  array([16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 40, 41,
         42, 44, 48, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
         64, 87, 88, 91, 92, 94])),
 ('Chabo_IT_A_HVM6',
  {'images': 2570, 'reps': 67, 'time': 50, 'units': 40},
  array([16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 40, 41,
         42, 44, 48, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
         64, 87, 88, 91, 92, 94])),
 ('Chabo_IT_M_HVM0',
  {'images': 650, 'reps': 109, 'time': 50, 'units': 18},
  array([ 6,  8,  9, 10, 11, 12, 22, 23, 26, 27, 28, 30, 63, 65, 71, 77, 95,
         96])),
 ('Chabo_IT_M_HVM3',
  {'

## Functions For Building Assemblies

In [9]:
def coords_from_df(dim, df, name_map):
    coords_d = {}
    for col in name_map:
        col_ser = df[name_map[col]]
        if col_ser.dtype.kind in ["S", "O"]:
            col_ser = col_ser.astype("unicode")
        coords_d[col] = (dim, col_ser)
    return coords_d

In [10]:
# This is ad-hoc, based on the file name conventions in one particular directory at one particular time
def parse_mat_name(mat_name):
    spl = mat_name.split("_")
    
    if spl[0].endswith("R"):
        hemisphere = "R" 
        animal = spl[0][:-1]
    else:
        hemisphere = "L"
        animal = spl[0]
    
    region = spl[1]
    
    if len(spl) == 3:
        arr = "P"
    else:
        arr = spl[2]
        
    variation = int(spl[-1][-1])
        
    return {"animal": animal, "hemisphere": hemisphere, "region": region, "array": arr, "variation": variation}

In [11]:
def make_mat_name(animal, hemisphere, region, array, variation, **kwargs):
    result = [animal]
    if hemisphere == "R":
        result[0] = result[0] + "R"
    result.append(region)
    if region != "V4":
        result.append(array)
    result.append("HVM" + str(variation))
    return "_".join(result)

In [12]:
def get_coords_stimulus(mat):
    # Building coords for stimulus
    image_df_cols = {
        "image_index": range(len(mat["stm_file_names"])),
        "image_file_name": np.core.defchararray.strip(mat["stm_file_names"])
    }
    df_stm_file_names = pd.DataFrame(image_df_cols)
    simpler_meta_file = "/braintree/home/qbilius/.streams/hvm/meta.pkl"
    df_simpler_meta = pd.read_pickle(simpler_meta_file)
    joined_simpler = pd.merge(df_stm_file_names, df_simpler_meta, how="left", left_on="image_file_name",
                              right_on="filename").sort_values("image_index")

    stimulus_map = {
        'image_background_id': 'bg_id',
        'category_name': 'category',
        'image_file_name': 'image_file_name',
        'image_axis_index': "image_index",
        'image_id': 'id',
        'object_name': 'objname',
        'rxy': 'rxy',
        'rxy_semantic': 'rxy_semantic',
        'rxz': 'rxz',
        'rxz_semantic': 'rxz_semantic',
        'ryz': 'ryz',
        'ryz_semantic': 'ryz_semantic',
        's': 's',
        'image_size': 'size',
        'ty': 'ty',
        'tz': 'tz',
        'variation': 'var'
    }

    coords_stimulus = coords_from_df("stimulus", joined_simpler, stimulus_map)
    return coords_stimulus

In [13]:
def get_coords_repetition(mat):
    coords_repetition = {
        "repetition_index": ("repetition", range(mat["bins"].shape[1]))
    }
    return coords_repetition

In [14]:
def get_coords_time_bin():
    # ### Building coords for time bin
    time_bin_start = np.arange(0, 500, 10)
    coords_time_bin = {
        "time_bin_start": ("time_bin", time_bin_start),
        "time_bin_end": ("time_bin", time_bin_start + 10),
        "time_bin_center": ("time_bin", time_bin_start + 5),
    }
    return coords_time_bin

In [93]:
def get_coords_neuroid(mat, mat_name, map_dir):
    # Building coords for neuroid
    len_neuroid = mat["bins"].shape[3]
    coords_neuroid_uniform = parse_mat_name(mat_name)
    coords_neuroid_uniform.pop("variation")
    df_neuroid = pd.DataFrame(
        {field: [coords_neuroid_uniform[field]] * len_neuroid for field in coords_neuroid_uniform})
    if "orig_elecs" in mat:
        orig_elecs = mat["orig_elecs"].squeeze() - orig_elec_base_guesses[mat_name]
    else:
        orig_elecs = np.arange(len_neuroid)
    df_neuroid["original_electrodes"] = orig_elecs
    # find the .cmp or .txt or excel or .mat file corresponding to Tito_V4_HVM6.mat
    map_file_name = map_file_from_mat_name(map_dir, mat_name)
    electrode_map = sio.loadmat(map_file_name)
    electrode_row = electrode_map["row"].squeeze()
    electrode_column = electrode_map["col"].squeeze()
    df_electrode_map = pd.DataFrame({"row": electrode_row, "column": electrode_column})
    joined_orig_elecs = pd.merge(df_neuroid, df_electrode_map,
                                 how="left", left_on="original_electrodes", right_index=True)
    id_build = joined_orig_elecs["animal"] + ["_"] * len_neuroid
    id_build = id_build + joined_orig_elecs["hemisphere"] + ["_"] * len_neuroid
    id_build = id_build + joined_orig_elecs["array"] + ["_"] * len_neuroid
    id_build = id_build + joined_orig_elecs["row"].astype("unicode") + ["_"] * len_neuroid
    id_build = id_build + joined_orig_elecs["column"].astype("unicode")
    joined_orig_elecs["neuroid_id"] = id_build
    neuroid_map = {x: x for x in joined_orig_elecs.columns}
    coords_neuroid = coords_from_df("neuroid", joined_orig_elecs, neuroid_map)
    return coords_neuroid

In [32]:
# (mkgu_packaging) jjpr@braintree-cpu-1:~/dev/mkgu_packaging$ cat /braintree/data1/archive/common/mindhive/dicarlolab/proj/array_data/array/log/swcard_20110720/summary_selected128.txt
summary_selected128 = '''91 95 57 56  5 58 92 31 75 89 86 62 59 55  7 81 47 48 52 13 15 63 21 88  9 49  0 54 12 78 29 14 27 53 25 23 17 85 94  4
185 186 153 138 183 140 149 178 150 176 181 172 155 120 189 123 108 180 184 174 113 182 105 188 148 145 125 158 107 187 116 126 112 121 173 135 157 191 117 137 175 171 111 167 190 152 134 177 147 139 124 165 100 136 106 141 151 118 166 104 114 168 142 115 179 133 132 169 122 144
250 223 268 282 284 254 248 286 287 246 252 245 285 219 255 218 283 222
'''
"/braintree/data2/active/users/darren/mindhive/dicarlolab/u/darren/mworks_array_xmls/preproc/orig_hvm/swcards/swcard_20110720/summary_selected128.txt"
selected = [list(map(int, x.split())) for x in summary_selected128.splitlines()]
[(min(x), max(x)) for x in selected]

[(0, 95), (100, 191), (218, 287)]