# Note: saved_gt and diced_gt are not the same

## Load a diced_gt

In [None]:
import json
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import os

from diced import DicedStore

# Connect to the public FlyEM connectome bucket and open the repository.
# NOTE(review): both handles open the same "medulla7column" repo — confirm that
# two separate handles are actually needed.
store = DicedStore("gs://flyem-public-connectome")
repo_grayscale = store.open_repo("medulla7column")
repo_groundtruth = store.open_repo("medulla7column")


In [None]:
# Fetch the array handles for the raw image data and its label volume.
grayscale = repo_grayscale.get_array("grayscale")
groundtruth = repo_groundtruth.get_array("groundtruth")

In [None]:
# Bounds of the example cube in volume coordinates (used as half-open slices below).
xmin, xmax = 3253, 3773
ymin, ymax = 2103, 2623
zmin, zmax = 3490, 4010

# The ground-truth array is sliced in (z, y, x) order.
origin_gt_cube = groundtruth[zmin:zmax, ymin:ymax, xmin:xmax]

# Distinct label values that occur inside the cube.
unique_origin_gt_cube = np.unique(origin_gt_cube)

origin_gt_cube.shape

## Load a saved_gt

In [1]:
import h5py
import numpy as np
# conda install -c anaconda h5py
"""
Reference : https://stackoverflow.com/questions/27710245/is-there-an-analysis-speed-or-memory-usage-advantage-to-using-hdf5-for-large-arr
Saving 'thousand_gry' in np.array form, its size is 2.7GB,
but saving 'thousand_gry' in HDF form, its size is 1.1GB
"""
def hdf_read(path, keyword):
    """Open the HDF5 file at ``path`` read-only and return the entry named ``keyword``.

    The file handle is not closed here, so it stays open after the call.
    NOTE(review): presumably the returned h5py object is only readable while
    the file is open — confirm before adding a close.
    """
    handle = h5py.File(path, 'r')
    entry = handle[keyword]
    return entry

def hdf_write(path, keyword, data):
    """Write ``data`` to a new HDF5 file at ``path`` as a chunked dataset.

    Args:
        path: Destination file. It is opened in 'w' mode, so an existing file
            at this path is replaced.
        keyword: Name of the dataset created inside the file.
        data: Array-like content stored in the dataset.
    """
    with h5py.File(path, 'w') as outfile:
        # The dataset handle is not needed afterwards; the file is closed on exit.
        outfile.create_dataset(keyword, data=data, chunks=True)

In [None]:
# Load the 'stack' dataset of the validation sample fully into memory.
# NOTE(review): gt_path is an absolute, machine-specific path — adjust it to
# your checkout before running.
gt_path = '/data/git/ffn/third_party/neuroproof_examples/validation_sample/groundtruth.h5'
saved_gt = np.array(hdf_read(gt_path, 'stack'))

# Distinct label values that occur in the saved volume.
unique_saved_gt = np.unique(saved_gt)
saved_gt.shape

## Check whether saved_gt is the same as diced_gt

In [None]:
from scipy import ndimage as ndi
from skimage import color

# Index of the z-slice shown in the overlay plots.
z_value = 30

# Colour every label of the chosen diced_gt slice and draw it semi-transparently.
color_labels = color.label2rgb(origin_gt_cube[z_value])
plt.imshow(color_labels, alpha=0.3)
# plt.savefig('raw+gt_%d_by_google.png' % z_value, transparent=True)
plt.show()

In [None]:
# Same overlay for the saved_gt volume, at the same slice index.
color_labels = color.label2rgb(saved_gt[z_value], alpha=0.1)

plt.imshow(color_labels, alpha=0.3)
# plt.savefig('raw+inference_%d_by_google.png' % z_value, transparent=True)
plt.show()

In [None]:
# Compare how many distinct labels each volume has: first whether the counts
# match, then the two counts themselves. Equal counts alone do not show that
# the label sets are identical.
print(len(unique_saved_gt) == len(unique_origin_gt_cube))
print(len(unique_saved_gt))
print(len(unique_origin_gt_cube))

----------------------------------------------------------------------------------------------

# Save and load a diced_gt_from_server

In [None]:
import h5py
import numpy as np
# conda install -c anaconda h5py
"""
Reference : https://stackoverflow.com/questions/27710245/is-there-an-analysis-speed-or-memory-usage-advantage-to-using-hdf5-for-large-arr
Saving 'thousand_gry' in np.array form, its size is 2.7GB,
but saving 'thousand_gry' in HDF form, its size is 1.1GB
"""
def hdf_read(path, keyword):
    """Open the HDF5 file at ``path`` read-only and return the entry named ``keyword``.

    The file handle is not closed here, so it stays open after the call.
    NOTE(review): presumably the returned h5py object is only readable while
    the file is open — confirm before adding a close.
    """
    handle = h5py.File(path, 'r')
    entry = handle[keyword]
    return entry

import os
# Cache the cube fetched from the server on disk: write it on the first run,
# read it back from the cache file on later runs.
diced_gt_path = "./save/diced_gt_from_server.hdf"
if not os.path.exists(diced_gt_path):
    # Creating the cache file fails when the ./save directory is missing.
    if not os.path.isdir("./save"):
        os.makedirs("./save")
    with h5py.File(diced_gt_path, "w") as outfile:
        outfile.create_dataset('diced_gt_from_server', data=origin_gt_cube, chunks=True)
    diced_gt = origin_gt_cube
else:
    gt_path = diced_gt_path
    diced_gt = np.array(hdf_read(gt_path, 'diced_gt_from_server'))

# Get neuron coordinates

In [None]:
import json
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

# Read roi.json and turn its content into a numpy array.
with open('json_repo/roi.json', 'r') as f:
    data = json.load(f)

roi_idx = np.array(data)

# Preview the first five entries.
roi_idx[:5]


In [None]:
# Wrap 'roi_idx' in a DataFrame with one named column per ROI field.
col_name = ['z_start', 'y_start', 'x0_start', 'x1_start']
df = pd.DataFrame(data=roi_idx, columns=col_name)

df.head()


In [None]:
# Translate to voxel coordinates by scaling every entry by 32.
# NOTE(review): presumably the ROI entries are in units of 32-voxel blocks — confirm.
df_coord = 32 * df

df_coord.head()


In [None]:
# Load the synapse.json file.
with open('json_repo/synapse.json', 'r') as f:
    synapse_json = json.load(f)

# Look at a single entry (index 22) and report where its T-bar sits.
data = synapse_json['data'][22]
loc_tbar = data['T-bar']['location']
print("The voxel coordinates of T-bar is [x, y, z] = " + str(loc_tbar))

# Collect the locations of all partners attached to that T-bar.
partners = data['partners']
loc_part = [partner['location'] for partner in partners]

for i, loc in enumerate(loc_part):
    print("The voxel coordinates of its partner {} is [x, y, z] = ".format(i) + str(loc))


In [None]:
# Gather every T-bar in synapse.json as a row:
# [entry index, body ID, x, y, z].
data = synapse_json['data']

tbar_loc_list = []
for i, entry in enumerate(data):
    tbar = entry['T-bar']
    x, y, z = tbar['location']
    tbar_loc_list.append([i, tbar['body ID'], x, y, z])

# Build the T-bar DataFrame and tag every row with its category.
col_name = ['data', 'body ID', 'loc (x)', 'loc (y)', 'loc (z)']
tbar_df = pd.DataFrame(tbar_loc_list, columns=col_name)
tbar_df['category'] = 'T-bar'

print('The DataFrame of tbar_df would have ' + str(len(tbar_df)) + ' of rows')
tbar_df.tail()


In [None]:
# Gather every partner of every T-bar in synapse.json as a row:
# [index of the owning entry, body ID, x, y, z].
data = synapse_json['data']

partners_loc_list = []
for i, entry in enumerate(data):
    partners = entry['partners']
    for partner in partners:
        x, y, z = partner['location']
        partners_loc_list.append([i, partner['body ID'], x, y, z])

# Build the partner DataFrame and tag every row with its category.
col_name = ['data', 'body ID', 'loc (x)', 'loc (y)', 'loc (z)']
partner_df = pd.DataFrame(partners_loc_list, columns=col_name)
partner_df['category'] = 'partner'

print('The DataFrame of partner_df would have ' + str(len(partner_df)) + ' of rows')
partner_df.tail()
    

In [None]:
# Stack the T-bar rows on top of the partner rows and renumber the index.
# DataFrame.append was removed in pandas 2.0; pd.concat produces the same
# frame and is available in older pandas releases as well.
synapse_df = pd.concat([tbar_df, partner_df]).reset_index(drop=True)

synapse_df.tail(5)


In [None]:
# Keep only the synapses located inside the example bounding box.
xmin, xmax = 3253, 3773
ymin, ymax = 2103, 2623
zmin, zmax = 3490, 4010

# Each axis test includes both of its bounds.
x_cond = (xmin <= synapse_df['loc (x)']) & (synapse_df['loc (x)'] <= xmax)
y_cond = (ymin <= synapse_df['loc (y)']) & (synapse_df['loc (y)'] <= ymax)
z_cond = (zmin <= synapse_df['loc (z)']) & (synapse_df['loc (z)'] <= zmax)

inside_box = x_cond & y_cond & z_cond
valid_synapse = synapse_df[inside_box]


In [None]:
# 3-D scatter plot of the synapses that passed the bounding-box filter.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

# Columns are picked by position.
# NOTE(review): presumably positions 2, 3, 4 are 'loc (x)', 'loc (y)', 'loc (z)'
# and position 5 is 'category' — confirm against the column order of valid_synapse.
# No legend is drawn, so the label argument has no visible effect here.
ax.scatter(valid_synapse.iloc[:, 2],
           valid_synapse.iloc[:, 3],
           valid_synapse.iloc[:, 4], marker='o', label=valid_synapse.iloc[:,5], alpha=0.1)

# Fix the axes to the bounding box so the view covers the whole cube.
ax.set_zlim(zmin, zmax)
ax.set_ylim(ymin, ymax)
ax.set_xlim(xmin, xmax)

ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')

plt.show()

## Save and load

In [3]:
import pickle

def namestr(obj, namespace):
    """Return the keys of ``namespace`` whose value is the very same object as ``obj``.

    Matching uses identity (``is``), not equality, and the keys are returned
    in the iteration order of ``namespace``.
    """
    matches = []
    for key in namespace:
        if namespace[key] is obj:
            matches.append(key)
    return matches

def save_object(input_object, name=None):
    """Pickle ``input_object`` to ``./save/<name>.dmp``.

    Args:
        input_object: The object to serialize.
        name: Base name of the output file. When omitted, the name of a global
            variable bound to ``input_object`` is looked up and used.

    Raises:
        IndexError: If ``name`` is omitted and no global variable refers to
            ``input_object``.
    """
    import os

    if name is None:
        candidates = namestr(input_object, globals())
        if not candidates:
            raise IndexError("no global variable refers to this object; pass name=...")
        # IPython also binds recent cell outputs to `_`, `__`, `_N`; prefer a
        # regular variable name so the file is not written as e.g. `_.dmp`.
        public = [c for c in candidates if not c.startswith('_')]
        name = (public or candidates)[0]

    # Opening the output file fails when the target directory is missing.
    if not os.path.isdir('./save'):
        os.makedirs('./save')
    with open('./save/' + name + '.dmp', 'wb') as f:
        pickle.dump(input_object, f)
        
def load_object(input_object, name=None):
    """Load the object pickled at ``./save/<name>.dmp``.

    Args:
        input_object: An object bound to the global variable whose name selects
            the file. Only used for that lookup, and ignored when ``name`` is
            given.
        name: Base name of the file to read. When omitted, it is derived from
            the global variable bound to ``input_object``.

    Returns:
        The unpickled object.

    Raises:
        IndexError: If ``name`` is omitted and no global variable refers to
            ``input_object``.
    """
    if name is None:
        candidates = namestr(input_object, globals())
        if not candidates:
            raise IndexError("no global variable refers to this object; pass name=...")
        # IPython also binds recent cell outputs to `_`, `__`, `_N`; prefer a
        # regular variable name over those aliases.
        public = [c for c in candidates if not c.startswith('_')]
        name = (public or candidates)[0]

    # SECURITY: pickle.load can execute arbitrary code; only read .dmp files
    # that this notebook wrote itself.
    with open('./save/' + name + '.dmp', 'rb') as f:
        loaded_f = pickle.load(f)
    return loaded_f

# get a neuron_gt

In [2]:
import os
if not os.path.exists("./save/neuron_gt.dmp"):
    ##!! First, you have to load origin_gt_cube from the diced server
    # Get neuron coordinates (volume coordinates taken from the synapse table)
    neuron_coord_z = valid_synapse['loc (z)'].values
    neuron_coord_y = valid_synapse['loc (y)'].values
    neuron_coord_x = valid_synapse['loc (x)'].values

    assert len(neuron_coord_z) == len(neuron_coord_y) == len(neuron_coord_x)

    # origin_gt_cube was cut out as groundtruth[zmin:zmax, ymin:ymax, xmin:xmax],
    # so its own indices start at 0. The synapse coordinates must be shifted by
    # the cube origin before they can address a voxel inside it.
    cube_depth, cube_height, cube_width = origin_gt_cube.shape

    # Get a neuron_gt list: distinct labels in first-seen order.
    neuron_gt = []
    seen_labels = set()
    for z, y, x in zip(neuron_coord_z, neuron_coord_y, neuron_coord_x):
        local_z, local_y, local_x = z - zmin, y - ymin, x - xmin
        # valid_synapse keeps points lying exactly on the upper bounds, which are
        # one voxel outside the half-open cube; skip anything out of range.
        if not (0 <= local_z < cube_depth
                and 0 <= local_y < cube_height
                and 0 <= local_x < cube_width):
            continue
        val = int(origin_gt_cube[local_z, local_y, local_x])
        if val not in seen_labels:
            seen_labels.add(val)
            neuron_gt.append(val)
    save_object(neuron_gt)
    print("saving neuron_gt complete")
else:
    neuron_gt=[] # Define a dummy variable to get a variable's name
    neuron_gt = load_object(neuron_gt)
    print("loading neuron_gt complete")

NameError: name 'load_object' is not defined

In [None]:
# Display the list of label values held in neuron_gt.
neuron_gt

In [None]:
# Check that every "body ID" of the selected synapses also appears in neuron_gt;
# each ID that is missing gets reported on its own line.
neuron_ids = valid_synapse['body ID'].values

for i in neuron_ids.tolist():
    if i in neuron_gt:
        continue
    print("%d 없음" % i)

# Get neuron_gt_cube and non_neuron_gt_cube

In [None]:
# Boolean mask of the voxels whose label is one of the synapse body IDs,
# followed by the index arrays of those voxels (one array per axis).
# NOTE(review): the mask is built from neuron_ids, not from neuron_gt — confirm
# that this is the intended label set.
indice_bool = np.isin(element=diced_gt, test_elements=neuron_ids)
indice = np.where(indice_bool)

In [None]:
# Display the index arrays of the selected voxels.
indice

In [None]:
# Spot check: is the label at cube index (0, 7, 469) contained in neuron_gt?
diced_gt[0,7,469] in neuron_gt

In [None]:
# neuron_gt_cube = np.zeros((diced_gt.shape), dtype=np.int)
# non_neuron_gt_cube = diced_gt.copy()

# for z,y,x in zip(indice[0], indice[1], indice[2]):
#     neuron_gt_cube[z,y,x] = diced_gt[z,y,x]
#     non_neuron_gt_cube[z,y,x] = 0
#     if not diced_gt[z,y,x] in neuron_gt :
#         print('% d is not in neuron_gt' % diced_gt[z,y,x])

In [None]:
# Split diced_gt into two volumes of the same shape: one that keeps only the
# selected neuron voxels, and one with exactly those voxels zeroed out.
# `np.int` was removed in NumPy 1.24; it was an alias of the builtin `int`,
# so `dtype=int` gives the same dtype on every NumPy version.
neuron_gt_cube = np.zeros(diced_gt.shape, dtype=int)
non_neuron_gt_cube = diced_gt.copy()

neuron_gt_cube[indice] = diced_gt[indice]
non_neuron_gt_cube[indice] = 0

# Draw a neuron scatter plot

In [4]:
neuron_gt_cube_path = "./save/neuron_gt_cube.hdf"
non_neuron_gt_cube_path = "./save/non_neuron_gt_cube.hdf"

# Load both cubes only when both cache files are present; otherwise write
# both files from the cubes currently in memory.
if os.path.exists(neuron_gt_cube_path) and os.path.exists(non_neuron_gt_cube_path):
    neuron_gt_cube = np.array(hdf_read(neuron_gt_cube_path, "neuron_gt_cube"))
    non_neuron_gt_cube = np.array(hdf_read(non_neuron_gt_cube_path, "non_neuron_gt_cube"))
else:
    hdf_write(neuron_gt_cube_path, "neuron_gt_cube", neuron_gt_cube)
    hdf_write(non_neuron_gt_cube_path, "non_neuron_gt_cube", non_neuron_gt_cube)


In [5]:
"""
##!#!#!# Caution #!#!#!##
If you run this code, you will have to wait a long time.
If you want to run it, uncomment the last line.
'neuron_gt_cube.vtk' is 10.0GB
"""
from tvtk.api import tvtk, write_data
# conda install -c anaconda mayavi 
def array2vtk(data, name):
    """Write a 3-D numpy volume to ``./save/<name>.vtk`` as VTK image data.

    Args:
        data: 3-D numpy array holding one scalar per voxel.
        name: Used both as the scalar array name and as the output file's
            base name.
    """
    # VTK image data stores points with its first dimension varying fastest,
    # while a C-order ravel() makes the *last* numpy axis vary fastest. The
    # dimensions are therefore the numpy shape reversed. For cubic volumes this
    # is identical to passing data.shape; for non-cubic volumes it keeps the
    # voxels in their correct positions.
    grid = tvtk.ImageData(spacing=(1, 1, 1), origin=(0, 0, 0), dimensions=data.shape[::-1])
    grid.point_data.scalars = data.ravel()
    grid.point_data.scalars.name = name

    write_data(grid, './save/' + name + '.vtk')

********************************************************************************
         to build the TVTK classes (6.3). This may cause problems.
         Please rebuild TVTK.
********************************************************************************



In [6]:
# Export both label volumes to ./save/<name>.vtk.
# NOTE(review): the caution text in this notebook says this step takes a long
# time and that 'neuron_gt_cube.vtk' is 10.0GB.
array2vtk(neuron_gt_cube, "neuron_gt_cube")
array2vtk(non_neuron_gt_cube, "non_neuron_gt_cube")