In [1]:
from holoviews.operation.datashader import datashade
import time, dask.array, h5py, visutil, util, DataSource, numpy as np, holoviews as hv

# Activate the bokeh plotting backend for HoloViews, with the logo banner turned off.
# NOTE(review): `width=90` presumably sets the notebook width to 90% of the
# browser window -- confirm against the hv.extension documentation.
hv.extension('bokeh', width=90, logo=False)


# [USER] Specify parameters

In [12]:
# HDF5 file holding the 'eigenvectors' dataset; the dataset is read fully
# into memory before the file is closed.
with h5py.File('../output/eigensystem_2018_08_02_16_04_43.h5', 'r') as h5file:
    eigenvectors = np.asarray(h5file['eigenvectors'])

# Rows of `eigenvectors` to show: dim0 is the first coordinate, dim1 the second
dim0, dim1 = 2, 3

# Value range to show (forwarded to visutil.assemble_patterns)
value_range = [0, 5]

# Text file listing the raw data files
raw_data_path = '/reg/d/psdm/amo/amo86615/res/haoyuan/diffusion/proj_000/input/file_list.txt'

# Number of patterns to sample
sample_number = 500

# Number of patterns to show along each axis
num = 2

# Diffusion map dimension along each axis, in pixels
length = 300

# Size of each sampled-pattern image, in pixels. Please keep this default value
sample_size = int(length * 0.9 / num)


# [AUTO] Initialization

In [9]:
#########################################################
# [Auto] Load Data
#########################################################

# Data source object giving access to the raw data in the listed hdf5 files
data_source = DataSource.DataSourceFromH5pyList(source_list_file=raw_data_path)
pattern_shape = data_source.source_dict['shape']

# Global index map, built from the bookkeeping counters of the data source
global_index_map = util.get_global_index_map(
    data_num_total=data_source.data_num_total,
    file_num=data_source.file_num,
    data_num_per_file=data_source.data_num_per_file,
    dataset_num_per_file=data_source.dataset_num_per_file,
    data_num_per_dataset=data_source.data_num_per_dataset)

#########################################################
# [Auto] Create holoviews object for all the data points
#########################################################
# One row per data point: column 0 is eigenvector row dim0, column 1 is row dim1.
# The buffer is sized from the data source, so the assignment fails loudly if the
# eigenvector file and the raw data disagree on the number of points.
data_all_coor = np.zeros((data_source.data_num_total, 2))
data_all_coor[:] = eigenvectors[[dim0, dim1], :].T

points_all = hv.Scatter((eigenvectors[dim0], eigenvectors[dim1]))

# Datashaded rendering of all the points
background = datashade(points_all, dynamic=True)

#########################################################
# [Auto] Define streams related to all the data points
#########################################################
# Initially empty Polygons element with a PolyDraw stream attached to it
select = hv.Polygons([])
path_stream = hv.streams.PolyDraw(source=select)


# [AUTO] Sample the index and dataset

In [4]:
# Draw a random subset of the global indices (no repeats) and sort it.
# NOTE: no random seed is set in this cell, so each run draws a different subset.
sampled_index = np.sort(np.random.permutation(data_source.data_num_total)[:sample_number])

tic = time.time()
# Preload the sampled patterns
sampled_patterns = util.get_sampled_pattern_batch_efficient(global_index_array=sampled_index,
                                                            global_index_map=global_index_map,
                                                            data_dict=data_source.source_dict)
toc = time.time()

# Report the number of patterns actually sampled: the slice above yields fewer
# than `sample_number` indices when `sample_number` exceeds the total data count.
print("It takes {} seconds to sample {} patterns.".format(toc - tic, len(sampled_index)))


It takes 53.25032091140747 seconds to sample 2000 patterns.


# [AUTO] Sample the coordinates

In [13]:
# Coordinates of the sampled points: one (dim0, dim1) row per entry of `sampled_index`.
# The array is sized by the indices that were actually sampled rather than by
# `sample_number`, so this cell keeps working when `sample_number` is edited after
# the sampling cell ran (a `sample_number`-row buffer would then fail with a
# shape-mismatch error on assignment).
sample_position_holder = data_all_coor[sampled_index]

# Create the holoviews object for the sampled points
sampled_points = hv.Points(sample_position_holder)

#########################################################
# [Auto] Define stream for the sampled points
#########################################################
# Selection1D stream sourced from the sampled points
check = hv.streams.Selection1D(source=sampled_points)


#########################################################
# [Auto] Define actions
#########################################################
# Callback that turns the current selection into an assembled view of the sampled patterns
def sample_from_selected_region(index):
    """Assemble the sampled patterns picked by the current selection.

    Forwards ``index`` to ``visutil.assemble_patterns`` together with the
    notebook-level settings: ``row_num`` and ``col_num`` both set to ``num``,
    the preloaded ``sampled_patterns``, ``pattern_shape`` and ``value_range``.

    Parameters
    ----------
    index
        Selection indices, as supplied by the stream attached to this callback.

    Returns
    -------
    The object returned by ``visutil.assemble_patterns`` for these arguments.
    """
    # Settings shared by every call; only `index` changes between selections
    settings = dict(row_num=num,
                    col_num=num,
                    data_holder=sampled_patterns,
                    pattern_shape=pattern_shape,
                    value_range=value_range)
    return visutil.assemble_patterns(index=index, **settings)


# DynamicMap driven by the `check` Selection1D stream: it calls
# sample_from_selected_region with the stream's current selection.
# NOTE(review): the selection is presumably made with the box_select tool that
# the visualization cell enables on Points -- confirm.
handle_check = hv.DynamicMap(sample_from_selected_region, streams=[check])


# [AUTO] Visualization

In [14]:
%%opts  Points [height=length,  width=length,  tools=['box_select',]] (size=4, color='red',nonselection_alpha=1,nonselection_color='yellow',selection_color='red'))
%%opts Scatter [height=length, width=length]
%%opts Image [height=sample_size, width=sample_size]
%%opts Polygons (line_width=5, line_color='green', line_alpha=1, fill_alpha=0.6)

# Create the final diagram
layout = (background*sampled_points*select  + handle_check.collate()).options(shared_axes=False)
layout


# [USER] Save selected polygon

In [13]:
# Destination .npy file for the selection result
output_address = '../output/selected_index.npy'

# Pass the polygon stream and the full coordinate array to
# visutil.save_selected_region; the two returned values are unpacked as
# `index` and `points`.
# NOTE(review): presumably `index` holds the indices of the points inside the
# drawn polygon and `points` their coordinates -- confirm against visutil.
index, points = visutil.save_selected_region(return_selected_region=True,
                                             output=output_address,
                                             data_holder=data_all_coor,
                                             stream_holder=path_stream)


In [15]:
# Datashade the selected region: wrap `points` (returned by
# visutil.save_selected_region) in hv.Points and display the dynamically
# datashaded result as the cell output.
datashade(hv.Points(points), dynamic=True)
