In [131]:
import h5py 
import math
import os
import os.path as op
import numpy as np

filename = '/tmp/my_file.multires'

# this is just so we can run this code
# multiple times without h5py complaining
if op.exists(filename):
    os.remove(filename)

# this will be the file that contains our multires data
f = h5py.File('/tmp/my_file.multires', 'w')
f.create_group('resolutions')

# start with a resolution of 1 element per pixel
curr_resolution = 1

# this will be our sample highest-resolution array
# and it will be stored under the resolutions['1']
# dataset
array_length = 100000
f['resolutions'].create_dataset(str(curr_resolution), (100000,))
f['resolutions']['1'][:] = np.array(range(array_length))

# the tile size that we want higlass to use
# unless this needs to be drastically different, there's no
# reason to change it
tile_size = 1024

# the maximum zoom level corresponds to the number of aggregations
# that need to be performed so that the entire extent of
# the dataset fits into one tile
max_zoom = math.ceil(math.log(array_length / tile_size) / math.log(2))

# we're going to go through and create the data for the different
# zoom levels by summing adjacent data points
prev_resolution = curr_resolution

for i in range(max_zoom):
    # each subsequent zoom level will have half as much data
    # as the previous
    curr_resolution = prev_resolution * 2
    
    next_level_length = math.ceil(
        len(f['resolutions'][str(prev_resolution)]) / 2)
    
    f['resolutions'].create_dataset(str(curr_resolution), 
                                    (next_level_length,))
    old_data = f['resolutions'][str(prev_resolution)][:]
    
    if len(old_data) % 2 != 0:
        # we need our array to have an even number of elements
        # so we just add the last element again
        old_data = np.append(old_data, [old_data[-1]])
        
    # actually sum the adjacent elements
    new_data = old_data.reshape(-1,2).sum(axis=1)
    
    print("zoom_level:", max_zoom - 1 - i, 
          "resolution:", curr_resolution, 
          "new_data length", len(new_data))
    f['resolutions'][str(curr_resolution)][:] = new_data
    
    prev_resolution = curr_resolution

zoom_level: 6 resolution: 2 new_data length 50000
zoom_level: 5 resolution: 4 new_data length 25000
zoom_level: 4 resolution: 8 new_data length 12500
zoom_level: 3 resolution: 16 new_data length 6250
zoom_level: 2 resolution: 32 new_data length 3125
zoom_level: 1 resolution: 64 new_data length 1563
zoom_level: 0 resolution: 128 new_data length 782


In [179]:
def get_tileset_info(f):
    '''
    Return some information about this tileset that will
    help render it in on the client.
    
    Parameters
    ----------
    f: h5py.File
        The f for which we're returning tileset info
    
    Returns
    -------
    tileset_info: {}
        A dictionary containing the information describing
        this dataset
    '''
    # a sorted list of resolutions, lowest to highest
    # awkward to write because a the numbers representing resolution
    # are datapoints / pixel so lower resolution is actually a higher
    # number
    resolutions = sorted([int(r) for r in f['resolutions'].keys()])[::-1]
    
    # the "leftmost" datapoint position
    # an array because higlass can display multi-dimensional
    # data
    min_pos = [0]
    
    # the "rightmost" datapoint position
    max_pos = [len(f['resolutions'][str(resolutions[-1])])]
    
    return {
        'resolutions': resolutions,
        'min_pos': min_pos, 
        'tile_size': tile_size
    }

In [180]:
print(get_tileset_info(f))

{'resolutions': [128, 64, 32, 16, 8, 4, 2, 1], 'min_pos': [0], 'tile_size': 1024}


In [183]:
def get_tile(f, tile_pos):
    '''
    Return a data tile from a multires file.
    
    Parameters
    ----------
    f: h5py.File
        A multires datafile
    tile_pos: []
        An array containing the zoom level and tile
        position (e.g. [0,3])
        
    Returns
    -------
    data: []
        The data for that tile as an array
    '''
    tileset_info = get_tileset_info(f)
    
    # which resolution does this zoom level correspond to?
    resolution = tileset_info['resolutions'][tile_pos[0]]
    tile_size = tileset_info['tile_size']
    
    # where in the data does the tile start and end
    tile_start = tile_pos[1] * tile_size
    tile_end = tile_start + tile_size

    return f['resolutions'][str(resolution)][tile_start:tile_end]

In [184]:
print("len(get_tile)", len(get_tile(f, [0,0])))

len(get_tile) 782
