In [4]:
import h5py
import json
from os.path import join

In [5]:
# Open the processed HNF4A / Caco-2 multires file in read-only mode.
# The handle `f` is intentionally left open: the cells below read from it.
f = h5py.File(join("data", "processed", "Homo_sapiens__HNF4A__cl__Caco-2.multires.mv5"), "r")

In [6]:
# Show the names of the top-level entries stored in the file.
f.keys()

<KeysViewHDF5 ['chroms', 'info', 'resolutions']>

In [7]:
# Parse the first "row_infos" entry attached to the 16384000 resolution
# group; the entry is a JSON-encoded metadata record.
json.loads(f["resolutions"]["16384000"].attrs["row_infos"][0])

{'id': '372',
 'status': 'completed',
 'treats__0__cell_line__name': 'Caco-2',
 'treats__0__cell_type__name': 'Epithelium',
 'treats__0__cell_pop__name': None,
 'treats__0__disease_state__name': 'Colorectal Adenocarcinoma',
 'treats__0__factor__name': 'HNF4A',
 'treats__0__is_correcting': False,
 'treats__0__link': 'http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM575228',
 'treats__0__name': 'HNF4A ChIP-seq in proliferating cells',
 'treats__0__paper__journal__name': 'Dev. Cell',
 'treats__0__paper__lab': 'Shivdasani RA',
 'treats__0__paper__pmid': 21074721,
 'treats__0__paper__reference': 'Verzi MP, et al. Differentiation-specific histone modifications reveal dynamic chromatin interactions and partners for the intestinal transcription factor CDX2. Dev. Cell 2010',
 'treats__0__species__name': 'Homo sapiens',
 'treats__0__strain__name': None,
 'treats__0__tissue_type__name': 'Colon',
 'treats__0__unique_id': 'GSM575228',
 'qc__judge__map': True,
 'qc__judge__peaks': True,
 'qc__ju

In [8]:
def tileset_info(f):
    """
    Return some information about this tileset that will
    help render it on the client.

    Parameters
    ----------
    f: h5py.File (or any object with the same mapping interface)
      An open, readable multires file. It is expected to provide
      a "resolutions" group (one sub-group per resolution, each holding
      a "values" group keyed by chromosome name), a "chroms" group with
      "name" and "length" datasets, and an "info" group carrying a
      "tile-size" attribute.

    Returns
    -------
    tileset_info: {}
      A dictionary containing the information describing
      this dataset: "resolutions", "min_pos", "max_pos", "tile_size",
      "shape" and, only when the file stores them, "row_infos"
      (a list of strings).
    """
    resolution_groups = f["resolutions"]

    # a sorted list of resolutions, lowest to highest
    # awkward to write because the numbers representing resolution
    # are datapoints / pixel so lower resolution is actually a higher
    # number
    resolutions = sorted((int(r) for r in resolution_groups.keys()), reverse=True)
    lowest_resolution = resolution_groups[str(resolutions[0])]

    # the "leftmost" datapoint position
    # an array because HiGlass can display multi-dimensional
    # data
    min_pos = [0]

    # the "rightmost" datapoint position: the total length of all chroms.
    # Each length is converted to a Python int before adding, because
    # summing fixed-width numpy integers (e.g. an int32 dataset) can wrap
    # around for genome-sized totals.
    max_pos = [sum(int(length) for length in f["chroms"]["length"][:])]

    tile_size = int(f["info"].attrs["tile-size"])
    first_chrom = f["chroms"]["name"][0]

    # Shape of one tile: same trailing dimensions as the stored values of
    # the first chromosome, with the leading dimension replaced by the
    # tile size.
    shape = list(lowest_resolution["values"][first_chrom].shape)
    shape[0] = tile_size

    # Named `info` rather than `tileset_info` so the local does not shadow
    # this function.
    info = {
        "resolutions": resolutions,
        "min_pos": min_pos,
        "max_pos": max_pos,
        "tile_size": tile_size,
        "shape": shape,
    }

    if "row_infos" in lowest_resolution.attrs:
        # Entries can come back as bytes or as str depending on how the
        # attribute was written and on the h5py version; only bytes need
        # decoding.
        info["row_infos"] = [
            r.decode("utf8") if isinstance(r, bytes) else r
            for r in lowest_resolution.attrs["row_infos"]
        ]

    return info

In [9]:
# Summarize the open file with the helper defined above.
tileset_info(f)

{'resolutions': [16384000,
  8192000,
  4096000,
  2048000,
  1024000,
  512000,
  256000,
  128000,
  64000,
  32000,
  16000,
  8000,
  4000,
  2000,
  1000],
 'min_pos': [0],
 'max_pos': [3209286105],
 'tile_size': 256,
 'shape': [256, 2],
 'row_infos': ['{"id": "372", "status": "completed", "treats__0__cell_line__name": "Caco-2", "treats__0__cell_type__name": "Epithelium", "treats__0__cell_pop__name": null, "treats__0__disease_state__name": "Colorectal Adenocarcinoma", "treats__0__factor__name": "HNF4A", "treats__0__is_correcting": false, "treats__0__link": "http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM575228", "treats__0__name": "HNF4A ChIP-seq in proliferating cells", "treats__0__paper__journal__name": "Dev. Cell", "treats__0__paper__lab": "Shivdasani RA", "treats__0__paper__pmid": 21074721, "treats__0__paper__reference": "Verzi MP, et al. Differentiation-specific histone modifications reveal dynamic chromatin interactions and partners for the intestinal transcription fac