In [1]:
import numpy as np
import h5py
from alabtools.utils import Index

In [2]:
# Open the HDF5 file read-only with h5py.
# The handle is deliberately left open: later cells read from it.
h5_name = './igm-model_mcrb_2.5MB.sf.h5'
h5 = h5py.File(h5_name, mode='r')

In [3]:
# Build the alabtools Index from the open h5 file handle.
# Its chromstr / start / end / copy arrays are inspected in the next cell.
index = Index(h5)

In [5]:
# The index exposes four arrays: chromstr, start, end, copy
index_fields = (index.chromstr, index.start, index.end, index.copy)

# Type of each array
for field in index_fields:
    print(type(field))

# Shape of each array
for field in index_fields:
    print(field.shape)

# First 10 entries of each array
for field in index_fields:
    print(field[:10])

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(2094,)
(2094,)
(2094,)
(2094,)
['chr1' 'chr1' 'chr1' 'chr1' 'chr1' 'chr1' 'chr1' 'chr1' 'chr1' 'chr1']
[       0  2500000  5000000  7500000 10000000 12500000 15000000 17500000
 20000000 22500000]
[ 2500000  5000000  7500000 10000000 12500000 15000000 17500000 20000000
 22500000 25000000]
[0 0 0 0 0 0 0 0 0 0]


In [6]:
# List the top-level keys (groups) stored in the h5 file
top_level_groups = list(h5.keys())
print(top_level_groups)

['genome', 'icp', 'index', 'lamina', 'lamina_tsa', 'nucleoli', 'nucleoli_tsa', 'radial', 'rg', 'speckle', 'speckle_tsa', 'transAB']


In [7]:
# Look up the 'matrix' dataset of the 'transAB' feature, then read all of it
# into memory as the single-cell feature matrix
transab_matrix_dset = h5['transAB']['matrix']
feat_mat = transab_matrix_dset[:]

In [8]:
# feat_mat is a numpy array of shape (len(index), nstruct): one row per bead
# (all chromosomes and copies together), one column per structure.

# Show its type, its shape, and the top-left 10x10 corner, one per line
print(type(feat_mat), feat_mat.shape, feat_mat[:10, :10], sep='\n')

<class 'numpy.ndarray'>
(2094, 100)
[[0.2        0.4        0.8        0.70833333 0.22222222 0.375
  0.47222222 0.3255814  0.16666667 1.        ]
 [0.17241379 0.47727273 0.72727273 0.60526316 0.28947368 0.21875
  0.45714286 0.21666667 0.25       0.55      ]
 [0.19047619 0.425      0.61538462 0.5483871  0.29545455 0.35483871
  0.48484848 0.24193548 0.         0.51612903]
 [0.35714286 0.36111111 0.44444444 0.65       0.28301887 0.33333333
  0.54166667 0.25423729 0.2        0.5       ]
 [0.46666667 0.50847458 0.52631579 0.375      0.34146341 0.52631579
  0.3        0.24137931 0.28       0.57142857]
 [0.14285714 0.42857143 0.54545455 0.6        0.30769231 0.35714286
  0.41666667 0.25423729 0.30434783 0.56      ]
 [0.23809524 0.4        0.47619048 0.34615385 0.33333333 0.5
  0.52380952 0.23255814 0.4        0.61111111]
 [0.5        0.33962264 0.41935484 0.64864865 0.25925926 0.69230769
  0.25       0.32352941 0.40384615 0.42424242]
 [0.66666667 0.35714286 0.63636364 0.375      0.43478261 0.

In [9]:
# Example: feature values of chromosome 7, copy B (copy == 1), in structure 25
structID = 25
chrom = 'chr7'
copy = 1

# Boolean mask over the rows of feat_mat: matching chromosome AND matching copy
bead_mask = (index.chromstr == chrom) & (index.copy == copy)

# Masked rows, single structure column -> 1-D array
data = feat_mat[bead_mask, structID]
print(data.shape)
print(data)

(59,)
[0.55263158 0.37142857 0.19148936 0.30555556 0.34782609 0.27027027
 0.37837838 0.42857143 0.5        0.35483871 0.32432432 0.43333333
 0.4        0.3902439  0.41818182 0.42       0.33333333 0.5
 0.69565217 0.4375     0.22222222 0.28070175 0.46938776 0.61702128
 0.32432432 0.65384615 0.66666667 0.26086957 0.3902439  0.42857143
 0.58       0.5        0.49090909 0.41269841 0.25396825 0.33333333
 0.39655172 0.39344262 0.3125     0.63414634 0.81578947 0.40540541
 0.23333333 0.71428571 0.54901961 0.73469388 0.72093023 0.47058824
 0.66666667 0.55555556 0.68421053 0.7962963  0.66666667 0.49152542
 0.63829787 0.62745098 0.60416667 0.5        0.65517241]
