In [2]:
import numpy as np
from alabtools.analysis import HssFile

In [3]:
# Open the model file through the HssFile class in read-only mode ('r').
# `hss` is the handle that the following cells query for attributes and datasets.
hss_path = 'igm-model_mcrb_2.5MB.hss'
hss = HssFile(hss_path, 'r')

An HSS file is an HDF5 file.

There are three main datasets: coordinates, index and genome.

In [8]:
# nbead and nstruct are examples of ATTRIBUTES of the HssFile class:
#   nbead   -> number of beads in the model (for each copy of each chromosome)
#   nstruct -> number of structures in the model
print(hss.nbead, hss.nstruct, sep='\n')  # one value per line

2094
100


In [9]:
# The coordinates are the first dataset of the HSS file.

# Bind the coordinates to a variable of our own, coords, and work on that
# from here on (the printed type below shows a plain numpy.ndarray).
# NOTE(review): presumably edits to coords do not touch the data stored in
# the file -- confirm against the alabtools documentation before relying on it.
coords = hss.coordinates

# Type and shape, one per line; the axes are (nbead, nstruct, 3)
print(type(coords), coords.shape, sep='\n')

# Bead n. 5 in structure n. 10 -> 0-based indices 4 and 9;
# index 1 on the last axis selects the Y coordinate
y_bead5_struct10 = coords[4, 9, 1]
print(y_bead5_struct10)  # in nanometers

<class 'numpy.ndarray'>
(2094, 100, 3)
532.129


In [10]:
# Axis collapsing: indexing the second axis with a single integer picks one
# structure and removes that axis from the result (3 axes -> 2 axes)
coords_1 = coords[:, 0, :]  # coordinates of struct 1 (index 0)
print(type(coords_1), coords_1.shape, sep='\n')

<class 'numpy.ndarray'>
(2094, 3)


In [11]:
# Slicing

# A start:stop range keeps the axis and selects several structures at once.
# Here: structure n. 1 through structure n. 10, i.e. indices 0..9
coords_1_to_10 = coords[:, 0:10, :]  # start (0) included, stop (10) excluded
print(type(coords_1_to_10), coords_1_to_10.shape, sep='\n')

<class 'numpy.ndarray'>
(2094, 10, 3)


In [12]:
# Slicing shortcuts

# An omitted start means "from the beginning": structure indices 0..14
coords_up_to_15 = coords[:, :15]  # 0 included but 15 excluded

# An omitted stop means "through the end": structure indices 15..last
coords_from_15 = coords[:, 15:]  # 15 included

# Trailing axes left out of the index (here the last, 3-component axis) are
# taken in full, so coords[:, :15] selects the same data as coords[:, :15, :]

In [13]:
# Compute the distance of bead 1 in structure 1 to the center
ctr = np.array([0., 0., 0.])  # the same in all structures
bead1_coords = coords[0, 0, :]  # coordinates of bead 1 in structure 1

# Distance between two points in 3D: the distance formula, which is the
# Pythagorean theorem in 3D
#    dist = sqrt((x1 - x2)^2 + (y1 - y2)^2 + (z1 - z2)^2)
#        where (x1, y1, z1) are the coordinates of the first point and
#        (x2, y2, z2) are the coordinates of the second point
# np.linalg.norm of the difference vector evaluates exactly this expression
# (the Euclidean / 2-norm) without spelling out each component by hand.
dst_bead1_ctr = np.linalg.norm(bead1_coords - ctr)
print("Distance of bead 1 to center: {:.2f} nm".format(dst_bead1_ctr))

Distance of bead 1 to center: 2294.61 nm


In [10]:
# Genome object: genome.chroms is an array with one chromosome name per entry
genome = hss.genome
genome_chroms = genome.chroms
# type, shape and content, one per line
print(type(genome_chroms), genome_chroms.shape, genome_chroms, sep='\n')

<class 'numpy.ndarray'>
(21,)
['chr1' 'chr2' 'chr3' 'chr4' 'chr5' 'chr6' 'chr7' 'chr8' 'chr9' 'chr10'
 'chr11' 'chr12' 'chr13' 'chr14' 'chr15' 'chr16' 'chr17' 'chr18' 'chr19'
 'chrX' 'chrY']


In [12]:
# genome.lengths holds one length (in bp) per chromosome; entry 0 is chromosome 1
genome_lengths = genome.lengths
print(type(genome_lengths), genome_lengths.shape, genome_lengths, sep='\n')
chr1_length = genome_lengths[0]
print("length of chromosome 1: {} bp".format(chr1_length))

<class 'numpy.ndarray'>
(21,)
[195471971 182113224 160039680 156508116 151834684 149736546 145441459
 129401213 124595110 130694993 122082543 120129022 120421639 124902244
 104043685  98207768  94987271  90702639  61431566 171031299  91744698]
length of chromosome 1: 195471971 bp


In [13]:
# Index object
index = hss.index
index_chroms = index.chromstr
print(type(index_chroms))
print(index_chroms.shape)
print(index_chroms)

<class 'numpy.ndarray'>
(2094,)
['chr1' 'chr1' 'chr1' ... 'chr19' 'chr19' 'chr19']


In [14]:
# index.start: one start position per bead
# (treated as base pairs by the resolution computation further down)
index_start = index.start
print(type(index_start), index_start.shape, index_start, sep='\n')

<class 'numpy.ndarray'>
(2094,)
[       0  2500000  5000000 ... 55000000 57500000 60000000]


In [15]:
# index.end: one end position per bead
# (treated as base pairs by the resolution computation further down)
index_end = index.end
print(type(index_end), index_end.shape, index_end, sep='\n')

<class 'numpy.ndarray'>
(2094,)
[ 2500000  5000000  7500000 ... 57500000 60000000 62500000]


In [16]:
# Per-bead span (end - start), in bp
resolution = index_end - index_start

# The spans are uniform exactly when every entry equals the first one
first_bin_size = resolution[0]
is_constant = (resolution == first_bin_size).all()
print("Is resolution constant? {}".format(is_constant))
print("Resolution: {} bp".format(first_bin_size))

Is resolution constant? True
Resolution: 2500000 bp
