In [1]:
import numpy as np
import pandas as pd
from alabtools.analysis import HssFile

In [2]:
# Open the population file; 'r' is the mode argument (presumably read-only -- confirm against the HssFile docs)
hss = HssFile('igm-model_mcrb_2.5MB.hss', 'r')

In [3]:
# Pull the coordinates of every structure out of the hss file
coords = hss.coordinates
# Show the container type, then the shape: (ndomain, nstruct, 3)
print(type(coords), coords.shape, sep='\n')

<class 'numpy.ndarray'>
(2094, 100, 3)


In [4]:
# Slice out structure 1 (index 0 on the structure axis), keeping every bead and all three components
coords_1 = coords[:, 0]
print(type(coords_1), coords_1.shape, sep='\n')

<class 'numpy.ndarray'>
(2094, 3)


In [5]:
# Compute the distance of bead 1 (index 0) in structure 1 to the center,
# where the center is taken to be the coordinate origin.
ctr = np.array([0., 0., 0.])
bead1_coords = coords_1[0, :]  # coordinates of bead 1
# Euclidean norm of the offset vector; replaces the hand-written per-axis sum of squares
dst_bead1_ctr = np.linalg.norm(bead1_coords - ctr)
print("Distance of bead 1 to center: {:.2f} nm".format(dst_bead1_ctr))

Distance of bead 1 to center: 2294.61 nm


### Recap for last tutorial

1. Can you read the file `HCT116_Rao.test.hss` into an HssFile object?
2. Can you see 
    1. How many structures are stored in this hss object? 
    2. How many beads are in each structure?
3. Can you extract 
    1. The coordinates of all beads in the second structure? 
    2. The coordinates of the first bead in all structures?
    3. The coordinates of the first bead in the second structure?


In [6]:
# Explore the Genome object attached to the hss file
genome = hss.genome
# Assembly identifier stored on the genome object
genome_assembly = genome.assembly
print(genome_assembly)

mm10


In [7]:
# Inspect the chromosome names stored on the genome object
genome_chroms = genome.chroms
# Type, shape, and contents, one per line
print(type(genome_chroms), genome_chroms.shape, genome_chroms, sep='\n')

<class 'numpy.ndarray'>
(21,)
['chr1' 'chr2' 'chr3' 'chr4' 'chr5' 'chr6' 'chr7' 'chr8' 'chr9' 'chr10'
 'chr11' 'chr12' 'chr13' 'chr14' 'chr15' 'chr16' 'chr17' 'chr18' 'chr19'
 'chrX' 'chrY']


In [8]:
# Inspect the chromosome lengths stored on the genome object
genome_lengths = genome.lengths
# Type, shape, and contents, one per line
print(type(genome_lengths), genome_lengths.shape, genome_lengths, sep='\n')
# Entry 0 is reported as the length of chromosome 1
print("length of chromosome 1: {} bp".format(genome_lengths[0]))

<class 'numpy.ndarray'>
(21,)
[195471971 182113224 160039680 156508116 151834684 149736546 145441459
 129401213 124595110 130694993 122082543 120129022 120421639 124902244
 104043685  98207768  94987271  90702639  61431566 171031299  91744698]
length of chromosome 1: 195471971 bp


In [9]:
# Exercise: guess what genome.origins will look like before printing it
genome_origins = genome.origins
# Uncomment to check your guess:
#print(genome.origins)

In [10]:
# Explore the Index object attached to the hss file
index = hss.index
# Question: what is the difference between index.chromstr and genome.chroms?
index_chroms = index.chromstr
# Type, shape, and contents, one per line
print(type(index_chroms), index_chroms.shape, index_chroms, sep='\n')

<class 'numpy.ndarray'>
(2094,)
['chr1' 'chr1' 'chr1' ... 'chr19' 'chr19' 'chr19']


In [11]:
# Get the positions in index_chroms whose entry equals 'chr1'.
# np.where called with only a condition returns a tuple of index arrays (one per dimension).
index_chr1 = np.where(index_chroms=='chr1')
# Uncomment to inspect the indices, or to use them to select the matching entries:
#index_chr1
#index_chroms[index_chr1]

In [12]:
# View the start position of each bead
index_start = index.start
# Type, shape, and contents, one per line
print(type(index_start), index_start.shape, index_start, sep='\n')

<class 'numpy.ndarray'>
(2094,)
[       0  2500000  5000000 ... 55000000 57500000 60000000]


In [13]:
# View the end position of each bead
index_end = index.end
# Type, shape, and contents, one per line
print(type(index_end), index_end.shape, index_end, sep='\n')

<class 'numpy.ndarray'>
(2094,)
[ 2500000  5000000  7500000 ... 57500000 60000000 62500000]


In [14]:
# Span covered by each bead: end minus start, element-wise
resolution = index_end - index_start
# Question: what will resolution look like?
# Compare every entry against the first one to see whether the span is uniform.
print("Is resolution constant? {}".format((resolution == resolution[0]).all()))

Is resolution constant? True


### Practice
How do you find the index of the bead with chr='chr1', start=2500000, end=5000000? (Hint: split this into three smaller problems first; use np.where to find the indices of the elements that match each condition, and use np.intersect1d to find the intersection of two index arrays.)

In [16]:
# Use pandas to view the index object as a table

# Assemble the per-bead chromosome / start / end arrays into a single DataFrame
struc_info = pd.DataFrame({'chr': index_chroms, 'start': index_start, 'end': index_end})
# Check the dtype of the 'start' column
struc_info['start'].dtype

dtype('int32')

### Practice
For the previous question, find the bead index whose chr='chr1', start=2500000, end=5000000 in the table struc_info. <br>(Hint: use struc_info[struc_info['chr']=='xxx'] to keep the rows that match a particular condition 'xxx', and use & to combine multiple conditions.)

### Class HssFile functions: <br>
Format: `my_hss = HssFile("filename", 'r')`<br>
`my_hss.function()`

get_bead_crd(beads): <br>Takes bead ids as input and returns their coordinates in each model. The length of the returned array should be equal to nstruct.

In [18]:
# Coordinates of bead 1 (index 0) in every structure of the population
bead_1_crd = hss.get_bead_crd(0)
# A bare assignment displays nothing; end the cell with the name so the array is shown
bead_1_crd

array([[-1001.47   , -1418.1    ,  1500.43   ],
       [ -119.724  , -1331.97   ,  -319.539  ],
       [ -917.126  ,    24.7966 , -2026.25   ],
       [ 1411.59   ,  -162.014  ,  -220.577  ],
       [ 1163.19   ,  -491.541  , -1956.4    ],
       [  633.     ,   857.412  ,  1939.44   ],
       [  590.648  ,   352.885  ,  1439.75   ],
       [  657.714  ,    14.3107 , -1756.82   ],
       [ -785.333  ,  1840.45   ,  -787.647  ],
       [-2590.25   ,   159.615  ,  -908.967  ],
       [ 1259.56   ,  1184.48   , -1549.53   ],
       [-1203.09   , -1484.29   , -1095.     ],
       [-1050.82   ,  1480.47   , -1421.59   ],
       [-1466.54   ,  1666.43   ,  -725.582  ],
       [-1537.72   ,   608.001  , -1730.85   ],
       [-2203.73   ,  -158.846  , -1222.23   ],
       [-2694.65   ,  -261.49   ,  -521.263  ],
       [ -344.754  , -1162.4    ,  1847.54   ],
       [  233.879  , -1727.33   ,  1240.55   ],
       [-1427.63   ,  -824.598  ,   877.927  ],
       [ 1578.91   , -1803.96   ,   142.

get_struct_crd(models): <br>Takes struct ids as input and returns the coordinates of all beads in those structures. The length of the returned array should be equal to nbead.


In [19]:
# Coordinates of all beads for the requested structure ids; only structure 0 is requested here
hss.get_struct_crd([0])

array([[[-1001.47  , -1418.1   ,  1500.43  ]],

       [[-1173.11  , -1377.33  ,  1042.44  ]],

       [[-1072.83  , -1097.64  ,  1450.14  ]],

       ...,

       [[ -964.204 ,  -113.418 ,   116.495 ]],

       [[ -632.808 ,  -391.402 ,  -208.83  ]],

       [[ -805.402 ,  -839.024 ,    22.9564]]], dtype=float32)

getBeadRadialPositions(beads, nucleusRadius=5000): <br>Takes an array of bead ids as input and returns their radial positions in each structure of the population. nucleusRadius is the radius of the whole genome structure (default = 5000.0).


In [None]:
# Radial positions of bead 0 in each structure, using the default nucleusRadius.
# NOTE(review): this cell has no recorded output (In [None]) -- confirm it runs.
hss.getBeadRadialPositions([0])

getChromBeadRadialPosition(chrnum, nucleusRadius=5000): <br>Takes a chrom id (0-indexed) as input and returns the radial positions of the beads in that chromosome for each structure.


In [20]:
# Radial positions of the beads of chromosome id 0 (ids are 0-indexed per the description above), for each structure
hss.getChromBeadRadialPosition(0)

array([[0.45892224, 0.27499694, 0.44485614, ..., 0.5090492 , 0.5389437 ,
        0.4549859 ],
       [0.41760775, 0.16299318, 0.4394453 , ..., 0.4108551 , 0.424862  ,
        0.440687  ],
       [0.4223119 , 0.17060232, 0.3592254 , ..., 0.32171032, 0.476618  ,
        0.39111   ],
       ...,
       [0.38942602, 0.12283513, 0.15309018, ..., 0.27479854, 0.35586652,
        0.31527287],
       [0.35884947, 0.09306116, 0.13190234, ..., 0.3186278 , 0.3956942 ,
        0.41596237],
       [0.34170452, 0.15958302, 0.20046648, ..., 0.2690601 , 0.45816317,
        0.4397109 ]], dtype=float32)