In [1]:
import numpy as np
from tqdm import tqdm

In [2]:
# Load the trajectory array from disk. The three-way unpack below requires it
# to be 3-D; later cells index axis 0 by frame, axis 1 by atom, axis 2 by column.
arr = np.load("smallx.npy") #timestep 1001 to 2001
n_time, n_atoms, n_dim = arr.shape

In [3]:
# Column 0 of the first frame is taken as the per-atom type label.
# NOTE(review): this assumes atom types and row order are identical in every
# frame -- confirm against how the file was written.
atom_types = arr[0,:,0]
atom_types

array([2., 2., 2., ..., 3., 5., 3.], dtype=float32)

In [4]:
# First frame only (presumably kept for quick inspection).
glimpse = arr [0]
# 1-based ids assigned purely by row position.
# NOTE(review): assumes rows are sorted by atom id -- confirm.
atom_id = np.arange(n_atoms)+1


In [5]:
# Number of DNA beads, i.e. atoms whose type label is 1 or 2.
# np.count_nonzero tallies the True entries of the mask inside NumPy and
# returns a plain int, instead of looping over the array with the builtin sum().
n_DNA = np.count_nonzero((atom_types == 2) | (atom_types == 1))
n_DNA

12000

In [6]:
# Number of atoms with type label 5 (presumably one per tf -- confirm).
# np.count_nonzero tallies the True entries of the mask inside NumPy and
# returns a plain int, instead of looping over the array with the builtin sum().
n_tf = np.count_nonzero(atom_types == 5)
n_tf

631

In [7]:
# Boolean masks over atoms, built from the type label and the atom id modulo 3.
# NOTE(review): presumably each tf is three consecutive atoms (left leg, head,
# right leg) with types 3, 5, 3 -- confirm against the simulation setup.
condtion_L = ((atom_types == 3) & (atom_id % 3 == 1))
condtion_H = ((atom_types == 5) & (atom_id % 3 == 2)) # the second condition is unnecessary
condtion_R = ((atom_types == 3) & (atom_id % 3 == 0))


In [8]:
# Keep columns 1 onward for each group (column 0 holds the atom type label).
# NOTE(review): the DNA slice assumes the DNA beads are the first n_DNA rows
# of the atom axis -- confirm.
polymer_DNA = arr[:,:n_DNA,1:]
# Legs are selected with the boolean masks along the atom axis.
left_legs = arr[:,condtion_L,1:]
right_legs = arr[:,condtion_R,1:]

In [9]:
# Display the left-leg array: axis 0 is the frame, axis 1 the selected atom,
# axis 2 the remaining columns.
left_legs

array([[[ 48.6666  ,  11.9886  ,  -0.261688],
        [  5.20975 , -17.8034  ,   4.36733 ],
        [ 69.9737  ,   2.12292 ,  -5.94867 ],
        ...,
        [-22.1812  ,   2.89288 ,  -9.22692 ],
        [-34.8047  , -10.0354  , -14.9116  ],
        [-26.9369  ,  -9.04961 ,  22.1474  ]],

       [[ 49.7307  ,  13.2222  ,  -1.36022 ],
        [  5.45081 , -17.8528  ,   4.35148 ],
        [ 70.3373  ,   2.03506 ,  -6.72295 ],
        ...,
        [-20.9018  ,   4.13637 ,  -8.09471 ],
        [-33.4827  , -10.9513  , -14.3501  ],
        [-24.2879  , -12.6193  ,  19.4587  ]],

       [[ 49.1416  ,  13.214   ,  -1.76017 ],
        [  5.39751 , -18.31    ,   4.78848 ],
        [ 70.2472  ,   2.97071 ,  -4.93835 ],
        ...,
        [-20.5273  ,   4.00936 ,  -8.84046 ],
        [-32.2858  ,  -9.43555 , -13.4375  ],
        [-24.1017  , -13.8589  ,  17.8987  ]],

       ...,

       [[ 51.6921  ,  12.4928  ,   2.77154 ],
        [  7.39572 , -19.9101  ,   5.35446 ],
        [ 41.6224  , -

In [25]:
def is_bound(arr_DNA:np.ndarray,tf:np.ndarray,threshold:float=0.8) -> bool:
    """
    Tell whether a single tf bead lies within `threshold` of any DNA bead.

    Parameters
    ----------
    arr_DNA : np.ndarray
        DNA bead coordinates, one row per bead.
    tf : np.ndarray
        Coordinates of a single bead (one row), broadcast against `arr_DNA`.
    threshold : float
        Cutoff distance; the comparison is strict (distance < threshold).

    Returns
    -------
    bool
        True if at least one DNA bead is closer than `threshold`, else False.
        An empty `arr_DNA` yields False.
    """
    offsets = np.subtract(arr_DNA, tf)  # per-axis displacement of every DNA bead from tf
    distance = np.linalg.norm(offsets, axis=1)  # Euclidean distance per DNA bead

    # bool() turns the NumPy scalar into a plain Python bool, matching the annotation.
    return bool(np.any(distance < threshold))

In [26]:
def where_bound(arr_DNA:np.ndarray,tf:np.ndarray,threshold:float=0.8)->"np.uint32 | float":
    """
    Return the index of the DNA atom where the given tf is bound.

    A tf counts as bound to every DNA atom closer than `threshold` (strict
    comparison). When several atoms qualify, the one with the lowest index is
    returned -- not necessarily the nearest one.

    Parameters
    ----------
    arr_DNA : np.ndarray
        DNA bead coordinates, one row per bead.
    tf : np.ndarray
        Coordinates of a single bead (one row), broadcast against `arr_DNA`.
    threshold : float
        Cutoff distance.

    Returns
    -------
    np.uint32 or float
        Row index into `arr_DNA` as np.uint32, or np.nan if the tf is not
        bound to DNA at all.
    """
    offsets = np.subtract(arr_DNA, tf)  # per-axis displacement of every DNA bead from tf
    distance = np.linalg.norm(offsets, axis=1)  # Euclidean distance per DNA bead

    # Indices of all beads inside the cutoff, in ascending order.
    hits = np.flatnonzero(distance < threshold)
    if hits.size == 0:
        return np.nan
    return np.uint32(hits[0])  # only a single one: the first match

In [36]:
def find_positions(arr_DNA:np.ndarray,
                   arr_tf:np.ndarray,
                   threshold:float=0.9)->np.ndarray:
    """
    Find, for every tf bead of one timestep, the DNA atom it is bound to.

    Parameters
    ----------
    arr_DNA : np.ndarray
        DNA bead coordinates for one timestep, one row per bead.
    arr_tf : np.ndarray
        tf bead coordinates for the same timestep, one row per bead.
    threshold : float
        Cutoff distance forwarded to where_bound. The default stays at 0.9,
        the value this function previously hard-coded (where_bound's own
        default is 0.8).

    Returns
    -------
    np.ndarray
        Float array with one entry per row of `arr_tf`: the DNA index reported
        by where_bound, or nan where the tf is not bound.
    """
    # Float storage so that nan (unbound) can sit next to the integer indices.
    positions = np.full(len(arr_tf), np.nan)
    for i, tf in enumerate(arr_tf):
        positions[i] = where_bound(arr_DNA, tf, threshold=threshold)

    return positions

In [37]:
def find_positions_multistep(arr_DNA_ms:np.ndarray,arr_tf_ms:np.ndarray)->np.ndarray:
    """
    Run find_positions on every timestep and gather the results in one table.

    Both inputs are indexed by timestep along their first axis, and
    `arr_tf_ms` must be 3-D. The returned float array has one row per tf and
    one column per timestep. A progress line is rewritten in place on stdout
    while the loop runs.
    """
    step_count, tf_count, _ = arr_tf_ms.shape

    # rows are tfs, columns are timesteps
    table = np.zeros((tf_count, step_count))

    for step in range(step_count):
        column = find_positions(arr_DNA_ms[step], arr_tf_ms[step])
        table[:, step] = column
        print(f"step: {step+1} out of {step_count}", end='\r')

    return table

In [38]:
# Binding positions for both legs, computed on the first 5 frames only.
positions_L = find_positions_multistep(polymer_DNA[:5],left_legs[:5])
positions_R = find_positions_multistep(polymer_DNA[:5],right_legs[:5])

step: 5 out of 5

In [39]:
# One row per left leg, one column per timestep; nan marks a leg that is not
# bound to DNA at that timestep.
positions_L

array([[ 1328.,   246., 11378.,   246.,   247.],
       [ 2209.,  2209.,  2209.,  2209.,  8039.],
       [10928., 10928., 10927., 11197., 10928.],
       ...,
       [ 3598.,  3244.,  3246.,  3246.,  3599.],
       [ 7098.,  7303.,  7100.,  6437.,    nan],
       [ 4332.,  8142.,  4336.,  8141.,  4335.]])