# Expanse Notebook for Computational Analysis of Chromatin During Heart Development

This notebook is used exclusively to run computationally expensive operations on Expanse.

In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import functions as f
from tqdm import tqdm

# Bin index passed as the second positional argument to the random-walk call below.
# NOTE(review): presumably the 100 kb bin containing the TTN gene — confirm against bin_map.
TTN_BIN = 4275
# Bin map loaded from a BED file by the project-local `functions` module;
# the structure of the returned object is defined there, not in this notebook.
bin_map = f.load_bin_map('data/bin_map_human_100000.bed')
# Contact matrix loaded from a .npy file. The filename suggests 100 kb bins,
# balanced, with some entries zeroed — TODO confirm the exact preprocessing.
contact_matrix_zero = np.load('samples/contact_matrix_100kb_balanced_zeroed.npy')

In [4]:

def build_walk_index(contact_matrix):
    """
    Precompute, for every row of ``contact_matrix``, what is needed to draw a
    weighted random step out of that node.

    Parameters
    ----------
    contact_matrix : 2-D numpy array
        Row ``i`` holds the weights from node ``i`` to each column index.
        Zero entries are treated as "no edge". Weights are assumed to be
        finite and non-negative; this is not checked here.

    Returns
    -------
    neighbors : list of 1-D int arrays
        ``neighbors[i]`` are the column indices of the nonzero entries of row ``i``.
    cdfs : list of 1-D float arrays
        ``cdfs[i][k]`` is the cumulative probability of ``neighbors[i][:k + 1]``,
        where each neighbor's probability is its weight divided by the sum of
        the row's nonzero weights. The last entry is exactly 1.0.

    Rows without any nonzero entry get empty arrays in both lists.
    """
    N = contact_matrix.shape[0]
    neighbors = [None]*N
    cdfs      = [None]*N

    for i in tqdm(range(N)):
        w = contact_matrix[i]
        idx = np.nonzero(w)[0]
        if idx.size == 0:
            neighbors[i] = np.empty(0, dtype=int)
            cdfs[i]      = np.empty(0, dtype=float)
        else:
            # Accumulate the raw weights first and normalise by the total
            # afterwards. Summing already-normalised probabilities can end a
            # few ulps below 1.0, and a sampler that inverts the CDF with a
            # uniform draw in [0, 1) could then step past the last neighbor.
            # Dividing the running sum by its own final value makes the last
            # entry exactly 1.0 for any finite, nonzero total.
            running = np.cumsum(w[idx])
            neighbors[i] = idx
            cdfs[i]      = running / running[-1]
    return neighbors, cdfs

# Build the per-node sampling index once; the random-walk call further down
# receives it through its `neighbors=` and `cdfs=` keyword arguments.
neighbors, cdfs = build_walk_index(contact_matrix_zero) 



Python(64672) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
100%|██████████| 30894/30894 [00:10<00:00, 2945.33it/s]


### Random Walk Timing Test


In [5]:
import functions as f

# perf_counter() is the clock intended for measuring short durations: it is
# monotonic and high-resolution, whereas time() follows the system clock and
# can jump if that clock is adjusted mid-run.
# `time` stays imported in case later cells rely on it.
from time import perf_counter, time

# Benchmark a single random_walk_fast call that reuses the precomputed
# neighbors/cdfs index. The return value is intentionally discarded.
start = perf_counter()


f.random_walk_fast(
    contact_matrix_zero, TTN_BIN, 5,
    neighbors=neighbors, cdfs=cdfs,
    num_molecules=10000, alpha=0.05
)
end = perf_counter()

print(f"Time taken: {end - start} seconds")

Time taken: 28.83447003364563 seconds


### Analytical Diffusion Test