__Author__: Bogdan Bintu

__Email__: bbintu@g.harvard.edu

__Date__:3/4/2020

In [1]:
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['font.size']=15
matplotlib.rcParams['font.family']='Arial'
import matplotlib.pylab as plt
import numpy as np
import os

### Load data

In [2]:
folder = r'Data'


def load_zxy(path):
    """
    Read the first three columns of a tab-separated table as floats.

    The first row is treated as a header and skipped; blank lines are ignored.
    The three columns are presumably the z, x, y coordinates (the notebook
    stores them in `zxy`) -- confirm against the data files.
    """
    # `with` closes the file even if parsing fails (the handles used to leak).
    with open(path, 'r') as fid:
        # rstrip instead of ln[:-1]: a last line without a trailing newline
        # must not lose its final character.
        rows = [ln.rstrip('\r\n').split('\t')[:3] for ln in fid if ln.strip()]
    # builtin float: the `np.float` alias was removed in NumPy 1.24.
    return np.array(rows[1:], dtype=float)


zxy = np.concatenate([
    load_zxy(os.path.join(folder, r'genomic-scale.tsv')),
    load_zxy(os.path.join(folder, r'genomic-scale-with transcription and nuclear bodies.tsv')),
])
# One cell = 2082 consecutive rows of 3 coordinates; /1000 converts to um
# (input presumably in nm -- confirm against the data files).
zxy = zxy.reshape([-1,2082,3])/1000 #transform to um

### Calculate A and B densities in single cells

In [3]:
### Main function for calculating trans densities.
def zxy_to_dens(zxy,deltas = [0.5],rgmed=5.8,seed=None):
    """
    Compute the trans A and B densities around every locus of one cell.

    For locus i the density of compartment X (A or B) is
        sum_j exp(-d_ij**2 / (2*delta**2))
    over all loci j labelled X that are not in the same `lens` segment of the
    same copy as i (the "cis" blocks are excluded; NaN distances are ignored).

    Parameters
    ----------
    zxy : array-like, shape (2082, 3)
        Positions (in um) of the loci of a single cell: two consecutive copies
        of the 1041-locus layout described by `AB`/`lens` (presumably the two
        homologs). Rows of NaN mark missing loci.
    deltas : sequence of float
        Cutoff distances (in um), e.g. [0.1,0.25,0.5,0.75,1].
    rgmed : float
        Radius of gyration to normalize to (the median radius of gyration
        across all cells). For the "norm" outputs, distances are rescaled by
        rgmed/rg, with rg the radius of gyration of this cell.
    seed : int or None
        Seed for the random A/B reassignment. None (default) draws from the
        global numpy random state, as before.

    Returns
    -------
    tuple of 8 lists, each with one array of shape (2082,) per entry of `deltas`:
        dic_densA, dic_densB,                       densities for the real labels
        dic_densA_norm, dic_densB_norm,             same, rg-normalized
        dic_densA_random, dic_densB_random,         densities for shuffled labels
        dic_densA_norm_random, dic_densB_norm_random

    Raises
    ------
    ValueError
        If `zxy` does not have 2082 rows.
    """
    # Imports (and the helper below) live inside the function so that it is
    # self-contained when the notebook ships it to ipyparallel engines via map_sync.
    import numpy as np
    from scipy.spatial.distance import pdist, squareform

    # Compartment label per locus ('nan' = unassigned). Kept as adjacent string
    # literals so that no physical line break can fall inside the literal.
    AB = ('B,B,A,A,B,B,A,A,A,B,A,A,A,B,A,B,B,A,B,B,B,B,B,B,B,A,B,B,A,A,A,B,B,B,B,B,B,B,B,A,nan,'
          'A,A,A,B,A,B,A,B,A,B,A,B,A,A,A,B,B,B,A,A,A,B,B,A,B,B,A,B,B,B,B,B,B,B,A,B,B,A,A,B,B,B,A,A,B,A,B,A,A,B,B,B,A,B,B,A,B,A,B,A,B,B,B,B,B,nan,'
          'A,B,A,B,B,A,B,B,A,B,B,B,B,A,B,B,A,B,A,B,B,A,B,B,A,A,A,B,B,A,B,A,A,B,B,A,B,B,B,B,A,A,B,A,B,A,B,B,A,B,B,B,B,A,B,B,A,B,A,A,B,B,A,A,A,B,B,A,B,B,A,A,B,B,B,B,B,A,B,nan,'
          'B,A,A,B,A,B,A,B,A,A,A,A,B,B,A,B,B,B,A,B,B,B,B,B,A,A,B,A,B,A,A,B,B,A,A,A,B,B,B,A,B,B,A,A,B,B,B,A,A,B,B,nan,'
          'A,A,B,B,B,B,B,B,B,B,B,A,B,B,B,A,B,B,B,B,A,B,A,A,A,B,B,B,A,A,B,B,A,B,B,A,B,B,B,B,B,A,B,A,B,A,B,B,A,B,B,B,B,B,B,B,A,B,A,B,B,nan,'
          'B,A,A,B,B,A,B,A,B,A,A,A,B,B,A,A,B,B,B,B,B,B,B,B,A,B,B,B,A,A,B,A,B,A,B,B,B,B,B,B,B,B,A,A,A,B,B,A,A,A,A,B,B,A,A,A,B,A,B,B,B,A,A,B,B,B,B,A,B,B,B,B,A,B,B,B,B,B,A,A,B,B,B,B,B,A,A,A,B,A,A,A,A,B,B,B,B,B,B,B,A,B,B,B,B,B,B,B,A,A,A,B,A,A,A,B,B,B,nan,'
          'B,A,B,B,A,A,A,A,B,B,A,B,A,A,A,A,B,B,A,B,B,B,A,B,A,A,B,B,B,B,B,B,B,B,B,A,B,B,A,B,B,B,A,B,B,A,A,nan,'
          'A,B,A,B,B,B,B,A,A,B,B,A,B,B,B,B,B,A,B,A,B,B,B,B,A,A,B,B,B,B,B,A,nan,'
          'B,B,B,B,B,B,B,B,A,B,B,A,B,nan,nan,'
          'B,B,B,B,B,B,B,B,B,B,A,A,B,A,B,A,A,B,B,A,A,A,A,B,B,B,A,B,A,A,A,B,B,B,A,A,B,nan,A,nan,'
          'A,B,B,B,B,B,A,A,A,A,B,B,A,B,A,B,B,A,B,B,B,B,B,B,B,B,B,B,A,B,A,A,B,B,B,A,B,B,A,A,B,B,B,A,nan,'
          'B,B,B,A,A,A,A,A,B,B,B,B,A,A,B,B,A,B,A,B,A,B,A,B,B,B,B,A,A,B,B,B,B,B,B,A,B,B,nan,'
          'B,B,B,A,A,A,A,B,B,A,B,B,B,A,B,B,B,A,A,B,B,B,A,B,B,B,B,B,A,B,B,A,nan,'
          'A,A,B,B,B,B,B,A,A,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,nan,nan,'
          'B,B,B,B,B,B,B,B,B,B,A,A,B,B,B,B,A,B,A,B,B,B,B,B,B,B,B,A,A,nan,nan,'
          'B,B,B,B,A,B,A,A,B,A,B,B,B,B,B,A,A,A,B,A,A,B,B,B,A,B,B,B,B,A,B,B,B,B,A,B,B,B,B,A,B,B,nan,'
          'B,B,B,A,B,B,B,A,A,B,B,B,B,B,A,A,A,A,A,B,B,B,A,A,B,nan,'
          'B,A,B,B,A,A,A,A,A,A,B,B,B,A,A,A,A,B,B,A,A,A,A,B,B,B,A,A,B,nan,nan,'
          'A,A,B,B,B,B,A,B,A,B,A,B,B,B,A,A,B,B,B,A,A,B,A,A,A,A,A,A,B,B,A,B,A,B,A,A,B,B,nan,nan,'
          'B,B,B,B,B,B,A,A,A,A,A,A,A,B,B,B,B,B,B,A,B,B,B,B,B,B,B,B,B,B,B,nan,nan,nan,'
          'A,A,A,B,B,B,B,B,B,A,B,B,B,B,B,B,A,nan,B,B,nan,nan,'
          'B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,A,B,B,B,B,B,B,A,A,nan,nan,nan,nan,'
          'B,A,A,A,A,A,B,A,A,A,A,A,B,B,A,A,A,A,A,A,A,A,A,A,B,B,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,nan,'
          'A,A,A,A,A,A,A,A,A,A,A,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B')
    AB = np.array(AB.split(','))
    # Number of loci in each cis segment (presumably one entry per chromosome).
    lens = [76, 80, 66, 63, 60, 55, 53, 48, 40, 43, 44, 44, 33, 30, 31, 30, 33, 33, 33, 33, 31, 31, 51]
    edges = np.cumsum([0]+lens)
    nloci = int(edges[-1])

    zxy = np.asarray(zxy, dtype=float)
    if len(AB) != nloci or zxy.ndim != 2 or zxy.shape[0] != 2*nloci:
        raise ValueError('zxy must have %d rows (two copies of %d loci); got shape %s'
                         % (2*nloci, nloci, zxy.shape))

    def label_columns(labels):
        """Column indices of the A loci and of the B loci, with the per-locus
        labels repeated for the two copies held in one cell."""
        doubled = np.concatenate([labels, labels])
        return np.where(doubled == 'A')[0], np.where(doubled == 'B')[0]

    A, B = label_columns(AB)

    ### random A/B assignment (control): shuffle the labels across loci
    if seed is None:
        shuffle = np.random.permutation(len(AB))
    else:
        shuffle = np.random.RandomState(seed).permutation(len(AB))
    A_rnd, B_rnd = label_columns(AB[shuffle])

    M = squareform(pdist(zxy))
    # radius of gyration of this cell (NaN rows are ignored)
    rg = np.sqrt(np.nanmean(np.sum((zxy-np.nanmean(zxy,axis=0))**2,-1),-1))

    #exclude cis: blank the diagonal block of every segment, in both copies
    for start, stop in zip(edges[:-1], edges[1:]):
        M[start:stop, start:stop] = np.nan
        M[nloci+start:nloci+stop, nloci+start:nloci+stop] = np.nan

    Msq = M**2
    rgmed_rg_sq = (rgmed/rg)**2

    dic_densA = [] #A density with the cells
    dic_densB = [] #B density with the cells
    dic_densA_norm = [] #normalized A density with the cells normalized to have the same scale (rg)
    dic_densB_norm = [] #normalized B density with the cells normalized to have the same scale (rg)
    dic_densA_random = []
    dic_densB_random = []
    dic_densA_norm_random = []
    dic_densB_norm_random = []

    for delta in deltas:
        deltasq = 2*delta**2
        # The Gaussian kernels do not depend on the labels, so each is computed
        # once and reused for the real and the shuffled assignment.
        kernel_norm = np.exp(-Msq/deltasq*rgmed_rg_sq)
        kernel = np.exp(-Msq/deltasq)

        dic_densA_norm.append(np.nansum(kernel_norm[:, A], axis=-1))
        dic_densB_norm.append(np.nansum(kernel_norm[:, B], axis=-1))
        dic_densA.append(np.nansum(kernel[:, A], axis=-1))
        dic_densB.append(np.nansum(kernel[:, B], axis=-1))

        dic_densA_norm_random.append(np.nansum(kernel_norm[:, A_rnd], axis=-1))
        dic_densB_norm_random.append(np.nansum(kernel_norm[:, B_rnd], axis=-1))
        dic_densA_random.append(np.nansum(kernel[:, A_rnd], axis=-1))
        dic_densB_random.append(np.nansum(kernel[:, B_rnd], axis=-1))

    return dic_densA,dic_densB,dic_densA_norm,dic_densB_norm,dic_densA_random,dic_densB_random,dic_densA_norm_random,dic_densB_norm_random

### Run the density analysis

Note: This is slow, so I recommend running it in parallel using ipyparallel

First start a terminal in jupyter:
Go to:
http://localhost:8888/tree and click new>Terminal

and then input: >ipcluster start -n 40 

In [4]:
# Connect to the running ipyparallel cluster (started with `ipcluster start`, see the note above).
import ipyparallel as ipp
from ipyparallel import Client
rc = Client()
# Size of the client, i.e. how many engines it sees (40 in the recorded run).
print(len(rc))

40


In [5]:
import time

# Wall-clock timing of the parallel step: zxy_to_dens is mapped over the
# entries of zxy (one per cell) on the engine view rc[:40].
start = time.time()
engines = rc[:40]
res = engines.map_sync(zxy_to_dens, zxy)
end = time.time()
elapsed = end - start
print(elapsed)

# Stack the per-cell results into a single array.
res = np.array(res)

489.06400013


In [6]:
# Save the stacked density results to disk (the parallel computation above is slow to redo).
np.save(r'densityIMR90Untreated.npy',res)

#### Display the population average

In [None]:
# Cells to include in the population statistics (here: all of them).
keep = slice(None)


def _median_per_locus(per_cell):
    """Fold the last axis into rows of 1041 loci and take the NaN-ignoring median of each column."""
    return np.nanmedian(per_cell.reshape([-1,1041]),0)


# Real A/B labels: result slots 0 (A density) and 1 (B density), first cutoff distance.
AD, BD = res[keep,0,0,:], res[keep,1,0,:]
Ad, Bd = _median_per_locus(AD), _median_per_locus(BD)
ABratio = _median_per_locus(AD/BD)

# Shuffled A/B labels: result slots 4 and 5, first cutoff distance.
AD_rnd, BD_rnd = res[keep,4,0,:], res[keep,5,0,:]
Ad_rnd, Bd_rnd = _median_per_locus(AD_rnd), _median_per_locus(BD_rnd)
ABratio_rnd = _median_per_locus(AD_rnd/BD_rnd)

# Compartment label per locus ('nan' = unassigned); this must stay identical to
# the copy inside zxy_to_dens. Written as adjacent string literals so that no
# physical line break can fall inside the literal.
AB = ('B,B,A,A,B,B,A,A,A,B,A,A,A,B,A,B,B,A,B,B,B,B,B,B,B,A,B,B,A,A,A,B,B,B,B,B,B,B,B,A,nan,'
      'A,A,A,B,A,B,A,B,A,B,A,B,A,A,A,B,B,B,A,A,A,B,B,A,B,B,A,B,B,B,B,B,B,B,A,B,B,A,A,B,B,B,A,A,B,A,B,A,A,B,B,B,A,B,B,A,B,A,B,A,B,B,B,B,B,nan,'
      'A,B,A,B,B,A,B,B,A,B,B,B,B,A,B,B,A,B,A,B,B,A,B,B,A,A,A,B,B,A,B,A,A,B,B,A,B,B,B,B,A,A,B,A,B,A,B,B,A,B,B,B,B,A,B,B,A,B,A,A,B,B,A,A,A,B,B,A,B,B,A,A,B,B,B,B,B,A,B,nan,'
      'B,A,A,B,A,B,A,B,A,A,A,A,B,B,A,B,B,B,A,B,B,B,B,B,A,A,B,A,B,A,A,B,B,A,A,A,B,B,B,A,B,B,A,A,B,B,B,A,A,B,B,nan,'
      'A,A,B,B,B,B,B,B,B,B,B,A,B,B,B,A,B,B,B,B,A,B,A,A,A,B,B,B,A,A,B,B,A,B,B,A,B,B,B,B,B,A,B,A,B,A,B,B,A,B,B,B,B,B,B,B,A,B,A,B,B,nan,'
      'B,A,A,B,B,A,B,A,B,A,A,A,B,B,A,A,B,B,B,B,B,B,B,B,A,B,B,B,A,A,B,A,B,A,B,B,B,B,B,B,B,B,A,A,A,B,B,A,A,A,A,B,B,A,A,A,B,A,B,B,B,A,A,B,B,B,B,A,B,B,B,B,A,B,B,B,B,B,A,A,B,B,B,B,B,A,A,A,B,A,A,A,A,B,B,B,B,B,B,B,A,B,B,B,B,B,B,B,A,A,A,B,A,A,A,B,B,B,nan,'
      'B,A,B,B,A,A,A,A,B,B,A,B,A,A,A,A,B,B,A,B,B,B,A,B,A,A,B,B,B,B,B,B,B,B,B,A,B,B,A,B,B,B,A,B,B,A,A,nan,'
      'A,B,A,B,B,B,B,A,A,B,B,A,B,B,B,B,B,A,B,A,B,B,B,B,A,A,B,B,B,B,B,A,nan,'
      'B,B,B,B,B,B,B,B,A,B,B,A,B,nan,nan,'
      'B,B,B,B,B,B,B,B,B,B,A,A,B,A,B,A,A,B,B,A,A,A,A,B,B,B,A,B,A,A,A,B,B,B,A,A,B,nan,A,nan,'
      'A,B,B,B,B,B,A,A,A,A,B,B,A,B,A,B,B,A,B,B,B,B,B,B,B,B,B,B,A,B,A,A,B,B,B,A,B,B,A,A,B,B,B,A,nan,'
      'B,B,B,A,A,A,A,A,B,B,B,B,A,A,B,B,A,B,A,B,A,B,A,B,B,B,B,A,A,B,B,B,B,B,B,A,B,B,nan,'
      'B,B,B,A,A,A,A,B,B,A,B,B,B,A,B,B,B,A,A,B,B,B,A,B,B,B,B,B,A,B,B,A,nan,'
      'A,A,B,B,B,B,B,A,A,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,nan,nan,'
      'B,B,B,B,B,B,B,B,B,B,A,A,B,B,B,B,A,B,A,B,B,B,B,B,B,B,B,A,A,nan,nan,'
      'B,B,B,B,A,B,A,A,B,A,B,B,B,B,B,A,A,A,B,A,A,B,B,B,A,B,B,B,B,A,B,B,B,B,A,B,B,B,B,A,B,B,nan,'
      'B,B,B,A,B,B,B,A,A,B,B,B,B,B,A,A,A,A,A,B,B,B,A,A,B,nan,'
      'B,A,B,B,A,A,A,A,A,A,B,B,B,A,A,A,A,B,B,A,A,A,A,B,B,B,A,A,B,nan,nan,'
      'A,A,B,B,B,B,A,B,A,B,A,B,B,B,A,A,B,B,B,A,A,B,A,A,A,A,A,A,B,B,A,B,A,B,A,A,B,B,nan,nan,'
      'B,B,B,B,B,B,A,A,A,A,A,A,A,B,B,B,B,B,B,A,B,B,B,B,B,B,B,B,B,B,B,nan,nan,nan,'
      'A,A,A,B,B,B,B,B,B,A,B,B,B,B,B,B,A,nan,B,B,nan,nan,'
      'B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,A,B,B,B,B,B,B,A,A,nan,nan,nan,nan,'
      'B,A,A,A,A,A,B,A,A,A,A,A,B,B,A,A,A,A,A,A,A,A,A,A,B,B,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,nan,'
      'A,A,A,A,A,A,A,A,A,A,A,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B')
AB=np.array(AB.split(','))
# Number of loci in each cis segment (presumably one entry per chromosome) and their cumulative edges.
lens = [76, 80, 66, 63, 60, 55, 53, 48, 40, 43, 44, 44, 33, 30, 31, 30, 33, 33, 33, 33, 31, 31, 51]
edges = np.cumsum([0]+lens)
# Boolean masks over the 1041 loci; loci labelled 'nan' belong to neither.
A,B = AB=='A',AB=='B'


xmin,xmax,nbins=0.5,1,23
# Shared bin edges for the three histograms (nbins edges -> nbins-1 bins).
bins = np.linspace(xmin,xmax,nbins)


def _finite(values):
    """Return only the entries of `values` that are neither NaN nor infinite."""
    return values[np.isfinite(values)]


fig, ax = plt.subplots()

# `density=True` replaces `normed=True`, which was removed in Matplotlib 3.1.
# Loci labelled A (red)
dif = _finite(ABratio[A])
ct1 = ax.hist(dif,bins=bins,density=True,color='r',alpha=0.75)
print('A',np.nanmedian(dif))

# Loci labelled B (blue)
dif = _finite(ABratio[B])
print('B',np.nanmedian(dif))
ct2 = ax.hist(dif,bins=bins,density=True,color='b',alpha=0.75)

# Shuffled-label control, all loci (black). It gets its own name so the A
# histogram in ct1 is no longer overwritten.
dif = _finite(ABratio_rnd)
dif_rnd = dif.copy()
ct_rnd = ax.hist(dif,bins=bins,density=True,color='k',alpha=0.75)
print('Rnd',np.nanmedian(dif))

xvals=[0.5,0.75,1]
ax.set_xticks(xvals)
ax.set_xticklabels(xvals)

# With density=True the bar heights sum to 1/bin_width, so a bin holding a
# fraction f of the plotted loci has height f*sum(heights); the y ticks are
# placed at those heights and labelled in percent.
yvals = np.array([0,0.10,0.20])
ytick_pos = yvals*np.sum(ct2[0])
ax.set_yticks(ytick_pos)
ax.set_yticklabels((yvals*100).astype(int))
ax.set_xlabel("Median A/B density ratio")
ax.set_ylabel("% of loci")
ax.set_ylim([0,np.max(ytick_pos)])