__Author__: Bogdan Bintu

__Email__: bbintu@g.harvard.edu

__Date__: 3/4/2020

In [None]:
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['font.size']=15
matplotlib.rcParams['font.family']='Arial'
import matplotlib.pyplot as plt
import numpy as np
import os

In [None]:
# Folder containing the .tsv data files -- edit this to match your local setup.
data_folder = r'\data'
# Folder holding the saved .npy results loaded further down; it starts out as
# the same location as the raw data.
save_data = data_folder

### 1. Load and organize transcription data

In [None]:
# Read the tab-separated table: row 0 is the header, every later row is one record.
folder = data_folder
tsv_path = folder+os.sep+r'genomic-scale-with transcription and nuclear bodies.tsv'
# A context manager guarantees the file handle is closed once parsing is done.
with open(tsv_path,'r') as fid:
    # Strip only the line terminator (ln[:-1] would also drop a real character when
    # the last line has no trailing newline) and skip completely empty lines.
    lines = np.array([ln.rstrip('\n').split('\t') for ln in fid if ln.rstrip('\n')])
# The first three columns are parsed as coordinates. `np.float` was removed in
# NumPy 1.24; the builtin `float` is the same dtype.
zxy = np.array(lines[1:,:3],dtype=float)
# Group the rows in blocks of 2082 and divide by 1000 to transform to um.
# NOTE(review): 2082 = 2 x 1041, presumably two copies of 1041 loci per cell -- confirm.
zxy = zxy.reshape([-1,2082,3])/1000
head = list(lines[0])
# One experiment id per block of 2082 rows (taken from the block's first row), shifted by -3.
# NOTE(review): the -3 offset presumably makes the ids zero-based -- confirm against the data.
experiment = np.array(lines[1::2082,head.index('experiment number')].astype(int))-3

In [None]:
def _transcription_flag(state):
    # 1 when the entry contains 'on', NaN when it is empty or the text 'nan', else 0.
    if 'on' in state:
        return 1
    if state in ('', 'nan'):
        return np.nan
    return 0


# Column 8 of every data row (the header row is skipped) holds the transcription call.
Transcr = [_transcription_flag(state) for state in lines[1:,8]]
# Arrange the flags as rows of 1041 entries.
Transcr_WT = np.array(Transcr).reshape([-1,1041])

#### Load A/B density

In [None]:
#Please see Part 2 for how to calculate the densities
# Build the path with os.sep, like the .tsv path above, instead of a hard-coded
# backslash so the file is also found on non-Windows systems.
resWT = np.load(save_data+os.sep+'densityIMR90Untreated.npy')
keep = slice(None)  # keep every entry along the first axis
# NOTE(review): index 0 / 1 on the second axis are taken to be the A / B densities,
# going by the variable names -- confirm against Part 2.
AD = resWT[keep,0,0,:]
BD = resWT[keep,1,0,:]
Dratio_WT = AD/BD
# Rows of 1041 entries; keep only the trailing rows so the row count matches Transcr_WT.
Dratio_WT = Dratio_WT.reshape([-1,1041])[-len(Transcr_WT):]
# An entry is "good" when its first coordinate in zxy is not NaN.
good_WT = (~np.isnan(zxy[:,:,0])).reshape([-1,1041])

### 2. Calculate the transcriptional firing rate of A and B genes

In [None]:
AB = 'B,B,A,A,B,B,A,A,A,B,A,A,A,B,A,B,B,A,B,B,B,B,B,B,B,A,B,B,A,A,A,B,B,B,B,B,B,B,B,A,nan,A,A,A,B,A,B,A,B,A,B,A,B,A,A,A,B,B,B,A,A,A,B,B,A,B,B,A,B,B,B,B,B,B,B,A,B,B,A,A,B,B,B,A,A,B,A,B,A,A,B,B,B,A,B,B,A,B,A,B,A,B,B,B,B,B,nan,A,B,A,B,B,A,B,B,A,B,B,B,B,A,B,B,A,B,A,B,B,A,B,B,A,A,A,B,B,A,B,A,A,B,B,A,B,B,B,B,A,A,B,A,B,A,B,B,A,B,B,B,B,A,B,B,A,B,A,A,B,B,A,A,A,B,B,A,B,B,A,A,B,B,B,B,B,A,B,nan,B,A,A,B,A,B,A,B,A,A,A,A,B,B,A,B,B,B,A,B,B,B,B,B,A,A,B,A,B,A,A,B,B,A,A,A,B,B,B,A,B,B,A,A,B,B,B,A,A,B,B,nan,A,A,B,B,B,B,B,B,B,B,B,A,B,B,B,A,B,B,B,B,A,B,A,A,A,B,B,B,A,A,B,B,A,B,B,A,B,B,B,B,B,A,B,A,B,A,B,B,A,B,B,B,B,B,B,B,A,B,A,B,B,nan,B,A,A,B,B,A,B,A,B,A,A,A,B,B,A,A,B,B,B,B,B,B,B,B,A,B,B,B,A,A,B,A,B,A,B,B,B,B,B,B,B,B,A,A,A,B,B,A,A,A,A,B,B,A,A,A,B,A,B,B,B,A,A,B,B,B,B,A,B,B,B,B,A,B,B,B,B,B,A,A,B,B,B,B,B,A,A,A,B,A,A,A,A,B,B,B,B,B,B,B,A,B,B,B,B,B,B,B,A,A,A,B,A,A,A,B,B,B,nan,B,A,B,B,A,A,A,A,B,B,A,B,A,A,A,A,B,B,A,B,B,B,A,B,A,A,B,B,B,B,B,B,B,B,B,A,B,B,A,B,B,B,A,B,B,A,A,nan,A,B,A,B,B,B,B,A,A,B,B,A,B,B,B,B,B,A,B,A,B,B,B,B,A,A,B,B,B,B,B,A,nan,B,B,B,B,B,B,B,B,A,B,B,A,B,nan,nan,B,B,B,B,B,B,B,B,B,B,A,A,B,A,B,A,A,B,B,A,A,A,A,B,B,B,A,B,A,A,A,B,B,B,A,A,B,nan,A,nan,A,B,B,B,B,B,A,A,A,A,B,B,A,B,A,B,B,A,B,B,B,B,B,B,B,B,B,B,A,B,A,A,B,B,B,A,B,B,A,A,B,B,B,A,nan,B,B,B,A,A,A,A,A,B,B,B,B,A,A,B,B,A,B,A,B,A,B,A,B,B,B,B,A,A,B,B,B,B,B,B,A,B,B,nan,B,B,B,A,A,A,A,B,B,A,B,B,B,A,B,B,B,A,A,B,B,B,A,B,B,B,B,B,A,B,B,A,nan,A,A,B,B,B,B,B,A,A,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,nan,nan,B,B,B,B,B,B,B,B,B,B,A,A,B,B,B,B,A,B,A,B,B,B,B,B,B,B,B,A,A,nan,nan,B,B,B,B,A,B,A,A,B,A,B,B,B,B,B,A,A,A,B,A,A,B,B,B,A,B,B,B,B,A,B,B,B,B,A,B,B,B,B,A,B,B,nan,B,B,B,A,B,B,B,A,A,B,B,B,B,B,A,A,A,A,A,B,B,B,A,A,B,nan,B,A,B,B,A,A,A,A,A,A,B,B,B,A,A,A,A,B,B,A,A,A,A,B,B,B,A,A,B,nan,nan,A,A,B,B,B,B,A,B,A,B,A,B,B,B,A,A,B,B,B,A,A,B,A,A,A,A,A,A,B,B,A,B,A,B,A,A,B,B,nan,nan,B,B,B,B,B,B,A,A,A,A,A,A,A,B,B,B,B,B,B,A,B,B,B,B,B,B,B,B,B,B,B,nan,nan,nan,A,A,A,B,B,B,B,B,B,A,B,B,B,B,B,B,A,nan,B,B,nan,nan,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,A,B,B,B,B,B,B,A,A,nan,nan,nan,na
n,B,A,A,A,A,A,B,A,A,A,A,A,B,B,A,A,A,A,A,A,A,A,A,A,B,B,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,nan,A,A,A,A,A,A,A,A,A,A,A,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B'
# Turn the comma-separated label string into an array with one label per entry.
AB = np.array(AB.split(','))
# Boolean masks selecting the entries labelled 'A' and 'B' ('nan' entries are in neither).
A = AB == 'A'
B = AB == 'B'
# 23 segment lengths that sum to 1041.
# NOTE(review): presumably the number of loci per chromosome -- confirm.
lens = [76, 80, 66, 63, 60, 55, 53, 48, 40, 43, 44, 44, 33, 30, 31, 30, 33, 33, 33, 33, 31, 31, 51]
# Cumulative boundaries of those segments, starting at 0.
edges = np.cumsum([0, *lens])

In [None]:
from tqdm import tqdm_notebook as tqdm


def _collect_gene_states(rows):
    """Group the per-row transcription calls by gene name.

    rows: iterable of (names, states) string pairs; both are comma-separated
        lists that are paired up element by element.
    Returns a dict mapping each name to the list of its states, in row order.
    """
    states_by_gene = {}
    for nms,trs in tqdm(rows):
        # Skip rows with no names or whose state field is the text 'nan'.
        if trs!= 'nan' and nms!='':
            for gn,tr in zip(nms.split(','),trs.split(',')):
                # Append in place: `d.get(gn,[])+[tr]` rebuilt the whole list on
                # every hit, which is quadratic in the number of entries per gene.
                states_by_gene.setdefault(gn,[]).append(tr)
    return states_by_gene


# Columns 7 and 8 of every data row, arranged as rows of 1041 entries.
# NOTE(review): column 7 is presumably the gene names and column 8 their on/off calls -- confirm.
lines_ = lines[1:,7:9].reshape([-1,1041,2])
# Split the entries by their A / B label and flatten back to a list of pairs.
lines_A = lines_[:,A].reshape([-1,2])
lines_B = lines_[:,B].reshape([-1,2])
dic_tr_A = _collect_gene_states(lines_A)
dic_tr_B = _collect_gene_states(lines_B)

In [None]:
# For every gene: the fraction of its recorded states that equal 'on'.
frsA = np.array([np.mean(np.array(states)=='on') for states in dic_tr_A.values()])
frsB = np.array([np.mean(np.array(states)=='on') for states in dic_tr_B.values()])

# 20 logarithmically spaced bin edges from 10**-2.5 to 1, shared by both histograms;
# values below the first edge (including exact zeros) fall outside the bins.
log_bins = 10**(np.linspace(-2.5,0,20))

fig = plt.figure()
ax = plt.gca()
ax.hist(frsA,bins=log_bins,color='r',alpha=0.75,label='A regions')
ax.hist(frsB,bins=log_bins,color='b',alpha=0.75,label='B regions')
ax.set_xscale("log")
ax.legend()
ax.set_xlabel('Bursting frequency')
ax.set_ylabel('Number of genes')
plt.show()

### 3. Calculate A/B density ratio as a function of transcriptional state

In [None]:
# Boolean mask of entries flagged 1 (transcribing); NaN flags compare False.
Transcr = Transcr_WT==1
# NOTE(review): entries whose flag is NaN are False in Transcr, so `~Transcr` puts
# them in the silent group below -- confirm this is intended.
transcribing = Transcr&good_WT
silent = (~Transcr)&good_WT

# Median density ratio per column, over transcribing and over silent entries only.
DratioT = np.where(transcribing,Dratio_WT,np.nan)
dT = np.nanmedian(DratioT,0)
DratioS = np.where(silent,Dratio_WT,np.nan)
dS = np.nanmedian(DratioS,0)

# Log2 fold change transcribing/silent; drop undefined columns and sort for plotting.
logTS = np.log(dT/dS)/np.log(2)
nanlogTS = logTS[~np.isnan(logTS)]
nanlogTS_sort = np.sort(nanlogTS)
# Fraction of the remaining columns with a positive log2 fold change.
fr = np.mean(nanlogTS_sort>0)

fig = plt.figure()
plt.title('Trascribing/Silent\n'+str(np.round(fr,2))+' increase')
plt.plot(nanlogTS_sort,'o',color='orange')
# Thick black reference line at zero (no change).
plt.plot([0,len(nanlogTS_sort)],[0,0],lw=4,color='k')
plt.ylabel('Log2 difference of \ntrans A/B density ratio')
plt.xlabel('Locus index')

### 4. Calculate the transcriptional state as a function of A/B density

In [None]:
# Boolean mask of entries flagged 1 (transcribing); NaN flags compare False.
Transcr = Transcr_WT==1
# Per-column upper and lower quartile of the density ratio, ignoring NaNs.
perc_75 = np.nanpercentile(Dratio_WT,75,axis=0)
perc_25 = np.nanpercentile(Dratio_WT,25,axis=0)
valid_on = Transcr&good_WT

# Top quartile: fraction of the good entries above the 75th percentile that are transcribing.
keep = (Dratio_WT>perc_75)&good_WT
Tr_high = valid_on&keep
fr_high = Tr_high.sum(axis=0)/keep.sum(axis=0)

# Bottom quartile: the same fraction for good entries below the 25th percentile.
keep = (Dratio_WT<perc_25)&good_WT
Tr_low = valid_on&keep
fr_low = Tr_low.sum(axis=0)/keep.sum(axis=0)

# Log2 fold change high/low; drop NaN columns (infinite values are kept) and sort.
logTS = np.log(fr_high/fr_low)/np.log(2)
nanlogTS = logTS[~np.isnan(logTS)]
nanlogTS_sort = np.sort(nanlogTS)

# Fraction of the remaining columns with a positive log2 fold change.
fr = np.mean(nanlogTS_sort>0)

fig = plt.figure()
plt.title('High/low trans A/B density ratio\n'+str(np.round(fr,2))+' increase')
plt.plot(nanlogTS_sort,'o',color='orange')
# Thick black reference line at zero (no change).
plt.plot([0,len(nanlogTS_sort)],[0,0],lw=4,color='k')
plt.ylabel('Log2 difference of firing rate')
plt.xlabel('Locus index')