In [1]:
from mphelper import ProcessWrapPool
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import biodataplot.metaplot as bpm
from biodata.bigwig import BigWigIReader
from biodata.bed import BEDXReader
from genomictools import GenomicCollection
import json

In [2]:
import sys
from pathlib import Path
# Put the parent of the current working directory on the import path *before*
# importing `utils` (presumably a project-local module that lives there — confirm).
sys.path.append(str(Path.cwd().parent))
import utils

In [3]:
import matplotlib
# A bare `import matplotlib` does not guarantee that the `font_manager` submodule
# is loaded; import it explicitly instead of relying on pyplot having pulled it
# in from an earlier cell.
import matplotlib.font_manager

# Register the Arial faces shipped in ../font (relative to the working directory),
# then make Arial the default font family for every figure.
font_dir = Path.cwd().parent / "font"
for font in ["Arial.ttf", "Arial_Bold.ttf"]:
    matplotlib.font_manager.fontManager.addfont(font_dir / font)
matplotlib.rcParams["font.family"] = "Arial"

# Font sizes read by the plotting function further down.
bfontsize = 12  # axis labels and panel titles
sfontsize = 9   # tick labels

In [4]:
# Single place to edit when the project tree moves; the three directories below
# are derived from it and keep their trailing slash because later cells build
# paths by plain string concatenation.
PROJECT_ROOT = "/fs/cbsuhy02/storage/yc2553/yc2553/projects/TRE_directionality/"
PROJECT_DIR_d = f"{PROJECT_ROOT}PROcap/"     # BED files of the regions to plot
PROJECT_DIR_r = f"{PROJECT_ROOT}resources/"  # external resources (ENCODE bigWigs)
PROJECT_DIR_o = f"{PROJECT_ROOT}output/"     # figures and label files

# Generate feature matrix

## Get individual matrix

In [5]:
# Sample identifier used in the input/output file names.
s = "C1"
# Element classes, distance classes and the "is this a control set?" suffix.
ps = ["divergent", "unidirectional"]
ds = ["proximal", "distal"]
cs = ["", "_control"]
# Every class/distance/suffix combination, with the suffix varying fastest.
gs = [
    f"{p}_{d}{c}"
    for p in ps
    for d in ds
    for c in cs
]

In [6]:
# Signal track (bigWig) for each mark, keyed by the mark's name.
bws = dict(
    PHF8=f"{PROJECT_DIR_r}ENCODE/ENCFF092HFK.bigWig",
    SMAD5=f"{PROJECT_DIR_r}ENCODE/ENCFF094FAV.bigWig",
)
# Mark names in insertion order; this order fixes the figure's column order.
ms = [*bws]

In [7]:
# Build one signal matrix per (mark, region group). Rows are indexed by
# "chrom_start_end"; each matrix comes from a 1001-wide signal profile.
df_features = {}
for g in gs:
    bed = f"{PROJECT_DIR_d}bed_plot/{s}_{g}.bed"
    # Decide from the group name, not from the full path: a directory whose name
    # happens to contain "control" would otherwise flag every group as a control.
    is_control = "control" in g
    if is_control:
        # Control regions are read without a strand column ...
        all_regions = BEDXReader.read_all(GenomicCollection, bed)
    else:
        # ... whereas real elements take their strand from field "f7".
        all_regions = BEDXReader.read_all(GenomicCollection, bed, strandfield="f7")
    use_strand = not is_control
    index = [f"{r.chrom}_{r.chromStart}_{r.chromEnd}" for r in all_regions]

    for m in bws:
        df_features[(m, g)] = pd.DataFrame(
            bpm.generate_signal_profile(all_regions, BigWigIReader(bws[m]), fixed_size=1001, use_strand=use_strand),
            index=index,
        )

In [8]:
# Reshape every wide matrix into long form: one row per (region, position)
# with the signal stored in the "Feature" column.
df_reformat = {}
for k, wide in df_features.items():
    df = wide.reset_index()
    position_columns = list(df.columns[1:])  # everything except the region id
    df_reformat[k] = pd.melt(
        df,
        id_vars="index",
        value_vars=position_columns,
        var_name="Position",
        value_name="Feature",
    )

## Combine dataframes for plotting

In [9]:
# For each (mark, distance class) stack the long tables of both element classes
# and their controls, tagging rows with the legend label they belong to.
df_metaplots = {}
for m, d in itertools.product(ms, ds):
    labelled = []
    for p in ps:
        # Element set first, then its matched control (both controls share "Ctrl").
        for suffix, label in (("", p.capitalize()), ("_control", "Ctrl")):
            part = df_reformat[(m, f"{p}_{d}{suffix}")].copy()
            part["Label"] = label
            labelled.append(part)
    df_metaplots[(m, d)] = pd.concat(labelled, ignore_index=True)

# Divergent vs. unidirectional

## Settings

In [10]:
# Print the highest and lowest per-position mean across the three labels for
# every (mark, distance class); the values guide the y-limits chosen below.
labels = ["Ctrl", "Divergent", "Unidirectional"]
for m, d in itertools.product(ms, ds):
    df = df_metaplots[(m, d)]
    # Mean signal at each position, one profile per label.
    profiles = [
        df[df["Label"] == label].groupby("Position")["Feature"].mean()
        for label in labels
    ]
    highest = max(profile.max() for profile in profiles)
    lowest = min(profile.min() for profile in profiles)
    print(d, m, highest, lowest)

proximal PHF8 22.722070183348237 0.6881722265708423
distal PHF8 5.15912518238369 0.692488691482435
proximal SMAD5 5.425667955796219 0.5927417862067668
distal SMAD5 2.4410166774164646 0.5811491189730662


## Generate metaplots & heatmaps

In [11]:
def generate_metaplot_and_heatmap(d, ps, ms, df_metaplots, df_heatmaps, sort_file, ylims, yticks, cmap, outputfile, test, xlabel="Distance (kb)"):
    """Plot a metaplot above two heatmaps for every mark and save the figure.

    The figure has one column per entry of ``ms`` and five rows: the metaplot,
    a heatmap for ``ps[0]``, a heatmap for ``ps[1]``, a blank spacer, and a
    horizontal colorbar attached to the second heatmap.

    Parameters
    ----------
    d : str
        Group key; selects ``df_metaplots[(m, d)]`` and
        ``df_heatmaps[(m, f"{p}_{d}")]``.
    ps : sequence of str
        Heatmap row labels; only ``ps[0]`` and ``ps[1]`` are used.
    ms : sequence of str
        Marks to plot, one column each; also used as column titles.
    df_metaplots, df_heatmaps : dict
        Inputs handed to ``utils.generate_feature_metaplot`` and
        ``utils.generate_feature_heatmap`` respectively.
    sort_file
        Forwarded unchanged to ``utils.generate_feature_heatmap``.
    ylims, yticks : sequence
        Per-mark y-limits and tick positions, indexed like ``ms``. ``yticks``
        is also passed to the heatmap helper and reused as colorbar ticks.
    cmap
        Colormap passed to the heatmap helper.
    outputfile : str
        Destination passed to ``savefig``.
    test
        Forwarded unchanged to both ``utils`` helpers.
    xlabel : str
        X-axis label shown under the bottom heatmap row.
    """
    height_ratios = [5, 5, 5, 0.8, 0.5]
    # squeeze=False keeps `axes` two-dimensional even for a single mark, so the
    # same [row, col] indexing is valid for any len(ms). (Without it a single
    # column yields a 1-D array and `axes[3, col]` raises.)
    fig, axes = plt.subplots(
        len(height_ratios),
        len(ms),
        figsize=(4, 5.2),
        gridspec_kw={'height_ratios': height_ratios},
        squeeze=False,
    )
    # Row 3 is only a spacer between the heatmaps and the colorbar.
    for col in range(len(ms)):
        axes[3, col].axis("off")

    labelpad = 2
    y_align = -0.2
    wspace = 0.3
    hspace = 0.2

    hue_order = ["Ctrl", "Divergent", "Unidirectional"]
    palette = ["#d9d9d9", "#313695", "#de77ae"]

    # Shared x-axis: tick positions 0/500/1000 are labelled -0.5/0/0.5.
    xtick_list = [0, 500, 1000]
    xticklabel_list = ["-0.5", "0", "0.5"]

    for col, m in enumerate(ms):
        for row in range(3):
            ax = axes[row, col]

            # Metaplot
            if row == 0:
                utils.generate_feature_metaplot(df_metaplots[(m, d)], palette, hue_order, ax, test)

                ax.set_ylim(ylims[col])
                ax.set_yticks(yticks[col])
                ax.tick_params(axis="y", labelsize=sfontsize, pad=labelpad)
                # Only the left-most column carries y-axis labels.
                if col == 0:
                    ax.set_ylabel("ChIP-seq", fontsize=bfontsize, fontweight="bold")
                    ax.get_yaxis().set_label_coords(y_align, 0.5)
                else:
                    ax.set_ylabel("")

                ax.set_title(m, fontsize=bfontsize, pad=labelpad + 5, fontweight="bold")
                # Replace whatever legend the helper produced with an empty one.
                ax.legend([], [], frameon=False)

            # Heatmap
            else:
                # Only the last heatmap of a column draws the colorbar (row 4).
                cbar = row == 2
                cbar_ax = axes[4, col] if cbar else None
                cbar_kws = {"ticks": yticks[col], "orientation": "horizontal"}

                utils.generate_feature_heatmap(df_heatmaps[(m, f"{ps[row-1]}_{d}")], yticks[col], cmap, cbar, cbar_ax, cbar_kws, ax, sort_file, test)

                if cbar:
                    cbar_ax.set_xticklabels(yticks[col])
                    cbar_ax.tick_params(axis="x", labelsize=sfontsize, pad=labelpad)

                if col == 0:
                    ax.set_ylabel(ps[row - 1].capitalize(), fontsize=bfontsize, fontweight="bold")
                    ax.get_yaxis().set_label_coords(y_align / 2, 0.5)
                else:
                    ax.set_ylabel("")

            ax.set_xlim([0, 1000])
            ax.set_xticks(xtick_list)
            # Tick labels and the axis title appear on the bottom heatmap only.
            if row == 2:
                ax.set_xticklabels(xticklabel_list)
                ax.set_xlabel(xlabel, fontsize=bfontsize, fontweight="bold")
                ax.tick_params(axis="x", labelsize=sfontsize, pad=labelpad)
            else:
                ax.set_xticklabels([])
                ax.set_xlabel("")

    fig.subplots_adjust(wspace=wspace, hspace=hspace)
    # Save through the figure handle so the right figure is written even if the
    # pyplot "current figure" has changed in the meantime.
    fig.savefig(outputfile, bbox_inches="tight", dpi=300)

In [12]:
# One worker per distance class; each job renders and saves one figure.
pwpool = ProcessWrapPool(len(ds))

test = False  # forwarded to the utils plotting helpers
cmap = "viridis"
sort_file = f"{PROJECT_DIR_o}labels/{s}_distance_to_center.json"

# (ylims, yticks) per mark, in the same order as `ms`.
distal_axes = ([[-1, 7], [-0.5, 3.5]], [[0, 2, 4, 6], [0, 1, 2, 3]])
default_axes = ([[-2, 26], [-1, 7]], [[0, 8, 16, 24], [0, 2, 4, 6]])

for d in ds:
    ylims, yticks = distal_axes if d == "distal" else default_axes
    # Both figures share a file name and differ only in the folder.
    folder = "supp_figures/" if d == "proximal" else "other_figures/"
    outputfile = f"{PROJECT_DIR_o}{folder}SuppFig1e.png"
    # The wide matrices in `df_features` serve as the heatmap input.
    pwpool.run(
        generate_metaplot_and_heatmap,
        args=[d, ps, ms, df_metaplots, df_features, sort_file, ylims, yticks, cmap, outputfile, test],
    )

In [15]:
# Number of jobs the pool reports as finished; two were submitted above (one per
# entry of `ds`). NOTE(review): evaluated right after submission this may still
# be lower — re-run the cell until it matches before closing the pool.
len(pwpool.finished_tasks)

2

In [16]:
# Shut the worker pool down. NOTE(review): confirm whether close() waits for
# unfinished jobs; if it does not, make sure all tasks have finished first.
pwpool.close()