In [1]:
import sys
import numpy as np
import pandas as pd
import re
import pyBigWig
sys.path.append("../../Utils")
from loaders import HNSCCFeatureHandler

# Input locations, given relative to the notebook's working directory.
METADATA_PATH = "../../Supplementary_Tables/ST1/RAW_HNSCC_METADATA_NEW_v10.csv"
VALID_IDS_PATH = "../../Utils/Lists/cv_ids.txt"
HOLD_IDS_PATH = '../../Utils/Lists/holdout_ids.txt'

# Build the project feature handler, then run its preparation steps in this
# exact order (load -> z-score -> merge metadata -> batch-correct). The steps
# are methods on the same object, so they presumably update its internal
# state in sequence -- TODO confirm against Utils/loaders.py.
hc_data = HNSCCFeatureHandler(METADATA_PATH, VALID_IDS_PATH, HOLD_IDS_PATH)
# NOTE(review): the meaning of the trailing `0, 4` arguments is defined by the
# loader and is not visible here -- presumably column indices; confirm.
_ = hc_data.load_feature_to_dataframe("../../Data/*.mds.10kb.midpoints.tsv", 0, 4)
# `t` and `z` keep the return values of the normalisation and batch-correction
# steps; neither name is read again in the cells shown here.
t = hc_data.normalize_zscore()
_ = hc_data.merge_feature_metadata()
z = hc_data.batch_correct()

In [2]:
# Raw feature table restricted to the responder subset. In this cell only its
# column labels are used (to derive the column ordering below).
responder = hc_data.get_raw_features(
    hc_data.get_subset(hc_data.responder_ids)
)
# Compiled here (rather than reusing `_COL_RE`) because `keyfunc` is called
# before `_COL_RE` is assigned further down in this cell.
_REGION_KEY_RE = re.compile(r'chr([^:]+):(\d+)-(\d+)')
# Conventional karyotype order for the named chromosomes; any other
# non-numeric name sorts after these, alphabetically.
_NAMED_CHROM_RANK = {'X': 0, 'Y': 1, 'M': 2, 'MT': 2}


def keyfunc(x):
    """Return a sort key that puts ``chr<name>:<start>-<end>`` labels in genomic order.

    Numeric chromosomes sort by their number, followed by X, Y, M/MT and then
    any other named contig alphabetically; within a chromosome, labels sort by
    start and then end. For labels with numeric chromosomes the resulting
    order is the same as sorting on the tuple of all digit runs.

    The previous digit-only key dropped the chromosome component for names
    such as ``chrX`` (``'chrX:0-10000'`` became ``(0, 10000)``), so those bins
    were scattered around the autosomes and chrX/chrY bins were interleaved.

    Labels that do not match the region pattern fall back to the tuple of
    their digit runs and sort before every region label. Every key has the
    same element types position by position, so mixed inputs never trigger an
    ``int``-versus-``str`` comparison.
    """
    match = _REGION_KEY_RE.fullmatch(x)
    if match is None:
        return (-1, 0, '', *(int(n) for n in re.findall(r'\d+', x)))

    chrom, start, end = match.groups()
    if chrom.isdecimal():
        return (0, int(chrom), '', int(start), int(end))

    named_rank = _NAMED_CHROM_RANK.get(chrom, len(_NAMED_CHROM_RANK) + 1)
    return (1, named_rank, chrom, int(start), int(end))
# Column labels of the responder table, ordered by `keyfunc`.
sorted_cols = list(responder.columns)
sorted_cols.sort(key=keyfunc)
# Pattern that splits a 'chr<name>:<start>-<end>' label into its three parts.
_COL_RE = re.compile(r'chr([^:]+):(\d+)-(\d+)')

In [3]:
def _mean_raw_features(*id_sets):
    """Return the per-column mean of the raw features for one sample subset.

    ``id_sets`` are forwarded unchanged to ``hc_data.get_subset``; the
    resulting raw-feature table is restricted to ``sorted_cols`` (in that
    order) and averaged with ``mean(axis=0)``.
    """
    subset = hc_data.get_subset(*id_sets)
    return hc_data.get_raw_features(subset)[sorted_cols].mean(axis=0)


# Overall profiles per response group.
responder = _mean_raw_features(hc_data.responder_ids)
non_responder = _mean_raw_features(hc_data.non_responder_ids)

# Profiles per response group, further restricted by each time-point id list.
responder_screen = _mean_raw_features(hc_data.responder_ids, hc_data.screen_ids)
non_responder_screen = _mean_raw_features(hc_data.non_responder_ids, hc_data.screen_ids)
responder_day0 = _mean_raw_features(hc_data.responder_ids, hc_data.day0_ids)
non_responder_day0 = _mean_raw_features(hc_data.non_responder_ids, hc_data.day0_ids)
responder_adj_wk1 = _mean_raw_features(hc_data.responder_ids, hc_data.adjwk1_ids)
non_responder_adj_wk1 = _mean_raw_features(hc_data.non_responder_ids, hc_data.adjwk1_ids)
# Original (misspelled) name kept as an alias so cells that still reference
# it continue to work.
non_responder_adk_wk1 = non_responder_adj_wk1

In [4]:
import os
import pyBigWig

def _read_chrom_sizes(path):
    """Parse a whitespace-separated ``<name> <size>`` file into a dict, in file order."""
    sizes = {}
    with open(path) as fh:
        for line in fh:
            fields = line.split()
            # Blank lines and '#' comments are skipped rather than crashing
            # the two-value unpack.
            if not fields or fields[0].startswith('#'):
                continue
            if len(fields) < 2:
                raise ValueError(f"malformed chrom-sizes line in {path!r}: {line!r}")
            sizes[fields[0]] = int(fields[1])
    return sizes


def series_to_wig_and_bw(s: pd.Series, wig_path: str, bw_path: str, chrom_sizes):
    """Write a Series of per-region values to a BigWig file.

    Parameters
    ----------
    s : pd.Series
        Values indexed by region labels of the form ``<chrom>:<start>-<end>``
        (e.g. ``chr1:0-10000``). Each value is converted with ``float``.
    wig_path : str
        Currently unused. The bedGraph-style text export that used this path
        is disabled; the parameter is kept so existing calls stay valid.
    bw_path : str
        Destination of the BigWig file.
    chrom_sizes : str, os.PathLike or mapping
        Either the path of a ``<name> <size>`` chrom-sizes file or a mapping
        of chromosome name to size. Its iteration order defines the BigWig
        header order.

    Raises
    ------
    ValueError
        If ``s`` is empty, a label cannot be parsed, or a chromosome in ``s``
        is missing from ``chrom_sizes``.
    """
    if isinstance(chrom_sizes, (str, os.PathLike)):
        sizes = _read_chrom_sizes(chrom_sizes)
    else:
        sizes = chrom_sizes

    header = list(sizes.items())
    chrom_rank = {name: rank for rank, (name, _size) in enumerate(header)}

    intervals = []
    for region, v in s.items():
        chrom, span = region.split(':')
        start, end = span.split('-')
        intervals.append((chrom, int(start), int(end), float(v)))

    # Validate before opening the output so a bad input does not leave a
    # half-written file behind.
    if not intervals:
        raise ValueError("cannot write a BigWig from an empty Series")
    unknown = sorted({iv[0] for iv in intervals if iv[0] not in chrom_rank})
    if unknown:
        raise ValueError(f"chromosomes missing from chrom sizes: {unknown}")

    # pyBigWig expects entries ordered by chromosome (in header order) and
    # then by start. The sort is stable, so already-ordered input is unchanged.
    intervals.sort(key=lambda iv: (chrom_rank[iv[0]], iv[1], iv[2]))
    chroms, starts, ends, values = (list(column) for column in zip(*intervals))

    bw = pyBigWig.open(bw_path, 'w')
    try:
        bw.addHeader(header)
        bw.addEntries(chroms, starts, ends=ends, values=values)
    finally:
        # Always release the handle, even when pyBigWig rejects the entries.
        bw.close()

In [5]:
# One BigWig track per (response group, time point) mean profile. Each entry
# is (series, wig path, BigWig path); every call uses the same chrom-sizes file.
bigwig_exports = [
    (responder_screen, 'responder_screen.wig', 'responder_screen.bw'),
    (non_responder_screen, 'non_responder_screen.wig', 'non_responder_screen.bw'),
    (responder_day0, 'responder_day0.wig', 'responder_day0.bw'),
    (non_responder_day0, 'non_responder_day0.wig', 'non_responder_day0.bw'),
    (responder_adj_wk1, 'responder_adj_wk1.wig', 'responder_adj_wk1.bw'),
    # The variable is spelled "adk" where it is defined; the output files use "adj".
    (non_responder_adk_wk1, 'non_responder_adj_wk1.wig', 'non_responder_adj_wk1.bw'),
]
for group_mean, wig_name, bw_name in bigwig_exports:
    series_to_wig_and_bw(group_mean, wig_name, bw_name, '../F3/hg38.chrom.sizes')