In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle as pkl
import os
# Chromosome labels to process: autosomes chr1..chr22 followed by chrX.
chrom_list = ['chr{}'.format(number) for number in range(1, 23)] + ['chrX']

In [2]:
# Peak-by-sample fragment counts; rows containing any missing value are dropped.
# Original note: "Mask regions with DNA copy-number < 0.5" -- presumably that masking
# is already applied in the *_CN_masked file (masked peaks absent/NaN); TODO confirm.
cn_fragments = pd.read_csv('../data/ATAC_frag_counts_CN_masked.txt', sep=' ', index_col=0).dropna()
# Keep only peaks whose smallest value across all columns is positive.
cn_fragments = cn_fragments[cn_fragments.min(axis=1)>0]

# Unmasked table, filtered the same way. It is only used to build `mask`.
cn_old = pd.read_csv('../data/ATAC_frag_counts_CN.txt', sep=' ', index_col=0).dropna()
cn_old = cn_old[cn_old.min(axis=1)>0]

# Boolean array over the rows of `cn_old`: True where the peak is retained in the
# final `cn_fragments`. It is computed AFTER the positivity filter so that anything
# selected with `mask` has exactly one row per row of `cn_fragments` (and therefore
# per entry of `chroms` / `starts` below).
mask = cn_old.index.isin(cn_fragments.index)
assert mask.sum() == len(cn_fragments), (
    'mask selects %d rows of cn_old but cn_fragments has %d peaks; the masked and '
    'unmasked tables are inconsistent, so mask-selected data would be misaligned'
    % (mask.sum(), len(cn_fragments))
)

# Remember the peak labels, then switch to a positional 0..n-1 index.
peaks = cn_fragments.index
cn_fragments = cn_fragments.reset_index(drop=True)


def parse_ranges(x):
    """Return field ``x`` of every label in ``peaks`` after splitting the label on '-'."""
    return [i.split('-')[x] for i in peaks]


# Field 0 is used as the chromosome name and field 1 as the integer start coordinate
# (labels presumably look like 'chr1-1000-2000' -- TODO confirm).
chroms = pd.Series(parse_ranges(0))
starts = pd.Series(parse_ranges(1)).astype('int')

In [3]:
BIN_SIZE = int(1e6)   # width of a genomic interval in bp (1 Mb)
WRITE_TABLES = False  # set True to save each table as '<sample>_FC_table_1Mb.txt'

par_samples = ['par_'+str(i) for i in range(1,11)]
# Row-wise mean of the ten par_* columns, stored as an extra pseudo-sample column.
cn_fragments['par_avg'] = cn_fragments[par_samples].mean(axis=1)
sample_list = ['B4', 'E5', 'E8', 'F2', 'F3', 'F9', 'K11', 'N6', 'N11', 'O16', 'F11', 'K2', 'par_avg']


def bin_fold_changes(fragments, chroms, starts, scaled_fc, sample,
                     chromosomes, bin_size=BIN_SIZE, parent='parent'):
    """Summarise one sample's per-peak values in fixed-width genomic bins.

    Parameters
    ----------
    fragments : DataFrame with one row per peak and at least the columns
        ``sample`` and ``parent``.
    chroms, starts : per-peak chromosome names and integer start coordinates,
        in the same row order as ``fragments``.
    scaled_fc : per-peak values, in the same row order as ``fragments``
        (averaged per bin into the 'Perm-scaled_FC' column).
    sample : name of the column of ``fragments`` to compare against ``parent``.
    chromosomes : chromosome names to process, in output order.
    bin_size : bin width in bp; a peak goes to the bin starting at
        ``start // bin_size * bin_size``.
    parent : name of the reference column of ``fragments``.

    Returns
    -------
    DataFrame indexed by '<chrom>-<bin start>' with the columns
    'Peak-level_FC' (mean of per-peak sample/parent ratios),
    'Interval-level_FC' (summed sample / summed parent),
    'Perm-scaled_FC' (mean of ``scaled_fc``),
    'Average_Height_Parent' (mean of ``parent``) and
    'Peak_count' (number of peaks with a non-missing ratio).
    Chromosomes without peaks contribute no rows.

    Raises
    ------
    ValueError if ``scaled_fc`` does not have one value per row of ``fragments``.
    """
    if len(scaled_fc) != len(fragments):
        raise ValueError(
            'scaled_fc has %d values but there are %d peaks; refusing to bin '
            'misaligned data' % (len(scaled_fc), len(fragments))
        )

    # Assemble everything positionally (plain arrays) so that no pandas index
    # alignment can silently shift one vector against another.
    per_peak = pd.DataFrame({
        'chrom': np.asarray(chroms),
        # Floor division also handles a start lying exactly on a bin edge.
        'bin_start': np.asarray(starts) // bin_size * bin_size,
        'signal': fragments[sample].values,
        'parent': fragments[parent].values,
        'scaled': np.asarray(scaled_fc),
    })
    per_peak['fc'] = per_peak['signal'] / per_peak['parent']

    chrom_tables = []
    for chrom in chromosomes:
        chrom_peaks = per_peak[per_peak['chrom'] == chrom]
        if chrom_peaks.empty:
            # Nothing to summarise for this chromosome.
            continue
        by_bin = chrom_peaks.groupby('bin_start')
        totals = by_bin[['signal', 'parent']].sum()
        chrom_table = pd.DataFrame({'Peak-level_FC': by_bin['fc'].mean(),
                                    'Interval-level_FC': totals['signal'] / totals['parent'],
                                    'Perm-scaled_FC': by_bin['scaled'].mean(),
                                    'Average_Height_Parent': by_bin['parent'].mean(),
                                    'Peak_count': by_bin['fc'].count()})
        chrom_table.index = [chrom+'-'+str(bin_start) for bin_start in chrom_table.index]
        chrom_tables.append(chrom_table)

    if not chrom_tables:
        return pd.DataFrame(columns=['Peak-level_FC', 'Interval-level_FC', 'Perm-scaled_FC',
                                     'Average_Height_Parent', 'Peak_count'])
    # Concatenate once instead of growing a frame with DataFrame.append in a loop.
    return pd.concat(chrom_tables)


fc_tables = {}  # sample name -> binned fold-change table
for sample in sample_list:
    print(sample)
    scaled_fname = sample+'_perm-scaled.txt'
    if sample == 'par_avg':
        # NOTE(review): the averaged parent reuses par_1's scaled file -- confirm intended.
        scaled_fname = 'par_1_perm-scaled.txt'
    # Keep only the rows selected by `mask`, then take column 1 of the headerless file.
    scaled_fc = pd.read_csv(scaled_fname, sep='\t', header=None)[mask]
    scaled_fc = scaled_fc.reset_index(drop=True)[1]

    sample_df = bin_fold_changes(cn_fragments, chroms, starts, scaled_fc, sample, chrom_list)
    # Express each bin relative to the sample's median bin value.
    sample_df['FC_centered'] = sample_df['Perm-scaled_FC']/np.median(sample_df['Perm-scaled_FC'])
    fc_tables[sample] = sample_df
    if WRITE_TABLES:
        sample_df.to_csv(sample+'_FC_table_1Mb.txt', sep='\t')

B4
E5
E8
F2
F3
F9
K11
N6
N11
O16
F11
K2
par_avg
