In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from subprocess import run
import os

In [1]:
def makeFactorsIni(stm, color_list, gene, factor_ini_name = 'combined_factors'):
    """Build per-factor bigWig tracks and one combined pyGenomeTracks ini file.

    For every column of ``stm`` this writes a bedGraph file, converts it to
    bigWig, writes a single-track ini section and appends that section to
    ``pygenometracks/<factor_ini_name>.ini``.  An overlay group containing all
    factors is appended afterwards, and the pyGenomeTracks command for the
    region spanned by ``stm.index`` is printed.

    Parameters
    ----------
    stm : pandas.DataFrame
        One column per factor.  The helpers split the index labels on '.'
        (labels look like 'chr6.36593354' in this notebook).
    color_list : sequence of str
        Track colors, matched to the columns of ``stm`` by position.
    gene : str
        Used in the generated file names: ``pygenometracks/<factor>.<gene>.*``.
    factor_ini_name : str, optional
        Base name (no directory, no extension) of the combined ini file and
        of the output png named in the printed command.

    Raises
    ------
    IndexError
        If ``color_list`` has fewer entries than ``stm`` has columns.  This is
        checked before any file is touched.
    """
    factor_ini = 'pygenometracks/' + factor_ini_name + '.ini'
    factor_list = list(stm.columns)

    # Fail before any side effect rather than half-way through the loop.
    if len(color_list) < len(factor_list):
        raise IndexError(
            'color_list has %d entries but stm has %d factor columns'
            % (len(color_list), len(factor_list))
        )

    if os.path.isfile(factor_ini):
        # factor_ini already carries the 'pygenometracks/' prefix; prefixing
        # it again (as the old `rm` command did) pointed at a non-existent
        # path, so the stale file survived and was appended to on every rerun.
        os.remove(factor_ini)
        run('rm pygenometracks/*.bed', shell=True)
        run('rm pygenometracks/*.bw', shell=True)

    # Shared y-axis maximum across all factors.
    ymax = stm.max().max()

    for factor, color in zip(factor_list, color_list):
        track_prefix = 'pygenometracks/' + factor + '.' + gene

        make_bedGraph(stm, factor, gene)
        bedGraphToBigWig(track_prefix)

        # WriteBigWigIniFactor opens <prefix>.ini in append mode, so a file
        # left over from a previous run would end up with duplicate sections.
        single_ini = track_prefix + '.ini'
        if os.path.isfile(single_ini):
            os.remove(single_ini)
        WriteBigWigIniFactor(track_prefix, color, ymax)

        # Append the single-track section to the combined ini without going
        # through the shell, so unusual characters in names cannot break it.
        with open(single_ini) as src, open(factor_ini, 'a') as dst:
            dst.write(src.read())

    WriteAllFactorsTrack(factor_list, color_list, gene, ymax, factor_ini)

    pygenome_cmd(factor_ini, factor_ini_name, stm.index)
    
    
def pygenome_cmd(factor_ini, factor_ini_name, stm_idx):
    """Print the pyGenomeTracks command line for the region covered by ``stm_idx``.

    The first and last labels of ``stm_idx`` are split on '.' into exactly two
    parts each.  The region start comes from the first label; the chromosome
    and the region end come from the last label.  The command is only printed,
    not executed.

    Parameters
    ----------
    factor_ini : path of the tracks ini file passed to ``--tracks``.
    factor_ini_name : base name of the png written under ``plots/``.
    stm_idx : indexable sequence of 'chrom.position' labels.
    """
    _, region_start = stm_idx[0].split('.')
    chrom, region_end = stm_idx[-1].split('.')

    region = chrom + ':' + region_start + '-' + region_end
    out_png = 'plots/' + factor_ini_name + '.png'

    print(f'pyGenomeTracks --tracks {factor_ini} --region {region} --outFileName {out_png}')


def make_bedGraph(stm, factor, gene):
    """Write column ``factor`` of ``stm`` to ``pygenometracks/<factor>.<gene>.bed``.

    The file starts with a ``#bedGraph section`` comment line built from the
    first and last index labels; the chromosome in that header has its first
    three characters dropped.  One tab-separated row follows per index label:
    chromosome, position, position + 1, and the value of ``factor`` there.

    Parameters
    ----------
    stm : pandas.DataFrame whose index labels look like 'chrom.position'.
    factor : column of ``stm`` to export; also part of the file name.
    gene : second component of the output file name.
    """
    first_label = stm.index[0].split('.')
    last_label = stm.index[-1].split('.')
    header = ('#bedGraph section ' + first_label[0][3:] + ':'
              + first_label[1] + '-' + last_label[1] + '\n')

    bed_path = 'pygenometracks/' + factor + '.' + gene + '.bed'
    with open(bed_path, 'w') as bed:
        bed.write(header)

        for label in stm[factor].index:
            fields = label.split('.')
            position = fields[1]
            # Each label becomes a one-base interval [position, position + 1).
            record = (
                fields[0],
                str(position),
                str(int(position) + 1),
                str(stm.loc[label, factor]),
            )
            bed.write('\t'.join(record) + '\n')
            

def bedGraphToBigWig(bg):
    """Convert ``<bg>.bed`` to ``<bg>.bw`` with the external bedGraphToBigWig tool.

    ``bg`` is the path prefix shared by the input and output files.  Chromosome
    sizes are read from ``hg38.chrom.sizes`` in the current working directory.
    The command goes through the shell and its exit status is not checked.
    """
    command = f"bedGraphToBigWig {bg}.bed hg38.chrom.sizes {bg}.bw"
    run(command, shell=True)
    

def WriteBigWigIniFactor(bg, color, ymax):
    """Append one bigWig line-track section to ``<bg>.ini``.

    The section is titled with ``bg``, points at ``<bg>.bw``, and fixes the
    y-axis to the range 0..``ymax``.  The file is opened in append mode, so
    calling this twice with the same ``bg`` leaves two sections in the file.

    Parameters
    ----------
    bg : path prefix shared by the bigWig file and the ini file.
    color : value written to the ``color`` key.
    ymax : value written to the ``max_value`` key.
    """
    section_template = (
        '[{bg}]\n'
        'file = {bg}.bw\n'
        'title = {bg}\n'
        'height = 2\n'
        'color = {color}\n'
        'type = line\n'
        'alpha = 1\n'
        'min_value = 0\n'
        'max_value = {ymax}\n'
        'number_of_bins = 1000\n'
        'nans_to_zeros = true\n'
        'show_data_range = true\n'
        'y_axis_values = original\n'
        'file_type = bigwig\n\n'
    )
    ini_path = bg + '.ini'
    with open(ini_path, 'a') as ini_file:
        ini_file.write(section_template.format(bg=bg, color=color, ymax=ymax))
            
        
    
def WriteAllFactorsTrack(factor_list, color_list, gene, ymax, factor_ini):
    """Append an overlay group with every factor to the ini file ``factor_ini``.

    A base ``[all_factors]`` section is written first, using the first
    factor's bigWig with ``alpha = 0`` and the y-axis range 0..``ymax``.  Then
    one section per factor follows, each with ``overlay_previous = share-y``.
    Nothing is written when ``factor_list`` is empty, although the file is
    still opened in append mode.

    Parameters
    ----------
    factor_list : factor names; each maps to ``pygenometracks/<factor>.<gene>.bw``.
    color_list : colors, matched to ``factor_list`` by position.
    gene : second component of the bigWig file names.
    ymax : value written to ``max_value`` of the base section.
    factor_ini : path of the ini file to append to.
    """
    base_template = (
        '[all_factors]\n'
        'file = pygenometracks/{factor}.{gene}.bw\n'
        'height = 2\n'
        'color = {color}\n'
        'type = line\n'
        'alpha = 0\n'
        'min_value = 0\n'
        'max_value = {ymax}\n'
        'number_of_bins = 1000\n'
        'nans_to_zeros = true\n'
        'show_data_range = true\n'
        'y_axis_values = original\n'
        'file_type = bigwig\n\n'
    )
    overlay_template = (
        '[{factor}]\n'
        'file = pygenometracks/{factor}.{gene}.bw\n'
        'height = 2\n'
        'color = {color}\n'
        'type = line\n'
        'number_of_bins = 1000\n'
        'nans_to_zeros = true\n'
        'show_data_range = true\n'
        'overlay_previous = share-y\n'
        'file_type = bigwig\n\n'
    )

    with open(factor_ini, 'a') as ini_file:
        for position in range(len(factor_list)):
            factor = factor_list[position]
            color = color_list[position]

            # The base section that the overlays attach to goes in once,
            # ahead of the first factor's own section.
            if position == 0:
                ini_file.write(base_template.format(factor=factor, gene=gene, color=color, ymax=ymax))

            ini_file.write(overlay_template.format(factor=factor, gene=gene, color=color))
    

In [3]:
# Load the SRSF3 factor table and transpose it, so that the columns of
# `factors` are what makeFactorsIni iterates over as factors.
factors = pd.read_csv('tables/SRSF3_factors.tab', sep='\t', index_col=0).transpose()

# Three colors, matched by position to the columns of `factors`.
makeFactorsIni(
    factors,
    ['navy', 'red', 'gold'],
    'SRSF3',
    factor_ini_name='SRSF3_K3_factors',
)

pyGenomeTracks --tracks pygenometracks/SRSF3_K3_factors.ini --region chr6:36593354-36606600 --outFileName plots/SRSF3_K3_factors.png


In [12]:
# Inspect the row labels of the transposed table; pygenome_cmd and
# make_bedGraph split these labels on '.'.
factors.index

Index(['chr6.36593354', 'chr6.36593355', 'chr6.36593356', 'chr6.36593357',
       'chr6.36593358', 'chr6.36593359', 'chr6.36593360', 'chr6.36593361',
       'chr6.36593362', 'chr6.36593363',
       ...
       'chr6.36606591', 'chr6.36606592', 'chr6.36606593', 'chr6.36606594',
       'chr6.36606595', 'chr6.36606596', 'chr6.36606597', 'chr6.36606598',
       'chr6.36606599', 'chr6.36606600'],
      dtype='object', length=13247)

In [13]:
# Each index label is a single 'chrom.position' string, so it has to be split
# on '.' before unpacking; unpacking the raw string raises ValueError.
chrom, start = factors.index[0].split('.')
chrom, end = factors.index[-1].split('.')

'chr6.36593354'