In [1]:
import pandas as pd
import os

In [10]:
def read_annotated_file(fn, headers=None, src='brian'):
    """
    Reads in an annotated bedfile from either eric or me.
    Ensures that 'region' column is defined and set.
    Returns dataframe

    :param fn: basestring
        filename of a tab-separated annotated file without a header row.
    :param headers: list
        column names for the file. Only used when src is neither
        'brian' nor 'eric'; in that case it is required and must
        contain 'region'. Ignored (overridden by the built-in layout)
        when src is 'brian' or 'eric'.
    :param src: basestring
        either 'brian' or 'eric' depending on whose script you use.
        Any other value means the column layout comes from ``headers``.
    :return df: pandas.DataFrame
    :raises ValueError: if src is not 'brian'/'eric' and headers is
        None or does not contain 'region'.
    """
    if src == 'brian':
        columns = [
            'chrom', 'start', 'end', 'l10p', 'l2fc', 'strand', 'geneid',
            'genename', 'region', 'alloverlapping'
        ]
    elif src == 'eric':
        columns = [
            'chrom', 'start', 'end', 'l10p', 'l2fc',
            'strand', 'annotation', 'geneid'
        ]
    else:
        # Explicit check instead of ``assert``: asserts vanish under -O and
        # ``'region' in None`` would otherwise surface as a cryptic TypeError.
        if headers is None or 'region' not in headers:
            raise ValueError(
                "headers must be provided and contain 'region' "
                "when src is not 'brian' or 'eric' (src={!r})".format(src)
            )
        columns = headers

    df = pd.read_table(fn, names=columns)

    if src == 'eric':
        # This layout has no 'region' column; it is derived row by row with
        # return_region_eric, which is defined outside this function.
        df['region'] = df.apply(return_region_eric, axis=1)
    return df

def split_annotated_file_into_region_beds_and_save(
    annotated, headers=None, src='brian',
    split_beds_directory=(os.path.join(os.getcwd(),'regions'))
):
    """
    Splits an annotated bedfile into one 6-column bedfile per region
    and saves each one into split_beds_directory.

    Each output file is named '<basename of annotated>.<region>.bed' and
    holds the chrom, start, end, l10p, l2fc and strand columns,
    tab-separated, without header or index. Rows whose region is
    missing (NaN) are skipped rather than producing an empty file.

    :param annotated: basestring
        filename of the annotated file; passed to read_annotated_file.
    :param headers: list
        column names, forwarded to read_annotated_file.
    :param src: basestring
        source layout, forwarded to read_annotated_file.
    :param split_beds_directory: basestring
        output directory; created (including missing parents) if needed.
        Note the default is computed once, when the function is defined,
        from the working directory at that moment.
    :return: None
    """
    # EAFP directory creation: avoids the exists()/mkdir() race and also
    # creates missing parent directories. Written without exist_ok so it
    # works on old interpreters too.
    try:
        os.makedirs(split_beds_directory)
    except OSError:
        if not os.path.isdir(split_beds_directory):
            raise

    df = read_annotated_file(annotated, headers, src)
    bed_columns = ['chrom', 'start', 'end', 'l10p', 'l2fc', 'strand']
    prefix = os.path.basename(annotated)

    # A single groupby pass instead of re-filtering the frame per region.
    for region, region_df in df.groupby('region'):
        out_path = os.path.join(
            split_beds_directory,
            prefix + ".{}.bed".format(region)
        )
        region_df[bed_columns].to_csv(
            out_path,
            sep='\t',
            header=False,
            index=False
        )

In [11]:
# Absolute path to the annotated peaks file to split (machine-specific;
# edit this before running the cell elsewhere).
annotated = '/home/bay001/projects/nazia_clipseq_20170627/permanent_data/eCLIP-0.1.5/annotated_peaks/DDX5.REP1.---.r-.fqTrTrU-SoMaSoCoSoMeV2ClNpCoFc3Pv3.bed.annotated.bed'

# Uses the defaults: src='brian' layout and the default 'regions' output
# directory from the function signature.
split_annotated_file_into_region_beds_and_save(annotated)