# bed-pileup
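Stack the BED files output by AmpliconClassifier into genome-wide pileups (histograms) of amplified regions. The main result is the ecDNA pileup; intrachromosomal and unknown amplicon classes are piled up separately. See **Fig. 3a**.  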
Output: see ./bedgraph  
TODO:
- Make a bed-pileup of chr17p11.2 for osteosarcomas.

In [None]:
import pandas as pd
import subprocess
import shutil
import warnings
# Directory setup
import os
from pathlib import Path
# Import local dependencies
import sys
sys.path.append('../src')
from data_imports import *

In [None]:
# GLOBALS
# Input: bed outputs from AmpliconClassifier
BED_DIR = Path("../data/source/AmpliconClassifier/pedpancan_classification_bed_files")
# Local working directories: per-run symlink folders and bedgraph outputs
SYM_DIR = Path('bed_symlinks')
BDG_DIR = Path('bedgraph')
# Make sure both local directories exist before any later cell uses them
SYM_DIR.mkdir(parents=True, exist_ok=True)
BDG_DIR.mkdir(parents=True, exist_ok=True)
# Biosample table, loaded once and passed to get_deduplicated_beds() below
BIOSAMPLES = import_biosamples()

In [None]:
# Functions
def symlink_beds(dirname, bed_list,verbose=False):
    '''
    Symlink a collection of bed files into a subdirectory of SYM_DIR.

    dirname: place where the bed files should go, somewhere in ./bed_symlinks.
        Created (with parents) if it does not exist yet.
    bed_list: iterable of bed file paths, given as str or Path. Relative
        paths are resolved against the current working directory. Each link
        is named after the final component of the path as given.
    verbose: if True, report every link that is created or already present.

    Returns None. A link that already exists is left untouched; any other
    OSError is printed and the remaining files are still processed.
    '''
    link_dir = SYM_DIR/dirname
    link_dir.mkdir(parents=True, exist_ok=True)
    for file in bed_list:
        # Coerce once so that plain string paths work as well as Path objects
        file = Path(file)
        # Link to the absolute target so the link stays valid from any cwd
        source = file.resolve()
        dest = link_dir/file.name
        try:
            dest.symlink_to(source)
        except FileExistsError:
            if verbose:
                print(f'Symlink already exists: {dest}')
        except OSError as e:
            print(f'Error creating symlink for {source}: {e}')
        else:
            if verbose:
                print(f'Successfully created symlink: {dest} -> {source}')
    return

def get_deduplicated_beds(biosamples = None, tumor_type = None):
    '''
    List the bed files in BED_DIR that belong to the unique tumor set.

    biosamples: table indexed by biosample ID, with an `in_unique_tumor_set`
        column used as a row mask and a `cancer_type` column. Loaded with
        import_biosamples() when omitted.
    tumor_type: if given, only biosamples whose cancer_type equals this value
        are retained.

    A bed file is kept when any retained biosample ID occurs as a substring
    of its filename. Returns a list of paths from BED_DIR.glob('*.bed').

    NOTE: beds from duplicate AA runs of the same biosample, eg
    SJST031395_D1_amplicon1_ecDNA_1_intervals.bed
    vs
    SJST031395_D1.WholeGenome_amplicon1_ecDNA_1_intervals.bed
    are not removed here; that step is not implemented.
    '''
    if biosamples is None:
        biosamples = import_biosamples()

    # Drop duplicate biosamples, then optionally restrict to one tumor type
    in_unique_set = biosamples.in_unique_tumor_set
    biosamples = biosamples[in_unique_set]
    if tumor_type is not None:
        is_requested_type = biosamples.cancer_type == tumor_type
        biosamples = biosamples[is_requested_type]
    allowed_ids = set(biosamples.index)

    def belongs_to_allowed_sample(bed):
        # Substring match of each biosample ID against the bed filename
        return any(sample_id in bed.name for sample_id in allowed_ids)

    all_beds = list(BED_DIR.glob('*.bed'))
    return [bed for bed in all_beds if belongs_to_allowed_sample(bed)]

def run_bed_pileup(bed_dir,outfile):
    '''
    Run ../src/bed_pileup.py on a directory of bed files.

    bed_dir: directory passed to the script's -d option.
    outfile: output file name; the script's -o option receives BDG_DIR/outfile.

    The script is launched with the current Python interpreter. Its stdout is
    printed; its stderr, if any, is echoed to sys.stderr. A non-zero exit
    status is reported on sys.stderr as well, so a failed run is visible even
    when the script wrote nothing to stderr.

    Returns the subprocess.CompletedProcess, or None if the process could not
    be started.
    '''
    # The script path is relative, so this must run from the notebook directory
    script_path = Path('..','src','bed_pileup.py')
    outfile = Path(outfile)
    command = [sys.executable, str(script_path), '-d', str(bed_dir), '-o', str(BDG_DIR/outfile)]
    try:
        # Capture output instead of streaming it so it can be shown in the cell
        result = subprocess.run(command, capture_output=True, text=True)
    except Exception as e:
        print(f"Failed to run script: {e}", file=sys.stderr)
        return None

    # Print the script's output
    print(result.stdout)

    # Print any errors that occurred
    if result.stderr:
        print(f"Errors:\n{result.stderr}", file=sys.stderr)

    # Surface failures explicitly; stderr alone does not distinguish warnings from errors
    if result.returncode != 0:
        print(f"bed_pileup.py exited with status {result.returncode}", file=sys.stderr)

    # Return the result object, which contains info about the execution
    return result

In [None]:
def make_ecDNA_bdg():
    '''
    Pile up the ecDNA bed files of all deduplicated biosamples.

    Selects beds whose filename contains "ecDNA", symlinks them into
    SYM_DIR/ecDNA_all and runs the pileup with output name ecDNA_all.bdg.
    '''
    run_name = "ecDNA_all"
    ecdna_beds = [
        bed for bed in get_deduplicated_beds(BIOSAMPLES)
        if "ecDNA" in bed.name
    ]
    symlink_beds(run_name, ecdna_beds)
    run_bed_pileup(SYM_DIR/run_name, f'{run_name}.bdg')
def make_intrachromosomal_bdg():
    '''
    Pile up the intrachromosomal amplicon beds of all deduplicated biosamples.

    A bed counts as intrachromosomal when its filename contains one of
    'Complex-non-cyclic', 'Linear' or 'BFB'. The selection is symlinked into
    SYM_DIR/intrachromosomal_all and piled up as intrachromosomal_all.bdg.
    '''
    run_name = "intrachromosomal_all"
    labels = ('Complex-non-cyclic', 'Linear', 'BFB')
    selected = []
    for bed in get_deduplicated_beds(BIOSAMPLES):
        if any(label in bed.name for label in labels):
            selected.append(bed)
    symlink_beds(run_name, selected)
    run_bed_pileup(SYM_DIR/run_name, f'{run_name}.bdg')
def make_unknown_bdg():
    '''
    Pile up the beds labelled "unknown" for all deduplicated biosamples.

    Selects beds whose filename contains "unknown", symlinks them into
    SYM_DIR/unknown_all and runs the pileup with output name unknown_all.bdg.
    '''
    run_name = "unknown_all"
    candidates = get_deduplicated_beds(BIOSAMPLES)
    unknown_beds = list(filter(lambda bed: "unknown" in bed.name, candidates))
    symlink_beds(run_name, unknown_beds)
    run_bed_pileup(SYM_DIR/run_name, f'{run_name}.bdg')

In [None]:
# Build the cohort-wide ecDNA pileup (run name "ecDNA_all").
make_ecDNA_bdg()

In [None]:
# Build the cohort-wide intrachromosomal pileup (run name "intrachromosomal_all").
make_intrachromosomal_bdg()

In [None]:
# Build the cohort-wide pileup of "unknown" amplicons (run name "unknown_all").
make_unknown_bdg()

## Tumor type specific

In [None]:
def make_tumortype_bdg(tumor_type):
    '''
    Build the ecDNA, intrachromosomal and unknown pileups for one tumor type.

    tumor_type: cancer_type value used to restrict the biosamples; it is also
        appended to each run name, e.g. "ecDNA_" + tumor_type.

    For each category, beds whose filename contains one of the category's
    labels are symlinked into SYM_DIR/<run name> and piled up with output
    name <run name>.bdg.
    '''
    beds = get_deduplicated_beds(BIOSAMPLES,tumor_type)
    # (run-name prefix, filename labels), processed in this order
    categories = (
        ("ecDNA_", ("ecDNA",)),
        ("intrachromosomal_", ('Complex-non-cyclic','Linear','BFB')),
        ("unknown_", ("unknown",)),
    )
    for prefix, labels in categories:
        run_name = prefix+tumor_type
        bed_subset = [
            bed for bed in beds
            if any(label in bed.name for label in labels)
        ]
        symlink_beds(run_name,bed_subset)
        run_bed_pileup(SYM_DIR/run_name,run_name+'.bdg')
    return

In [None]:
# Build the ecDNA, intrachromosomal and unknown pileups restricted to cancer_type 'ACC'.
make_tumortype_bdg('ACC')

## MPNST

In [None]:
import pyranges as pr
# ecDNA interval beds examined in this MPNST section.
# NOTE(review): the names presumably encode tumor / sample / bed
# (e.g. t2s1b2 = tumor 2, sample 1, bed 2) — confirm with the author.
t1b = BED_DIR/'BS_N3FWKZB8_amplicon1_ecDNA_1_intervals.bed'
t2s1b1 = BED_DIR/'BS_A1DV9T7G_amplicon1_ecDNA_1_intervals.bed'
t2s1b2 = BED_DIR/'BS_A1DV9T7G_amplicon4_ecDNA_1_intervals.bed'
t2s2b = BED_DIR/'BS_WH8KWW5J_amplicon1_ecDNA_1_intervals.bed'

In [None]:
# Load the bed with pyranges. Note this rebinds t1b from the file path to the
# object returned by pr.read_bed, so this cell cannot be re-run on its own.
t1b = pr.read_bed(str(t1b))

In [None]:
# Display the `length` attribute of the loaded ranges.
# NOTE(review): presumably the total number of bases covered — confirm against the pyranges docs.
t1b.length