# Takes in STAMP "peaks" (Poisson-filtered) and generates the motif analysis commands.

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os
import pandas as pd
import numpy as np
import os
import pybedtools
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from qtools import Submitter
from tqdm import tnrange, tqdm_notebook

In [2]:
# Directory that is globbed for '*.bed' peak files further down in this notebook.
input_dir = '/projects/ps-yeolab3/ekofman/New_STAMP_framework/rebuttal/CvC_and_NvN/shuffled_eclips'
# Default destination for the per-BED .svg / .pickle files and HOMER directories
# named in the generated analyze_motifs commands.
output_dir = '/home/bay001/projects/kris_apobec_20200121/permanent_data/final_analysis/figure_1/outputs/motifs/'
# Directory in which the 'motif_analysis.sh' job script is written by Submitter.
bash_script_dir = '/home/bay001/projects/kris_apobec_20200121/permanent_data/final_analysis/01_SAILOR_bulk_rnaseq/bash_scripts/'

In [3]:
# Genome FASTA passed to analyze_motifs via --genome_fasta (the commands also use --species hg19).
genome_fasta = '/projects/ps-yeolab3/bay001/annotations/hg19/hg19.fa'
# Holds the analyze_motifs shell commands that are later handed to Submitter.
cmds = []

In [4]:
def write_motif_analysis_cmd(bed_file, output_dir=output_dir, genome_fasta=genome_fasta):
    """Build the shell command that runs ``analyze_motifs`` on one BED file.

    Output names are derived from the BED file's basename (extension removed)
    joined onto ``output_dir``: ``<name>.svg``, ``<name>.pickle`` and a HOMER
    directory ``<name>``.

    Parameters
    ----------
    bed_file : str
        Path to the BED file passed as ``--peaks``.
    output_dir : str
        Directory under which the output paths are constructed.
    genome_fasta : str
        Path passed as ``--genome_fasta``.

    Returns
    -------
    str or None
        The command string (with a trailing space), or ``None`` when the
        expected ``.svg`` output already exists, so finished analyses are
        not re-queued.
    """
    # Local import keeps this cell self-contained; fall back for Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    out_homer_dir = os.path.join(
        output_dir,
        os.path.splitext(os.path.basename(bed_file))[0]
    )
    out_file = out_homer_dir + '.svg'
    out_pickle_file = out_homer_dir + '.pickle'

    # Skip inputs whose figure has already been produced.
    if os.path.exists(out_file):
        return None

    # Paths are shell-quoted so that spaces or shell metacharacters cannot
    # split or alter the command; paths made only of safe characters are
    # emitted unchanged.
    cmd = 'module load eclipanalysis/0.0.3a;analyze_motifs '
    cmd += '--peaks {} '.format(quote(bed_file))
    cmd += '--out_pickle_file {} '.format(quote(out_pickle_file))
    cmd += '--out_file {} '.format(quote(out_file))
    cmd += '--species hg19 '
    cmd += '--out_homer_dir {} '.format(quote(out_homer_dir))
    cmd += '--genome_fasta {} '.format(quote(genome_fasta))
    return cmd

In [5]:
# Gather every '*.bed' file directly under input_dir, then order them by path.
all_bedfiles = glob.glob(os.path.join(input_dir, '*.bed'))
all_bedfiles.sort()
all_bedfiles

['/projects/ps-yeolab3/ekofman/New_STAMP_framework/rebuttal/CvC_and_NvN/shuffled_eclips/c_terminus_score_0.999_poisson_boost_100_density.bed',
 '/projects/ps-yeolab3/ekofman/New_STAMP_framework/rebuttal/CvC_and_NvN/shuffled_eclips/c_terminus_score_0.999_poisson_boost_100_no_density.bed',
 '/projects/ps-yeolab3/ekofman/New_STAMP_framework/rebuttal/CvC_and_NvN/shuffled_eclips/c_terminus_score_0.99_poisson_boost_100_density.bed',
 '/projects/ps-yeolab3/ekofman/New_STAMP_framework/rebuttal/CvC_and_NvN/shuffled_eclips/c_terminus_score_0.99_poisson_boost_100_no_density.bed',
 '/projects/ps-yeolab3/ekofman/New_STAMP_framework/rebuttal/CvC_and_NvN/shuffled_eclips/c_terminus_score_0.9_poisson_boost_100_density.bed',
 '/projects/ps-yeolab3/ekofman/New_STAMP_framework/rebuttal/CvC_and_NvN/shuffled_eclips/c_terminus_score_0.9_poisson_boost_100_no_density.bed',
 '/projects/ps-yeolab3/ekofman/New_STAMP_framework/rebuttal/CvC_and_NvN/shuffled_eclips/control_windows_merged.bed',
 '/projects/ps-yeolab3

In [6]:
# Rebuild the command list from scratch so that re-running this cell does not
# append duplicates, and drop the None entries that write_motif_analysis_cmd
# returns for BED files whose .svg output already exists.
cmds = [
    cmd
    for cmd in (write_motif_analysis_cmd(bed) for bed in all_bedfiles)
    if cmd is not None
]

cmds[:2]

['module load eclipanalysis/0.0.3a;analyze_motifs --peaks /projects/ps-yeolab3/ekofman/New_STAMP_framework/rebuttal/CvC_and_NvN/shuffled_eclips/c_terminus_score_0.999_poisson_boost_100_density.bed --out_pickle_file /home/bay001/projects/kris_apobec_20200121/permanent_data/final_analysis/figure_1/outputs/motifs/c_terminus_score_0.999_poisson_boost_100_density.pickle --out_file /home/bay001/projects/kris_apobec_20200121/permanent_data/final_analysis/figure_1/outputs/motifs/c_terminus_score_0.999_poisson_boost_100_density.svg --species hg19 --out_homer_dir /home/bay001/projects/kris_apobec_20200121/permanent_data/final_analysis/figure_1/outputs/motifs/c_terminus_score_0.999_poisson_boost_100_density --genome_fasta /projects/ps-yeolab3/bay001/annotations/hg19/hg19.fa ',
 'module load eclipanalysis/0.0.3a;analyze_motifs --peaks /projects/ps-yeolab3/ekofman/New_STAMP_framework/rebuttal/CvC_and_NvN/shuffled_eclips/c_terminus_score_0.999_poisson_boost_100_no_density.bed --out_pickle_file /home

In [7]:
# Batch route: hand all motif-analysis commands to qtools as one array job.
# If batch execution is not wanted, keep this cell as raw text rather than code.
# NOTE(review): with submit=False this presumably only writes the .sh script
# and does not queue it -- confirm against qtools.
Submitter(
    cmds,
    'motif_analysis',
    array=True,
    nodes=1,
    ppn=1,
    walltime='40:00:00',
    sh=os.path.join(bash_script_dir, 'motif_analysis.sh'),
    submit=False,
)

Writing 15 tasks as an array-job.
Wrote commands to /home/bay001/projects/kris_apobec_20200121/permanent_data/final_analysis/01_SAILOR_bulk_rnaseq/bash_scripts/motif_analysis.sh.


<qtools.submitter.Submitter at 0x2ae25dd0fc90>