In [8]:
import subprocess, os

In [4]:
# Directory holding the MEME Suite executables (depends on where you have it installed).
# Set the MEME_BIN_DIR environment variable to point somewhere else without editing
# this notebook; when it is unset the original default location is used.
MEME_path = os.environ.get('MEME_BIN_DIR', '/usr/local/bin/meme/bin')

In [6]:
# written by Peter Culviner, PhD to enable command-line access through Jupyter
def quickshell(command, print_output=True, output_path=None, return_output=False):
    """
    Run `command` through the system shell and capture what it prints.

    command: full command line as one string. It is executed with shell=True, so
        shell syntax such as `>` redirection works; never pass untrusted text.
    print_output: if True, echo the command followed by its captured stdout/stderr.
    output_path: optional file path; when given, the same stdout/stderr report is
        written there (an existing file is overwritten).
    return_output: if True, return a (stdout, stderr) tuple of decoded strings;
        otherwise the function returns None.

    Note: the command's exit status is not checked here, so a failing command is
    only visible through whatever it wrote to stderr.
    """
    process_output = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # errors='replace' keeps a stray non-UTF-8 byte in the tool output from
    # raising UnicodeDecodeError and discarding the whole captured report.
    stdout = process_output.stdout.decode('utf-8', errors='replace')
    stderr = process_output.stderr.decode('utf-8', errors='replace')
    output_string = f'STDOUT:\n{stdout}\nSTDERR:\n{stderr}\n'
    if print_output:
        print('$ ' + command)
        print(output_string)
    if output_path is not None:
        # Write with the same encoding used for decoding so non-ASCII characters
        # cannot fail under a non-UTF-8 locale.
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(output_string)
    if return_output:
        return stdout, stderr

# Generating background model

In [None]:
def generate_background_model(genome, output):
    """
    Generate a background model file by running MEME's fasta-get-markov.

    genome: path to the FASTA file passed to fasta-get-markov.
    output: path of the file that receives the tool's stdout through shell
        redirection (an existing file is overwritten).
    """
    get_markov_call = f'{MEME_path}/fasta-get-markov {genome} > {output}'
    # Execute the fasta-get-markov command assembled above.
    quickshell(get_markov_call, print_output = True)

# Motif scanning with SEA and FIMO

In [1]:
def sea_get_matches(input_fasta, input_control_fasta, output, motif_input_dir):
    """
    Re-run SEA without the --no-seqs argument hard-coded into xstreme to get the true positives.
    input_fasta: needs relative path, must be a string ending in .fasta.
    input_control_fasta: control sequences for background GC content/regulatory context.
        needs relative path, must be a string ending in .fasta.
    output: directory to contain the output results
    motif_input_dir: directory holding the de novo motif discovery results. The command reads
        `background`, `meme_out/meme.xml` and `streme_out/streme.xml` from this directory.
    """
    # Every motif-related path is derived from the motif_input_dir argument so the
    # function does not depend on any notebook-level variable.
    sea_call = (
        f'{MEME_path}/sea --verbosity 4 '
        f'--oc {output} '
        f'--qvalue --thresh 1 --order 2 --bfile {motif_input_dir}/background '
        f'--seed 0 --align center --motif-pseudo 0.01 '
        f'--m {motif_input_dir}/meme_out/meme.xml --m {motif_input_dir}/streme_out/streme.xml '
        f'--p {input_fasta} '
        f'--n {input_control_fasta}'
    )
    quickshell(sea_call, print_output = True)

In [2]:
def fimo_changeThreshold(input_fasta, motif_input_dir, output, motif, pval_threshold):
    """
    Scan sequences with FIMO for a single motif at a caller-chosen p-value threshold,
    to extract all true positives from the SEA analysis.

    input_fasta: sequences to scan; relative path, must be a string ending in .fasta.
    motif_input_dir: directory with the de novo motif results; `background` and
        `meme_out/meme.xml` are read from it. (can update if STREME)
    output: directory to contain the output results.
    motif: motif to scan for using IUPAC notation (e.g. KGUGAYBHASVUCAC)
    pval_threshold: threshold to identify significant motifs (passed to --thresh)
    """
    # One entry per option; joined with single spaces into the final command line.
    fimo_args = [
        f'{MEME_path}/fimo',
        '--parse-genomic-coord',
        '--verbosity 4',
        f'--oc {output}',
        f'--bfile {motif_input_dir}/background',
        f'--motif {motif}',
        f'--thresh {pval_threshold}',
        f'{motif_input_dir}/meme_out/meme.xml',
        f'{input_fasta}',
    ]
    quickshell(' '.join(fimo_args), print_output=True)

# Scanning for CRP motif in CFG vs. in cellulo

In [None]:
# FIMO scan of the CFG sequences for the KGUGAYBHASVUCAC motif.
# NOTE(review): the origin of the 0.00324 p-value threshold is not shown in this notebook — confirm.
fimo_changeThreshold(
    input_fasta='fig2_inCellulo_vs_CFG/all_NT_CFG.fasta',
    motif_input_dir='5enrich_CRP/selectThreshold/MEME_outputs/noCRP_CRP_20counts/noCRP_CRP_20counts_min100plus30_xstreme',
    output='5enrich_CRP/initiation_figures/CFG_motif_scan',
    motif='KGUGAYBHASVUCAC',
    pval_threshold=0.00324,
)

In [11]:
# FIMO scan of the in cellulo sequences for the KGUGAYBHASVUCAC motif.
# NOTE(review): the origin of the 0.00324 p-value threshold is not shown in this notebook — confirm.
fimo_changeThreshold(
    input_fasta='fig2_inCellulo_vs_CFG/all_NT_inCellulo.fasta',
    motif_input_dir='5enrich_CRP/selectThreshold/MEME_outputs/noCRP_CRP_20counts/noCRP_CRP_20counts_min100plus30_xstreme',
    output='5enrich_CRP/initiation_figures/inCellulo_motif_scan',
    motif='KGUGAYBHASVUCAC',
    pval_threshold=0.00324,
)