In [1]:
import argparse

# Options that each take a single string value and need no special handling.
_STRING_OPTIONS = (
    "--root-path",
    "--SRR-path",
    "--sbatch-time",
    "--sbatch-mem",
    "--sbatch-email",
    "--sbatch-email-type",
    "--sbatch-account",
    "--sbatch-partition",
)

parser = argparse.ArgumentParser(description='Submit an sbatch job to fastq-dump each SRR in a file.')
for option_flag in _STRING_OPTIONS:
    parser.add_argument(option_flag)
# --fdargs must come last on the command line: REMAINDER collects every
# token after it verbatim, so they can be forwarded to fastq-dump.
parser.add_argument("--fdargs", nargs=argparse.REMAINDER)

# Stand-in for a real command line while developing in the notebook.
# Adjacent literals are concatenated by the parser, so this is one string
# that is then split on whitespace into argv-style tokens.
demo_command_line = (
    "--root-path /Users/Adele/Desktop/mock_SRR "
    "--SRR-path /Users/Adele/Desktop/move_to_repaired_Macbook/SRP073808_bulk_RNA-seq_SRR.txt "
    "--sbatch-time 24:0:0 "
    "--sbatch-mem 8G "
    "--sbatch-email adelexu@stanford.edu "
    "--sbatch-email-type ALL "
    "--sbatch-account mbarna "
    "--sbatch-partition batch "
    "--fdargs --split-files --readids --gzip --skip-technical --minSpotId 10000 --maxSpotId 10010000"
)
args = parser.parse_args(demo_command_line.split())

print(args)

Namespace(SRR_path='/Users/Adele/Desktop/move_to_repaired_Macbook/SRP073808_bulk_RNA-seq_SRR.txt', fdargs=['--split-files', '--readids', '--gzip', '--skip-technical', '--minSpotId', '10000', '--maxSpotId', '10010000'], root_path='/Users/Adele/Desktop/mock_SRR', sbatch_account='mbarna', sbatch_email='adelexu@stanford.edu', sbatch_email_type='ALL', sbatch_mem='8G', sbatch_partition='batch', sbatch_time='24:0:0')


In [45]:
import os
import shlex
import subprocess

import pandas as pd


def _build_sbatch_job(args, srr, dest_path):
    """Build the sbatch invocation and batch script for one SRR accession.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed options; the ``sbatch_*`` attributes and ``fdargs`` are read.
    srr : str
        Accession passed to fastq-dump; also used as the job name and as the
        stem of the ``.o`` / ``.e`` log file names.
    dest_path : str
        Directory handed to fastq-dump via ``--outdir``.

    Returns
    -------
    tuple
        ``(sbatch_argv, script)``: the argument list to execute and the text
        of the batch script to feed to sbatch on standard input.
    """
    sbatch_argv = [
        "sbatch",
        f"--job-name={srr}",
        f"--output={srr}.o",
        f"--error={srr}.e",
        f"--mail-user={args.sbatch_email}",
        f"--mail-type={args.sbatch_email_type}",
        f"--time={args.sbatch_time}",
        f"--mem={args.sbatch_mem}",
        f"--account={args.sbatch_account}",
        f"--partition={args.sbatch_partition}",
    ]
    # fdargs is None when --fdargs was not supplied on the command line.
    fastq_dump_argv = ["fastq-dump", *(args.fdargs or []), "--outdir", dest_path, srr]
    # The script body is interpreted by /bin/sh on the compute node, so each
    # token is quoted to survive spaces or shell metacharacters in paths.
    fastq_dump_line = " ".join(shlex.quote(str(token)) for token in fastq_dump_argv)
    script = f"#!/bin/sh\nmodule load sratoolkit/2.9.0\n{fastq_dump_line}\n"
    return sbatch_argv, script


# Each line of the SRR file is "<dir_name><TAB><SRR>". dtype=str keeps
# numeric-looking directory names usable as path components.
df = pd.read_csv(args.SRR_path, sep="\t", names=["dir_name", "SRR"], dtype=str)

for row in df.itertuples(index=False):
    dest_path = os.path.join(args.root_path, row.dir_name)
    print(dest_path)
    if os.path.exists(dest_path):
        # Never reuse an existing directory: skip the row instead of
        # submitting a job that would write into it.
        print(row.dir_name, ": ERROR: directory already exists")
        continue
    os.mkdir(dest_path)
    sbatch_argv, script = _build_sbatch_job(args, row.SRR, dest_path)
    # Shell-equivalent rendering of the submission, for the notebook log only.
    cmd_line = " ".join(shlex.quote(token) for token in sbatch_argv) + " <<EOF\n" + script + "EOF"
    print(cmd_line)
    # Passing a whole command string without shell=True makes POSIX look for
    # a program with that entire name, and the heredoc needs a shell anyway.
    # Instead run sbatch from an argv list and send the script on stdin,
    # which sbatch reads when no script file is named.
    # NOTE: if submission fails, dest_path has already been created and a
    # re-run will report it as existing; remove it by hand before retrying.
    submission = subprocess.check_output(sbatch_argv, input=script, universal_newlines=True)
    print(submission, end="")
    

sbatch --job-name=SRR3439477 --output=SRR3439477.o --error=SRR3439477.e --mail-user=adelexu@stanford.edu --mail-type=ALL --time=24:0:0 --mem=8G --account=mbarna --partition=batch <<EOF 
#!/bin/sh 
module load sratoolkit/2.9.0 
fastq-dump --split-files --readids --gzip --skip-technical --minSpotId 10000 --maxSpotId 10010000 --outdir /Users/Adele/Desktop/mock_SRR/hESC_1 SRR3439477 
EOF
sbatch --job-name=SRR3439478 --output=SRR3439478.o --error=SRR3439478.e --mail-user=adelexu@stanford.edu --mail-type=ALL --time=24:0:0 --mem=8G --account=mbarna --partition=batch <<EOF 
#!/bin/sh 
module load sratoolkit/2.9.0 
fastq-dump --split-files --readids --gzip --skip-technical --minSpotId 10000 --maxSpotId 10010000 --outdir /Users/Adele/Desktop/mock_SRR/hESC_2 SRR3439478 
EOF
sbatch --job-name=SRR3439480 --output=SRR3439480.o --error=SRR3439480.e --mail-user=adelexu@stanford.edu --mail-type=ALL --time=24:0:0 --mem=8G --account=mbarna --partition=batch <<EOF 
#!/bin/sh 
module load sratoolkit/2.9.0 

In [7]:
import subprocess

# Scratch check: with shell=True the string is run by the shell, so the two
# `;`-separated commands both execute and their stdout comes back as bytes.
two_echo_commands = "echo 'Hello World!'; echo 'Nice to meet you!'"
subprocess.check_output(two_echo_commands, shell=True)

b'Hello World!\nNice to meet you!\n'

In [9]:
# Scratch check of str.join: the separator is placed between items only.
test_list = [*"abc", *"xyz"]
str.join("-", test_list)

'a-b-c-x-y-z'

In [41]:
" ".join(args.fdargs+["--outdir","/path/to/whatever","SRR000000"])

'--split-files --readids --gzip --skip-technical --minSpotId 10000 --maxSpotId 10010000 --outdir /path/to/whatever SRR000000'

In [3]:
import os

# Scratch check that a per-SRR config file can be created under root_path;
# opening with 'w' creates the file or truncates an existing one.
config_path = os.path.join(args.root_path, "SRR000000_fastq-dump.config")
with open(config_path, 'w') as config_file:
    config_file.write("testing...")