In [1]:
# Build the Singularity image on Cypress.
# Code: singularity build SAW_7.0.sif docker://stomics/saw:07.0.0
# Then download the Singularity image from Cypress.

In [1]:
import os
import re
import shutil
import subprocess

In [2]:
def extract_number(filename):
    """Return the first ``_<digits>_`` segment of ``filename`` as an int.

    Intended as a sort key. Names that contain no underscore-delimited
    run of digits yield 0.
    """
    found = re.search(r'_(\d+)_', filename)
    if found is None:
        return 0
    return int(found.group(1))

In [3]:
# Root folder for the pipeline outputs. exist_ok=True makes the cell safe to
# re-run and avoids the check-then-create race of exists() + mkdir().
os.makedirs("process_data1", exist_ok=True)

In [4]:
# One output sub-folder per condition. makedirs also creates the
# "process_data1" parent if it is missing, so this cell can run on its own.
for condition_dir in ("process_data1/case", "process_data1/control"):
    os.makedirs(condition_dir, exist_ok=True)

In [5]:
# Conditions to process; each name is a folder (relative to the working
# directory) whose entries are treated as samples by the cells below.
folder_list = ["case", "control"]

# Mirror every entry of <condition>/ as process_data1/<condition>/<sample>/.
# exist_ok=True keeps the cell re-runnable without the exists()/mkdir() race.
for condition in folder_list:
    for sample in os.listdir(condition):
        os.makedirs("process_data1/{}/{}".format(condition, sample), exist_ok=True)

In [6]:
# Holding area for doubly-nested sample folders.
if not os.path.exists("tmp"):
    os.mkdir("tmp")

# For every entry X of case/, move case/X/X (if present) to tmp/X,
# replacing whatever was previously stored at tmp/X.
for folder in os.listdir("case"):
    nested = "case/{}/{}".format(folder, folder)
    parked = "tmp/{}".format(folder)
    if not os.path.exists(nested):
        continue
    if os.path.exists(parked):
        shutil.rmtree(parked)
    os.rename(nested, parked)

In [25]:
def _pick_entry(directory, marker):
    """Return the first (sorted) entry of `directory` whose name contains `marker`, else None."""
    for name in sorted(os.listdir(directory)):
        if marker in name:
            return name
    return None


def _mate_reads(reads_dir, mate):
    """Return the read files of one mate as a comma-joined `$dataDir/reads/...` string.

    Only names ending in `_<mate>.fq` or `_<mate>.fastq` (optionally `.gz`) are
    accepted. A substring test such as `"_1" in name` also matches mate-2 files
    whose preceding number starts with 1 (e.g. `..._10_2.fq.gz`), which would
    mix both mates into one list.

    Raises:
        FileNotFoundError: if `reads_dir` holds no file for the requested mate.
    """
    suffix = re.compile(r'_{}\.f(?:ast)?q(?:\.gz)?$'.format(mate))
    names = [name for name in os.listdir(reads_dir) if suffix.search(name)]
    if not names:
        raise FileNotFoundError(
            "no mate-{} read files found in {}".format(mate, reads_dir))
    # Secondary sort on the name keeps fq1 and fq2 in the same order on ties.
    names.sort(key=lambda name: (extract_number(name), name))
    return ','.join("$dataDir/reads/" + name for name in names)


def _image_file(rawdata_dir, marker):
    """Return `<image-folder>/<file>` for the first file containing `marker`, else None.

    Looks inside every sub-folder of `rawdata_dir` whose name contains "image".
    The returned path is relative to `rawdata_dir`.
    """
    for entry in sorted(os.listdir(rawdata_dir)):
        entry_path = os.path.join(rawdata_dir, entry)
        if "image" not in entry or not os.path.isdir(entry_path):
            continue
        found = _pick_entry(entry_path, marker)
        if found is not None:
            return "{}/{}".format(entry, found)
    return None


# Write <condition>/<sample>/<sample>_preprocess.sh for every sample.
for condition in folder_list:
    for sample in os.listdir(condition):
        rawdata_dir = "{}/{}/00.Rawdata".format(condition, sample)

        # Resolve the mask per sample so a missing file cannot silently reuse
        # the mask found for the previous sample.
        mask_file = _pick_entry(os.path.join(rawdata_dir, "mask"), ".h5")
        if mask_file is None:
            raise FileNotFoundError("no .h5 mask file in {}/mask".format(rawdata_dir))

        read1 = _mate_reads(os.path.join(rawdata_dir, "reads"), 1)
        read2 = _mate_reads(os.path.join(rawdata_dir, "reads"), 2)

        # The image files live inside the image sub-folder, not in 00.Rawdata itself.
        imageRecordFile = _image_file(rawdata_dir, "ipr")
        imageCompressedFile = _image_file(rawdata_dir, ".gz")
        if imageRecordFile is None or imageCompressedFile is None:
            raise FileNotFoundError(
                "image record (ipr) or compressed image (.gz) missing under {}".format(rawdata_dir))

        bash_file = [
            # NOTE(review): this assignment is not exported, so child processes
            # started by the script will not see it — confirm that is intended.
            "NUMBA_CACHE_DIR=/work/ygong/Stereo-seq/raw_data/{}/{}".format(condition, sample),

            "dataDir=/work/ygong/Stereo-seq/raw_data/{}/{}/00.Rawdata".format(condition, sample),
            "outDir=/work/ygong/Stereo-seq/raw_data/process_data1/{}/{}".format(condition, sample),

            "export SINGULARITY_BIND=$dataDir,$outDir",
            "bash /work/ygong/Stereo-seq/raw_data/stereoPipeline.sh \\",
            "-sif /work/ygong/Stereo-seq/raw_data/SAW_7.0.sif \\",
            "-splitCount 1 \\",
            "-maskFile $dataDir/mask/{} \\".format(mask_file),
            "-fq1 {} \\".format(read1),
            "-fq2 {} \\".format(read2),
            "-speciesName human \\",
            "-tissueType brain \\",
            "-refIndex /work/ygong/Stereo-seq/raw_data/reference/human/STAR_SJ100 \\",
            "-annotationFile /work/ygong/Stereo-seq/raw_data/reference/human/genes/genes.gtf \\",
            # NOTE(review): flag spelling and the " : " separator are kept as
            # written — confirm against the stereoPipeline.sh usage text.
            "-rRNAremove : N \\",
            "-threads 48 \\",
            "-outDir $outDir/result \\",
            # Pass the resolved paths (under $dataDir, which is bind-mounted)
            # instead of the bare words "imageRecordFile"/"imageCompressedFile".
            "-imageRecordFile $dataDir/{} \\".format(imageRecordFile),
            "-imageCompressedFile $dataDir/{} \\".format(imageCompressedFile),
            "-doCellBin Y"
        ]

        script_path = "{}/{}/{}_preprocess.sh".format(condition, sample, sample)
        with open(script_path, "w") as script:
            script.write("\n".join(bash_file) + "\n")

In [79]:
# Write <condition>/<sample>/<sample>_bash.sh: a SLURM batch header followed by
# a line that sources the sample's <sample>_preprocess.sh.
for condition in folder_list:
    for sample in os.listdir(condition):
        run_bash = [
            "#!/bin/bash",
            "#SBATCH -p checkpt",
            "#SBATCH -t 1-00:00:00",
            "#SBATCH -N 1",
            "#SBATCH -c 48",
            "#SBATCH -o {}_prepare_out".format(sample),
            "#SBATCH -e {}_prepare_err".format(sample),
            "#SBATCH -A loni_momics01",
            "source {}_preprocess.sh".format(sample),
        ]

        script_path = "{}/{}/{}_bash.sh".format(condition, sample, sample)
        with open(script_path, "w") as file:
            file.writelines(line + "\n" for line in run_bash)

In [27]:
# Submit every sample's <sample>_bash.sh with sbatch, run from the sample folder
# so the relative paths inside the script (log files, `source ...`) resolve.
for condition in folder_list:
    for sample in os.listdir(condition):
        sample_dir = os.path.join(condition, sample)
        # cwd= avoids changing the notebook's own working directory, and the
        # argument list avoids passing the sample name through a shell.
        result = subprocess.run(["sbatch", "{}_bash.sh".format(sample)], cwd=sample_dir)
        if result.returncode != 0:
            # Keep submitting the remaining samples, but make the failure visible.
            print("sbatch failed for {} (exit code {})".format(sample_dir, result.returncode))

Submitted batch job 401792 estimates 1152 SUs from allocation loni_momics01. Estimated remaining SUs: 3988151
JOBID      NAME                PARTITION  TIME_LIMIT  ST  NODES  REASON                                       
401792     B02009F6_bash.sh    checkpt    1-00:00:00  PD  1      None                                         
Submitted batch job 401793 estimates 1152 SUs from allocation loni_momics01. Estimated remaining SUs: 3988151
JOBID      NAME                PARTITION  TIME_LIMIT  ST  NODES  REASON                                       
401793     A02092E1_bash.sh    checkpt    1-00:00:00  PD  1      None                                         
Submitted batch job 401794 estimates 1152 SUs from allocation loni_momics01. Estimated remaining SUs: 3988151
JOBID      NAME                PARTITION  TIME_LIMIT  ST  NODES  REASON                                       
401794     B01809C2_bash.sh    checkpt    1-00:00:00  PD  1      None                                         
Subm

In [20]:
# Scratch check on one sample: every entry of the reads folder whose name
# contains "_1", prefixed with "$dataDir/".
read1 = [
    "$dataDir/" + read
    for read in os.listdir("case/A02092E1/00.Rawdata/reads/")
    if "_1" in read
]

In [21]:
# Order the entries by their embedded number, then collapse them into one
# comma-separated string; the bare name on the last line displays the result.
read1 = ','.join(sorted(read1, key=extract_number))
read1

'$dataDir/E100072854_L01_57_1.fq.gz,$dataDir/E100072854_L01_58_1.fq.gz,$dataDir/E100072854_L01_59_1.fq.gz,$dataDir/E100072854_L01_60_1.fq.gz,$dataDir/E100072854_L01_61_1.fq.gz,$dataDir/E100072854_L01_62_1.fq.gz,$dataDir/E100072854_L01_63_1.fq.gz,$dataDir/E100072854_L01_64_1.fq.gz'

In [None]:
# Scratch cell: prefix every entry of read1 with "$dataDir/" and comma-join them.
# The loop was indented under a plain assignment, which is an IndentationError.
# NOTE(review): this only makes sense while read1 is still a list of file names;
# after the sort/join cell above it is a single string — confirm which is intended.
read1_1 = []
for j in read1:
    read1_1.append("$dataDir/" + j)
read1_1 = ','.join(read1_1)