In [1]:
import sk8s

In [2]:
# Build the container image used by the remote steps in this notebook.
# The package lists are handed to sk8s through its `conda`, `channels` and
# `pip` arguments; `additional_config` is an extra Dockerfile instruction.
image = sk8s.docker_build(image_name="ngs-1",
                          conda=["bwa", "gatk4", "samtools", "google-cloud-sdk"],
                          channels=["conda-forge", "bioconda"],
                          pip=["numpy", "scipy", "matplotlib", "pandas"],
                          # Reinstall crcmod after installing a compiler and the Python
                          # headers (presumably so gsutil picks up the compiled crcmod
                          # extension — confirm). `-y` is needed because `pip uninstall`
                          # otherwise asks for confirmation, and an image build has no
                          # interactive stdin to answer the prompt.
                          additional_config="RUN apt-get install -y gcc python3-dev python3-setuptools && pip3 uninstall -y crcmod && pip3 install --no-cache-dir -U crcmod")

image

'gcr.io/jared-genome-analysis/ngs-1'

In [3]:
# Provision a 100Gi volume named "reference-volume" to hold the reference genome.
reference_volume = sk8s.create_volume(
    "100Gi",
    name="reference-volume",
)
reference_volume

'reference-volume'

In [5]:
def populate_reference_volume(volume):
    """Copy the GRCh38 reference files from Cloud Storage onto a mounted volume.

    Creates ``/mnt/<volume>/hg38/`` and mirrors ``gs://jared-genome/ref/GRCh38/``
    into it with ``gsutil -m rsync -r``.

    Args:
        volume: Name of the volume; the files are written under
            ``/mnt/<volume>/hg38/``.

    Returns:
        The string ``"OK"`` once both commands have succeeded.

    Raises:
        subprocess.CalledProcessError: If ``mkdir`` or ``gsutil`` exits with a
            non-zero status.
    """
    # Imported inside the function, presumably because sk8s ships the function
    # to a container where notebook-level imports are not available — confirm.
    import subprocess

    def run_silent(args):
        # Capture stdout/stderr instead of echoing them; check=True turns a
        # non-zero exit status into CalledProcessError.
        return subprocess.run(args, check=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)

    destination = f"/mnt/{volume}/hg38/"

    # Commands are passed as argument lists (no shell), so a volume name
    # containing spaces or shell metacharacters is used verbatim as a path.
    run_silent(["mkdir", "-p", destination])

    # An earlier version of this step synced from the public bucket
    # gs://genomics-public-data/references/hg38/v0/ instead.
    run_silent(["gsutil", "-m", "rsync", "-r",
                "gs://jared-genome/ref/GRCh38/", destination])
    return "OK"

# Run populate_reference_volume through sk8s on the built image, passing the
# volume name as its argument and listing the same volume under `volumes`.
# asynchro=False presumably makes the call wait for the return value — confirm.
result = sk8s.run(
    populate_reference_volume,
    reference_volume,
    image=image,
    volumes=[reference_volume],
    asynchro=False,
)

In [6]:
# Show what the population step returned; populate_reference_volume returns "OK" on success.
print(result)

OK


In [13]:
def align(fq1, fq2, output_bam, reference=f"/mnt/{reference_volume}/hg38/Homo_sapiens_assembly38.fasta"):
    """Align paired-end reads with ``bwa mem`` and upload a sorted, indexed BAM.

    Runs these shell commands in order, stopping at the first failure:
      1. ``gsutil -m cp`` of ``fq1`` and ``fq2`` to local ``fq1.fq`` / ``fq2.fq``;
      2. ``bwa mem`` piped into ``samtools sort``, written to ``out.bam``;
      3. ``samtools index out.bam``;
      4. ``gsutil -m cp`` of ``out.bam`` to ``output_bam`` and of
         ``out.bam.bai`` to ``output_bam + ".bai"``.

    Args:
        fq1: Source location of the first FASTQ file, as accepted by ``gsutil cp``.
        fq2: Source location of the second FASTQ file.
        output_bam: Destination for the BAM; the index goes to the same
            location with ``.bai`` appended.
        reference: Path to the reference FASTA given to ``bwa mem``. The
            default is computed once, when this ``def`` is executed, from the
            notebook-level ``reference_volume``.

    Returns:
        A list with one dict per command, in execution order, with the keys
        ``cmd``, ``returncode``, ``stdout`` and ``stderr``.

    Raises:
        subprocess.CalledProcessError: If any command, including either side
            of the ``bwa mem | samtools sort`` pipeline, exits non-zero.
    """
    # Imported inside the function, presumably because sk8s ships the function
    # to a container where notebook-level imports are not available — confirm.
    import shlex
    import subprocess

    def run_and_log(cmd):
        # Without pipefail a pipeline's exit status is that of its LAST command,
        # so a crashed `bwa mem` would be hidden by a successful `samtools sort`
        # and check=True would never fire. pipefail needs bash rather than a
        # plain POSIX sh; the image is built with apt-get, so /bin/bash is
        # assumed to exist — confirm if the base image changes.
        proc = subprocess.run(f"set -o pipefail; {cmd}", check=True, shell=True,
                              executable="/bin/bash", encoding="utf-8",
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Report the command as the caller wrote it, without the pipefail prefix.
        return dict(cmd=cmd,
                    returncode=proc.returncode,
                    stdout=proc.stdout,
                    stderr=proc.stderr)

    # Quote every caller-supplied value so spaces or shell metacharacters in a
    # path cannot split or alter the command. Plain paths are left unchanged.
    fq1_arg = shlex.quote(str(fq1))
    fq2_arg = shlex.quote(str(fq2))
    reference_arg = shlex.quote(str(reference))
    bam_arg = shlex.quote(str(output_bam))
    bai_arg = shlex.quote(f"{output_bam}.bai")

    # NOTE(review): bwa mem is invoked without -t, so it uses its default
    # thread count regardless of how many CPUs the job requests.
    commands = [
        f"gsutil -m cp {fq1_arg} ./fq1.fq",
        f"gsutil -m cp {fq2_arg} ./fq2.fq",
        f"bwa mem {reference_arg} fq1.fq fq2.fq | samtools sort > out.bam",
        "samtools index out.bam",
        f"gsutil -m cp out.bam {bam_arg}",
        f"gsutil -m cp out.bam.bai {bai_arg}",
    ]

    # The comprehension runs the commands strictly in order; an exception from
    # any step aborts the remaining ones.
    return [run_and_log(cmd) for cmd in commands]

# Paired-end input reads in Cloud Storage.
fq1 = "gs://jared-genome/tiny_r1.fq"
fq2 = "gs://jared-genome/tiny_r2.fq"

# Destination of the aligned BAM; align() writes the index next to it as <bam>.bai.
output_prefix = "gs://jared-genome/sk8s/"
output_bam = output_prefix + "tiny.bam"

# Run the alignment through sk8s on the built image with the reference volume
# listed under `volumes`. Memory and ephemeral storage are both requested and
# capped at the same values; CPU is requested only.
alignment_result = sk8s.run(
    align, fq1, fq2, output_bam,
    image=image,
    volumes=[reference_volume],
    requests={"memory": "16Gi", "ephemeral-storage": "100Gi", "cpu": "8"},
    limits={"memory": "16Gi", "ephemeral-storage": "100Gi"},
    asynchro=False,
)
alignment_result

[{'cmd': 'gsutil -m cp gs://jared-genome/tiny_r1.fq ./fq1.fq',
  'returncode': 0,
  'stdout': '',
  'stderr': 'Copying gs://jared-genome/tiny_r1.fq...\n/ [0/1 files][    0.0 B/ 28.7 MiB]   0% Done                                    \n-\n- [0/1 files][ 22.7 MiB/ 28.7 MiB]  78% Done                                    \n- [1/1 files][ 28.7 MiB/ 28.7 MiB] 100% Done                                    \n\\\nOperation completed over 1 objects/28.7 MiB.                                     \n'},
 {'cmd': 'gsutil -m cp gs://jared-genome/tiny_r2.fq ./fq2.fq',
  'returncode': 0,
  'stdout': '',
  'stderr': 'Copying gs://jared-genome/tiny_r2.fq...\n/ [0/1 files][    0.0 B/ 28.7 MiB]   0% Done                                    \n-\n- [0/1 files][ 12.9 MiB/ 28.7 MiB]  44% Done                                    \n\\\n\\ [1/1 files][ 28.7 MiB/ 28.7 MiB] 100% Done                                    \nOperation completed over 1 objects/28.7 MiB.                                     \n'},
 {'cmd': 'bwa m

In [16]:
# align() returns one entry per command in execution order, so index -2 is the
# second-to-last step: the gsutil upload of out.bam. gsutil's progress text is
# shown from the captured stderr.
print(alignment_result[-2]["stderr"])

Copying file://out.bam...
/ [0/1 files][    0.0 B/ 14.8 MiB]   0% Done                                    
/ [1/1 files][ 14.8 MiB/ 14.8 MiB] 100% Done                                    
Operation completed over 1 objects/14.8 MiB.                                     

