## SARS-CoV-2 (COVID-19) Pandemic analysis





In [None]:
%%bash
# Download the inputs for this notebook from OSF project 8f6n9.
# The fetch commands are given no local path, so the files land in the
# current working directory.
#
# Strict mode: without it a failed download in the middle of the list goes
# unnoticed, because only the exit status of the last command reaches the cell.
set -euo pipefail

# Fetch one remote file unless a file with the same basename is already
# present locally, so that re-running the cell resumes a partial download
# instead of stopping at the first file that already exists.
#   $1: remote path inside the OSF project
fetch_if_missing() {
    local remote="$1"
    local local_name="${remote##*/}"
    if [[ -e "${local_name}" ]]; then
        echo "skip (already present): ${local_name}" >&2
        return 0
    fi
    osf -p 8f6n9 fetch "${remote}"
}

#get sequence transcriptome data
# Read files _1 and _2, each stored remotely as parts fq0 .. fq6.
for mate in 1 2; do
    for part in 0 1 2 3 4 5 6; do
        fetch_if_missing "osfstorage/DNBseq/Vero-Infected/Vero_SCV2_${mate}.fq${part}.zst"
    done
done

#get junctions table
fetch_if_missing "osfstorage/SupplementaryTables/Table S2.xlsx"


In [2]:
# Load the junction table from the paper (Table S2) into a pandas DataFrame.
# The BED export of these junctions is done in the next cell.
# NOTE(review): the download cell fetches "Table S2.xlsx", while this cell
# reads "Table_s2_junctions.xlsx" from /input_dir/corona_analysis; presumably
# the file was renamed/moved by hand. Confirm.

import pandas as pd

jxs_covid = pd.read_excel(
    io="/input_dir/corona_analysis/Table_s2_junctions.xlsx",
)

In [3]:
#Make bed files for covid junctions
# Writes one 4-column BED line (chrom, 5' site, 3' site, ".") for every
# junction in jxs_covid whose read count exceeds jx_read_thresh.
jx_read_thresh = 5000
bed_out_jx = "/input_dir/corona_analysis/annotations/jx_s2_covid.bed"

# Keep only well-supported junctions. Rows with a missing count compare
# False and are dropped, exactly as in a per-row `count > threshold` test.
jxs_supported = jxs_covid.loc[
    jxs_covid["count"] > jx_read_thresh, ["5' site", "3' site"]
]

# Read the site columns directly instead of via iterrows(): iterrows() builds
# one Series per row and upcasts mixed numeric columns, so integer positions
# could be written as "123.0". The int() cast guarantees integer coordinates
# and raises if a selected row has a missing or non-integer site.
# NOTE(review): sites are written unmodified; BED is 0-based, half-open.
# Confirm the coordinate convention of the table.
with open(bed_out_jx, "w") as bed_out:
    for site_5p, site_3p in zip(jxs_supported["5' site"], jxs_supported["3' site"]):
        bed_out.write(
            "hCoV-19/South_Korea/KCDC03/2020|EPI_ISL_407193"
            + "\t" + str(int(site_5p))
            + "\t" + str(int(site_3p))
            + "\t" + "." + "\n"
        )
        

In [None]:
%%bash
# Strict mode: without pipefail a failure of bedToGenePred is hidden by the
# exit status of genePredToGtf, and the cell would appear to succeed.
set -euo pipefail

#Flip junctions bed file to gtf
# bedToGenePred writes its genePred output to stdout; genePredToGtf reads it
# from stdin ("file stdin") and writes the GTF next to the BED file.
bedToGenePred /input_dir/corona_analysis/annotations/jx_s2_covid.bed stdout \
| genePredToGtf file stdin /input_dir/corona_analysis/annotations/jx_s2_covid.gtf


In [None]:
%%bash
# Align Nanopore called fq reads to GISAID: EPI_ISL_407193 using minimap2
#
# Strict mode: without pipefail a minimap2 failure is masked by samtools and
# the cell would leave a truncated/empty BAM behind while reporting success.
set -euo pipefail

OUT_DIR=/input_dir/corona_analysis/alignment_out
# The output directory must exist before samtools can write into it.
mkdir -p "${OUT_DIR}"

# minimap2 emits SAM (-a) on stdout, using the junction BED as splice hints.
# samtools sort reads that stream from stdin ("-") and writes the
# coordinate-sorted BAM directly; a separate "samtools view -hbS" pass would
# only decompress and recompress the same records.
minimap2 -k 8 -w 1 --splice -g 30000 -G 30000 -A1 -B2 -O2,24 \
         -E1,0 -C0 -z 400,200 --no-end-flt --junc-bonus=100 \
         -F 40000 -N 32 --splice-flank=no --max-chain-skip=40 \
         -un --junc-bed=/input_dir/corona_analysis/annotations/jx_s2_covid.bed -p 0.7 -t 16 -a \
            /input_dir/corona_analysis/annotations/EPI_ISL_407193_edit.fasta \
            /input_dir/corona_analysis/VeroInf24h.all.fastq \
            | samtools sort -T /input_dir/corona_analysis/temp -@ 8 -O bam \
                  -o "${OUT_DIR}/Covid_EPI_ISL_407193_nanopore.bam" -
        

In [None]:
%%bash
#Generate genome indices for STAR
# Builds the STAR index for the edited EPI_ISL_407193 FASTA, with the
# junctions from jx_s2_covid.gtf inserted as annotated splice junctions.
set -euo pipefail

GENOME_DIR=/input_dir/corona_analysis/annotations/STAR_ix
# Create the index directory up front; not every STAR release creates it.
mkdir -p "${GENOME_DIR}"

# NOTE(review): --sjdbOverhang 99 presumes 100-nt mates (the STAR manual
# recommends mate length - 1). Confirm against the DNBseq read length.
# NOTE(review): --genomeSAindexNbases 8. The STAR manual suggests
# min(14, log2(genome length)/2 - 1) for small genomes. Confirm 8 is intended.
STAR --runThreadN 16 \
     --runMode genomeGenerate \
     --genomeDir "${GENOME_DIR}" \
     --genomeFastaFiles /input_dir/corona_analysis/annotations/EPI_ISL_407193_edit.fasta \
     --sjdbGTFfile /input_dir/corona_analysis/annotations/jx_s2_covid.gtf \
     --sjdbOverhang 99 --genomeSAindexNbases 8
    

In [None]:
%%bash
#Align DNBSeq output to Covid genome using STAR
set -euo pipefail

OUT_DIR=/input_dir/corona_analysis/alignment_out
STAR_TMP=/input_dir/corona_analysis/temp/star

# The output directory and the parent of the temp directory must exist.
mkdir -p "${OUT_DIR}" "$(dirname "${STAR_TMP}")"
# STAR refuses to start when --outTmpDir already exists (e.g. left behind by
# an interrupted run), so clear this run-private scratch directory first.
rm -rf "${STAR_TMP}"

# Output names are appended directly to --outFileNamePrefix, so files are
# written as ${OUT_DIR}/Covid_EPI_ISL_407193<STAR suffix>.
# Reads are zstd-compressed and are streamed through zstdcat.
# NOTE(review): Vero_SCV2_{1,2}.fq_all.zst are not produced by any visible
# cell; the download cell fetches parts fq0 .. fq6. Presumably the parts were
# concatenated by hand. Confirm.
# NOTE(review): --chimOutType is set, but per the STAR manual chimeric
# detection stays off unless --chimSegmentMin > 0. Confirm intent.
STAR --runMode alignReads --outFilterType BySJout --outFilterMultimapNmax 20 \
     --alignSJoverhangMin 8 --outSJfilterOverhangMin 12 12 12 12 \
     --outSJfilterCountUniqueMin 1 1 1 1 --outSJfilterCountTotalMin 1 1 1 1 \
     --outSJfilterDistToOtherSJmin 0 0 0 0 --outFilterMismatchNmax 999 \
     --outFilterMismatchNoverReadLmax 0.04 --scoreGapNoncan -4 \
     --scoreGapATAC -4 --chimOutType WithinBAM HardClip \
     --genomeDir /input_dir/corona_analysis/annotations/STAR_ix \
     --chimScoreJunctionNonGTAG 0 --alignSJstitchMismatchNmax -1 -1 -1 -1 \
     --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 \
     --outSAMtype BAM SortedByCoordinate --runThreadN 16 \
     --outFileNamePrefix "${OUT_DIR}/Covid_EPI_ISL_407193" \
     --outTmpDir "${STAR_TMP}" \
     --outReadsUnmapped Fastx --limitBAMsortRAM 18000000000 \
     --outWigType wiggle --readFilesCommand zstdcat \
     --readFilesIn /input_dir/corona_analysis/Vero_SCV2_1.fq_all.zst /input_dir/corona_analysis/Vero_SCV2_2.fq_all.zst
