# Get scRNA-seq data from zebrafish pMN neural progenitors

## Paper: [Prdm8 regulates pMN progenitor specification for motor neuron and oligodendrocyte fates by modulating the Shh signaling response](https://dev.biologists.org/content/147/16/dev191023?with-ds=yes)

### Bioproject: PRJNA656271

Input metadata for the raw data is stored in `PRJNA656271_metadata.txt`.

## Download metadata of SRA runs

Navigate to project PRJNA656271 in SRA [here](https://www.ncbi.nlm.nih.gov/Traces/study/?acc=PRJNA656271&o=acc_s%3Aa), download the metadata, and rename the downloaded file to "PRJNA656271_metadata.txt".


In [None]:
import pandas as pd

In [None]:
# Load the downloaded run metadata: a comma-separated table whose first row
# holds the column names.
PRJNA656271_meta_in = pd.read_csv(
    "PRJNA656271_metadata.txt",
    sep=",",
    header=0,
)

In [None]:
# Display the column names of the metadata table to see which fields it provides.
PRJNA656271_meta_in.columns

In [None]:
# Show only the Experiment, Run and Developmental_stage columns for every row.
PRJNA656271_meta_in.loc[:, ["Experiment", "Run", "Developmental_stage"]]

# Get raw sequence data

scRNA-seq of zebrafish pMN neurons at 3 developmental stages, in quadruplicate.

Pull one 24 hr post-fertilization sample: SRX8920143 (SRR12424270)




In [None]:
%%bash
## Fetch the raw data for run SRR12424270.

##prefetch SRA files
prefetch --type fastq SRR12424270

#Or download directly into raw_data/, which is where the unpacking cell reads the .sra file from.
# Create the destination directory first so curl has somewhere to write.
mkdir -p /data_dir/zebrafish_experiments/raw_data
# --fail       : exit non-zero on an HTTP error instead of saving the server's
#                error page as SRR12424270.sra and reporting success.
# --show-error : still print the failure reason although --silent hides the progress meter.
# --output     : write the file from curl itself rather than through a shell redirect.
curl --fail --silent --show-error --location \
    --output /data_dir/zebrafish_experiments/raw_data/SRR12424270.sra \
    https://sra-download.ncbi.nlm.nih.gov/traces/sra70/SRR/012133/SRR12424270
    

In [None]:
%%bash
# Unpack the downloaded SRR12424270 archive into gzipped FASTQ files (one file
# per read), skipping technical reads and keeping only reads that pass the filter.
parallel-fastq-dump \
    --threads 16 \
    --tmpdir /data_dir/zebrafish_experiments/temp/ \
    --sra-id /data_dir/zebrafish_experiments/raw_data/SRR12424270.sra \
    --outdir /data_dir/zebrafish_experiments/input_reads/pMN_zebra \
    --dumpbase \
    --gzip \
    --skip-technical \
    --readids \
    --read-filter pass \
    --split-files \
    --origfmt


In [None]:
%%bash
# The %%bash cell magic is required: without it the notebook's Python kernel
# tries to parse this shell command as Python and fails with a SyntaxError.
#
# Unpack the SRR10586525 archive into gzipped FASTQ files (one file per read),
# skipping technical reads, applying --clip and keeping only reads that pass the filter.
# NOTE(review): no cell in this notebook downloads SRR10586525.sra into raw_data/;
# confirm it is fetched elsewhere before running.
parallel-fastq-dump -t 8 --tmpdir /data_dir/zebrafish_experiments/temp/ \
    -s /data_dir/zebrafish_experiments/raw_data/SRR10586525.sra \
    --dumpbase --clip --gzip --skip-technical --readids \
    --read-filter pass --split-files --origfmt \
    --outdir /data_dir/zebrafish_experiments/input_reads/pMN_zebra


In [None]:

%%bash
# The %%bash cell magic is required: without it the notebook's Python kernel
# tries to parse this shell command as Python and fails with a SyntaxError.

#Dump subset of reads from scRNA-seq dataset into 2 files --- CHROMIUM V2
# -N / -X limit the dump to spot IDs 1000000 through 11000000; output is
# uncompressed FASTQ, split into one file per read.
# NOTE(review): the input path has no ".sra" suffix, unlike the full-dump cell
# for the same run; confirm which path actually exists on disk.
fastq-dump     \
    --dumpbase --origfmt  \
    --read-filter pass -N 1000000 -X 11000000   \
    --split-files   --outdir /data_dir/zebrafish_experiments/input_reads/pMN_zebra   \
    /data_dir/zebrafish_experiments/raw_data/SRR10586525


In [None]:

%%bash
# The %%bash cell magic is required: without it the notebook's Python kernel
# tries to parse this shell script as Python and fails with a SyntaxError.

#Test STAR solo workflow using 1M zebrafish chromium V2 reads from SRR10586525
##Barcode + UMI at beginning of second read

# Create the output directory up front so this cell does not depend on a later
# cell having been run first (the other STAR cell in this notebook does the same).
mkdir -p /data_dir/zebrafish_experiments/alignment_out/24_hr_post_fert/

# NOTE(review): this run writes to the same --outFileNamePrefix and --outTmpDir
# as the SRR12424270 STAR cell, so the two runs share output names.
cd /data_dir/zebrafish_experiments/input_reads/pMN_zebra/
STAR --runMode alignReads      \
     --genomeDir /data_dir/zebrafish_experiments/annotations/zebrafish/STAR_ix \
     --outSAMtype BAM SortedByCoordinate --runThreadN 8 \
     --outFileNamePrefix /data_dir/zebrafish_experiments/alignment_out/24_hr_post_fert/pMN_24_pf_s1_ \
     --outTmpDir /data_dir/zebrafish_experiments/temp/star2_tmp \
     --outReadsUnmapped Fastx --limitBAMsortRAM 27000000000 \
     --outWigType wiggle --quantMode TranscriptomeSAM \
     --readFilesIn SRR10586525_pass_3.fastq \
                   SRR10586525_pass_2.fastq \
     --soloType CB_UMI_Simple --soloCBwhitelist /data_dir/zebrafish_experiments/annotations/Chromium_v2_barcodes.txt \
     --soloUMIlen 10 --soloCBlen 16 --soloFeatures Gene --soloOutFileNames 24_hr_post_fert_s1_ --soloBarcodeReadLength 101


In [None]:
%%bash

#Filter reads using Fastp -- not needed?
# Paired-end filtering of the gzipped SRR12424270 reads: -i/-I are the inputs,
# -o/-O the filtered outputs, -h/-j the HTML and JSON reports.
# Worker threads are set with -w/--thread. In fastp, -p is the switch that turns
# on overrepresented-sequence analysis and takes no value, so "-p 14" does not
# request 14 threads.
fastp --thread 14 -i /data_dir/zebrafish_experiments/input_reads/pMN_zebra/SRR12424270_pass_1.fastq.gz \
      -I /data_dir/zebrafish_experiments/input_reads/pMN_zebra/SRR12424270_pass_2.fastq.gz  \
      -o /data_dir/zebrafish_experiments/input_reads/pMN_zebra/SRR12424270_filt_1.fastq.gz \
      -O /data_dir/zebrafish_experiments/input_reads/pMN_zebra/SRR12424270_filt_2.fastq.gz \
      -h /data_dir/zebrafish_experiments/input_reads/pMN_zebra/SRR12424270_fastp.html \
      -j /data_dir/zebrafish_experiments/input_reads/pMN_zebra/SRR12424270_fastp.json


In [None]:
%%bash
# Align the filtered SRR12424270 reads with STAR as a plain paired-end run;
# the STARsolo options are kept commented out at the bottom of the cell.
# NOTE(review): the *_1M.fq.gz inputs are not produced by any cell shown in
# this notebook; presumably a subset of the fastp output, confirm.

# Output directory for the alignment results.
mkdir -p /data_dir/zebrafish_experiments/alignment_out/24_hr_post_fert/

# Run from the reads directory so --readFilesIn can use bare file names.
cd /data_dir/zebrafish_experiments/input_reads/pMN_zebra/

STAR \
    --runMode alignReads \
    --genomeDir /data_dir/zebrafish_experiments/annotations/zebrafish/STAR_ix \
    --outSAMtype BAM SortedByCoordinate \
    --runThreadN 14 \
    --outFileNamePrefix /data_dir/zebrafish_experiments/alignment_out/24_hr_post_fert/pMN_24_pf_s1_ \
    --outTmpDir /data_dir/zebrafish_experiments/temp/star2_tmp \
    --outReadsUnmapped Fastx \
    --limitBAMsortRAM 27000000000 \
    --outWigType wiggle \
    --quantMode TranscriptomeSAM \
    --readFilesCommand zcat \
    --readFilesIn SRR12424270_filt_1_1M.fq.gz SRR12424270_filt_2_1M.fq.gz

#Single cell lines
#     --soloType CB_UMI_Simple --soloCBwhitelist /data_dir/zebrafish_experiments/annotations/Chromium_v3_barcodes.txt \
#     --soloUMIlen 12 --soloFeatures Gene --soloOutFileNames 24_hr_post_fert_s1_ \


In [7]:
%%bash
# Quantify the SRR10586525 reads with salmon alevin using the --chromium protocol preset.
# Run from the reads directory so the mate files can be given as bare file names.
cd /data_dir/zebrafish_experiments/input_reads/pMN_zebra/

#Attempt doing a hindbrain 24hr post fert sample
salmon alevin \
    --libType ISR \
    --mates1 SRR10586525_pass_2.fastq \
    --mates2 SRR10586525_pass_3.fastq \
    --chromium \
    --index /data_dir/zebrafish_experiments/annotations/zebrafish/salmon_ann/salmon_GRCz11_index \
    --threads 3 \
    --mrna /data_dir/zebrafish_experiments/annotations/zebrafish/gencode_mt.txt \
    --rrna /data_dir/zebrafish_experiments/annotations/zebrafish/rRNA_ensembl.txt \
    --output /data_dir/zebrafish_experiments/alignment_out/24_hr_post_fert/pMN_24_pf_s1_salmon_ \
    --tgMap /data_dir/zebrafish_experiments/annotations/zebrafish/salmon_GRCz11_gencode_tran2gene.txt


Process is interrupted.


In [None]:
%%bash
# Quantify the SRR12424270 reads with salmon alevin using the --chromiumV3 protocol preset.
# Run from the reads directory so the mate files can be given as bare file names.
cd /data_dir/zebrafish_experiments/input_reads/pMN_zebra/

#Attempt doing the pMN 24hr post fert sample (SRR12424270)
# The unpacking cell for this run uses --gzip, so the reads on disk are
# *.fastq.gz (the same names the fastp cell reads); salmon is given those
# gzipped files directly.
# NOTE(review): if the reads were decompressed by hand outside this notebook,
# point -1/-2 at those files instead.
salmon alevin -l ISR \
              -1 SRR12424270_pass_1.fastq.gz \
              -2 SRR12424270_pass_2.fastq.gz \
              --chromiumV3 \
              -i /data_dir/zebrafish_experiments/annotations/zebrafish/salmon_ann/salmon_GRCz11_index \
              -p 3 \
              --mrna /data_dir/zebrafish_experiments/annotations/zebrafish/gencode_mt.txt \
              --rrna /data_dir/zebrafish_experiments/annotations/zebrafish/rRNA_ensembl.txt \
              -o /data_dir/zebrafish_experiments/alignment_out/24_hr_post_fert/pMN_24_pf_s1_appel_salmon_ \
              --tgMap /data_dir/zebrafish_experiments/annotations/zebrafish/salmon_GRCz11_gencode_tran2gene.txt
