In [33]:
import argparse
import logging
import os
import sys

# Append the local git checkouts to the module search path so that the
# project imports further down (cshlwork, mapseq) can be resolved.
for repo in ("~/git/cshlwork", "~/git/mapseq-processing"):
    gitpath = os.path.expanduser(repo)
    sys.path.append(gitpath)

from configparser import ConfigParser

import pandas as pd

from cshlwork.utils import JobRunner, JobStack, JobSet
from mapseq.core import get_default_config, load_sample_info, load_barcodes, process_fastq_pair, make_summaries  

In [34]:
# Setup
# Configuration object handed to every mapseq call in this notebook.
cp = get_default_config()

# Sample sheet, barcode file and output directory, with "~" expanded.
sampleinfo, barcodes, outdir = map(os.path.expanduser, (
    '~/project/mapseq/M205test/Mseq205_sampleinfo.xlsx',
    '~/project/mapseq/M205test/barcode_v2.txt',
    '~/project/mapseq/M205testout',
))

# Paired FASTQ inputs: the R1 file first, the R2 file second.
infiles = [os.path.expanduser(fastq) for fastq in (
    '~/project/mapseq/M205test/M205_HZ_S1_R1_001.fastq.gz',
    '~/project/mapseq/M205test/M205_HZ_S1_R2_001.fastq.gz',
)]

# Let INFO-level records through the root logger.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)


In [35]:
# Read the sample sheet at `sampleinfo` through load_sample_info(), passing the
# config `cp`. The bare `sampdf` on the last line makes the notebook render the
# resulting table as this cell's output.
sampdf = load_sample_info(cp, sampleinfo)
sampdf

Unnamed: 0,usertube,ourtube,samplename,siteinfo,rtprimer,brain,col_num
1,OB,1.0,Olfactory Bulb,,1.0,YW143,1.0
2,ACB,2.0,ACB,,2.0,YW143,2.0
3,AI,3.0,AI,,3.0,YW143,3.0
4,CP,4.0,CP (dorsal part - can just dissect out the top...,,4.0,YW143,4.0
5,MTN,5.0,MTN,,5.0,YW143,5.0
6,BLAa,6.0,BLAa,,6.0,YW143,6.0
7,PIR,7.0,Piriform Cortex,,7.0,YW143,7.0
8,VTA,8.0,VTA,,8.0,YW143,8.0
9,TeA,9.0,TeA,,9.0,YW143,9.0
10,ENTl,10.0,ENTl,,10.0,YW143,10.0


In [36]:
# Collect the RT primer values that are actually filled in on the sample
# sheet; rows with a missing rtprimer entry are dropped first.
used_primers = sampdf["rtprimer"].dropna()
rtlist = list(used_primers)
len(rtlist)

26

In [37]:
# make barcode handler objects
# load_barcodes() receives the config, the barcode file path, the primer labels
# collected in `rtlist`, and the output directory. Its result is kept in
# `bcolist`, and the number of entries is shown as the cell output.
bcolist = load_barcodes(cp, barcodes, labels=rtlist, outdir=outdir)
len(bcolist)

26

In [38]:
# handle all the input. usually takes ~25 minutes
process_fastq_pair(cp, infiles[0], infiles[1], bcolist, outdir=outdir)
!ls ~/project/mapseq/M205testout/

2022-12-08 16:07:46,677 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 50000 reads. matched=614 unmatched=49386
2022-12-08 16:07:50,871 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 100000 reads. matched=1231 unmatched=98769
2022-12-08 16:07:55,066 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 150000 reads. matched=1884 unmatched=148116
2022-12-08 16:07:59,252 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 200000 reads. matched=2522 unmatched=197478
2022-12-08 16:08:03,626 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 250000 reads. matched=3180 unmatched=246820
2022-12-08 16:08:07,837 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 300000 reads. matched=3777 unmatched=296223
2022-12-08 16:08:12,134 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 350000 reads. matched=4436 unmatched=345564
2022-12-08 16:08:16,454 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 400000 r

2022-12-08 16:12:24,609 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 3200000 reads. matched=39858 unmatched=3160142
2022-12-08 16:12:29,589 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 3250000 reads. matched=40459 unmatched=3209541
2022-12-08 16:12:34,585 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 3300000 reads. matched=41075 unmatched=3258925
2022-12-08 16:12:39,538 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 3350000 reads. matched=41702 unmatched=3308298
2022-12-08 16:12:44,466 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 3400000 reads. matched=42329 unmatched=3357671
2022-12-08 16:12:49,418 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 3450000 reads. matched=42968 unmatched=3407032
2022-12-08 16:12:54,391 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 3500000 reads. matched=43582 unmatched=3456418
2022-12-08 16:12:59,273 (UTC) [ INFO ] core.py:318 root.process_fastq

2022-12-08 16:17:10,980 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 6300000 reads. matched=78528 unmatched=6221472
2022-12-08 16:17:15,470 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 6350000 reads. matched=79158 unmatched=6270842
2022-12-08 16:17:19,844 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 6400000 reads. matched=79758 unmatched=6320242
2022-12-08 16:17:24,331 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 6450000 reads. matched=80361 unmatched=6369639
2022-12-08 16:17:28,690 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 6500000 reads. matched=80986 unmatched=6419014
2022-12-08 16:17:33,045 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 6550000 reads. matched=81636 unmatched=6468364
2022-12-08 16:17:37,402 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 6600000 reads. matched=82202 unmatched=6517798
2022-12-08 16:17:41,733 (UTC) [ INFO ] core.py:318 root.process_fastq

2022-12-08 16:21:52,074 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 9400000 reads. matched=117265 unmatched=9282735
2022-12-08 16:21:56,560 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 9450000 reads. matched=117872 unmatched=9332128
2022-12-08 16:22:01,234 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 9500000 reads. matched=118473 unmatched=9381527
2022-12-08 16:22:05,854 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 9550000 reads. matched=119052 unmatched=9430948
2022-12-08 16:22:10,380 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 9600000 reads. matched=119638 unmatched=9480362
2022-12-08 16:22:14,916 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 9650000 reads. matched=120258 unmatched=9529742
2022-12-08 16:22:19,382 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 9700000 reads. matched=120862 unmatched=9579138
2022-12-08 16:22:23,855 (UTC) [ INFO ] core.py:318 root.proces

2022-12-08 16:26:31,269 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 12450000 reads. matched=154959 unmatched=12295041
2022-12-08 16:26:35,683 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 12500000 reads. matched=155546 unmatched=12344454
2022-12-08 16:26:40,039 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 12550000 reads. matched=156137 unmatched=12393863
2022-12-08 16:26:44,365 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 12600000 reads. matched=156755 unmatched=12443245
2022-12-08 16:26:48,719 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 12650000 reads. matched=157372 unmatched=12492628
2022-12-08 16:26:53,039 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 12700000 reads. matched=158030 unmatched=12541970
2022-12-08 16:26:57,638 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 12750000 reads. matched=158640 unmatched=12591360
2022-12-08 16:27:02,529 (UTC) [ INFO ] core.py:3

2022-12-08 16:31:06,761 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 15500000 reads. matched=192900 unmatched=15307100
2022-12-08 16:31:11,053 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 15550000 reads. matched=193495 unmatched=15356505
2022-12-08 16:31:15,581 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 15600000 reads. matched=194131 unmatched=15405869
2022-12-08 16:31:20,286 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 15650000 reads. matched=194767 unmatched=15455233
2022-12-08 16:31:24,856 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 15700000 reads. matched=195407 unmatched=15504593
2022-12-08 16:31:29,288 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 15750000 reads. matched=196024 unmatched=15553976
2022-12-08 16:31:33,818 (UTC) [ INFO ] core.py:318 root.process_fastq_pair(): handled 15800000 reads. matched=196659 unmatched=15603341
2022-12-08 16:31:38,371 (UTC) [ INFO ] core.py:3

BC1.bc.seq.bowtie  BC11.bc.seq.bowtie BC2.counts.tsv     BC4.bc.seq.fasta
BC1.bc.seq.bowtie2 BC11.bc.seq.fasta  BC2.fasta          BC4.counts.tsv
BC1.bc.seq.fasta   BC11.counts.tsv    BC20.fasta         BC4.fasta
BC1.counts.tsv     BC11.fasta         BC21.fasta         BC5.fasta
BC1.fasta          BC12.fasta         BC22.fasta         BC6.fasta
BC1.l1.seq.bowtie  BC13.fasta         BC23.fasta         BC7.fasta
BC1.l1.seq.fasta   BC14.fasta         BC24.fasta         BC8.fasta
BC1.si.seq.bowtie  BC15.fasta         BC25.fasta         BC9.fasta
BC1.si.seq.bowtie2 BC16.fasta         BC26.fasta         bt1.txt
BC1.si.seq.fasta   BC17.fasta         BC3.bc.seq.bowtie  bt2.txt
BC10.bc.seq.bowtie BC18.fasta         BC3.bc.seq.bowtie  indexes
BC10.bc.seq.fasta  BC19.fasta         BC3.counts.tsv     unmatched.fasta
BC10.counts.tsv    BC2.bc.seq.bowtie  BC3.fasta
BC10.fasta         BC2.bc.seq.fasta   BC4.bc.seq.bowtie
