In [1]:
library(tidyverse)
library(Rsamtools)

── [1mAttaching packages[22m ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.6     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.7     [32m✔[39m [34mdplyr  [39m 1.0.9
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Loading required package: GenomeInfoDb

Load

In [2]:
# Root directory of the UC Davis Iso-Seq delivery; every other path in this
# notebook is built from it. The shared project location is the default; set
# the ISOSEQ_DATA_DIR environment variable to run against a copy elsewhere.
data_dir = Sys.getenv("ISOSEQ_DATA_DIR", unset = "/u/project/gandalm/shared/isoSeq/UCDavis")

## Proof of concept

In [6]:
# Outputs directory of the run used for the proof of concept below.
cell1_dir = file.path(
    data_dir,
    "PB707_2plex_Pool1_Human_IsoSeq",
    "r64069_20220303_201517",
    "F1",
    "outputs"
)

In [3]:
# Read the FLNC report CSV(s) found in cell1_dir into one table.
# col_types "cfiiiif": id = character, strand = factor, the four length
# columns = integer, primer = factor.
flnc_report = read_csv(
    list.files(
        cell1_dir,
        # Anchored with `$` so only names ending in .csv match; a stray copy
        # such as flnc.report-1.csv.bak would otherwise be read as well.
        pattern = "flnc\\.report-[0-9]+\\.csv$",
        full.names = TRUE
    ),
    col_types = "cfiiiif"
)
flnc_report

id,strand,fivelen,threelen,polyAlen,insertlen,primer
<chr>,<fct>,<int>,<int>,<int>,<int>,<fct>
m64069_220310_023238/3/ccs,+,39,40,31,3609,bc1006_5p--bc1006_3p
m64069_220310_023238/20/ccs,+,38,31,31,3322,bc1006_5p--bc1006_3p
m64069_220310_023238/24/ccs,+,39,41,31,1602,bc1006_5p--bc1006_3p
m64069_220310_023238/26/ccs,+,41,39,28,1493,bc1006_5p--bc1006_3p
m64069_220310_023238/33/ccs,+,32,39,32,5537,bc1006_5p--bc1006_3p
m64069_220310_023238/36/ccs,+,41,39,31,3609,bc1006_5p--bc1006_3p
m64069_220310_023238/46/ccs,+,38,41,32,3874,bc1006_5p--bc1006_3p
m64069_220310_023238/79/ccs,+,41,39,29,2933,bc1006_5p--bc1006_3p
m64069_220310_023238/97/ccs,+,38,41,31,3268,bc1006_5p--bc1006_3p
m64069_220310_023238/98/ccs,+,39,41,31,2094,bc1006_5p--bc1006_3p


In [6]:
# Cluster report for the same cell: cluster_id (factor), read_id (character)
# and read_type (factor).
cluster_report = file.path(cell1_dir, "unpolished.cluster_report.csv") %>%
    read_csv(col_types = "fcf")
cluster_report

cluster_id,read_id,read_type
<fct>,<chr>,<fct>
transcript/0,m64069_220310_023238/87098107/ccs,FL
transcript/0,m64069_220310_023238/34211224/ccs,FL
transcript/1,m64069_220310_023238/36766352/ccs,FL
transcript/1,m64069_220310_023238/74056423/ccs,FL
transcript/2,m64069_220310_023238/133038819/ccs,FL
transcript/2,m64069_220310_023238/147718263/ccs,FL
transcript/3,m64069_220310_023238/49547325/ccs,FL
transcript/3,m64069_220310_023238/10748351/ccs,FL
transcript/4,m64069_220310_023238/65144551/ccs,FL
transcript/4,m64069_220310_023238/129173761/ccs,FL


In [7]:
# Attach each read's cluster_id to the FLNC report (read_type is not needed).
# Reads that do not appear in the cluster report end up with NA in cluster_id.
# A cluster_id column left over from an earlier run of this cell is dropped
# first, so re-running the cell does not create cluster_id.x / cluster_id.y.
flnc_report = flnc_report %>%
    select(-any_of("cluster_id")) %>%
    left_join(select(cluster_report, -read_type), by = c("id" = "read_id"))
flnc_report

id,strand,fivelen,threelen,polyAlen,insertlen,primer,cluster_id
<chr>,<fct>,<int>,<int>,<int>,<int>,<fct>,<fct>
m64069_220310_023238/3/ccs,+,39,40,31,3609,bc1006_5p--bc1006_3p,
m64069_220310_023238/20/ccs,+,38,31,31,3322,bc1006_5p--bc1006_3p,transcript/84978
m64069_220310_023238/24/ccs,+,39,41,31,1602,bc1006_5p--bc1006_3p,transcript/226049
m64069_220310_023238/26/ccs,+,41,39,28,1493,bc1006_5p--bc1006_3p,
m64069_220310_023238/33/ccs,+,32,39,32,5537,bc1006_5p--bc1006_3p,transcript/4697
m64069_220310_023238/36/ccs,+,41,39,31,3609,bc1006_5p--bc1006_3p,transcript/55301
m64069_220310_023238/46/ccs,+,38,41,32,3874,bc1006_5p--bc1006_3p,transcript/48518
m64069_220310_023238/79/ccs,+,41,39,29,2933,bc1006_5p--bc1006_3p,transcript/120179
m64069_220310_023238/97/ccs,+,38,41,31,3268,bc1006_5p--bc1006_3p,transcript/75751
m64069_220310_023238/98/ccs,+,39,41,31,2094,bc1006_5p--bc1006_3p,


In [8]:
# FLNC BAM files for this cell. The trailing `$` keeps index/sidecar files such
# as flnc-1.bam.pbi or flnc-1.bam.bai from matching and being passed to
# scanBam() as if they were BAM files.
bam_files = list.files(
    cell1_dir,
    pattern = "flnc-[0-9]+\\.bam$",
    full.names = TRUE
)
# Return a tibble with one row per BAM record: the read name (`qname`) plus
# the `rq` and `np` tags.
#   x  path to a BAM file
get_bam_tags = function(x) {
    wanted = ScanBamParam(what = "qname", tag = c("rq", "np"))
    # Only the first element of scanBam()'s result list is used.
    records = scanBam(BamFile(x), param = wanted)[[1]]
    bind_cols(
        tibble(qname = records$qname),
        # Splice the tag list so that each tag becomes its own column.
        tibble(!!!records$tag)
    )
}
# Collect the tags from every BAM file and stack them into a single table.
bam_tags = bam_files %>%
    map(get_bam_tags) %>%
    bind_rows()
bam_tags

qname,rq,np
<chr>,<dbl>,<int>
m64069_220310_023238/3/ccs,1.0000000,46
m64069_220310_023238/20/ccs,0.9998962,12
m64069_220310_023238/24/ccs,0.9999990,24
m64069_220310_023238/26/ccs,0.9999968,50
m64069_220310_023238/33/ccs,0.9709887,3
m64069_220310_023238/36/ccs,0.9999830,39
m64069_220310_023238/46/ccs,0.9924731,8
m64069_220310_023238/79/ccs,0.9999995,36
m64069_220310_023238/97/ccs,0.9999970,40
m64069_220310_023238/98/ccs,0.9995913,13


In [9]:
# Add the per-read rq and np tags to the FLNC report, matching on read name.
# Tag columns left over from an earlier run of this cell are dropped first, so
# re-running does not create rq.x / rq.y / np.x / np.y, which would break the
# sum(np) in the next cell.
flnc_report = flnc_report %>%
    select(-any_of(c("rq", "np"))) %>%
    left_join(bam_tags, by = c("id" = "qname"))
flnc_report

id,strand,fivelen,threelen,polyAlen,insertlen,primer,cluster_id,rq,np
<chr>,<fct>,<int>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<int>
m64069_220310_023238/3/ccs,+,39,40,31,3609,bc1006_5p--bc1006_3p,,1.0000000,46
m64069_220310_023238/20/ccs,+,38,31,31,3322,bc1006_5p--bc1006_3p,transcript/84978,0.9998962,12
m64069_220310_023238/24/ccs,+,39,41,31,1602,bc1006_5p--bc1006_3p,transcript/226049,0.9999990,24
m64069_220310_023238/26/ccs,+,41,39,28,1493,bc1006_5p--bc1006_3p,,0.9999968,50
m64069_220310_023238/33/ccs,+,32,39,32,5537,bc1006_5p--bc1006_3p,transcript/4697,0.9709887,3
m64069_220310_023238/36/ccs,+,41,39,31,3609,bc1006_5p--bc1006_3p,transcript/55301,0.9999830,39
m64069_220310_023238/46/ccs,+,38,41,32,3874,bc1006_5p--bc1006_3p,transcript/48518,0.9924731,8
m64069_220310_023238/79/ccs,+,41,39,29,2933,bc1006_5p--bc1006_3p,transcript/120179,0.9999995,36
m64069_220310_023238/97/ccs,+,38,41,31,3268,bc1006_5p--bc1006_3p,transcript/75751,0.9999970,40
m64069_220310_023238/98/ccs,+,39,41,31,2094,bc1006_5p--bc1006_3p,,0.9995913,13


In [10]:
# Per-cluster read counts, one column per primer pair, plus the summed np.
fl_counts = flnc_report %>%
    # Drops every row with a missing value, which includes reads that were not
    # assigned to a cluster (cluster_id is NA for those).
    drop_na() %>%
    group_by(cluster_id, primer) %>%
    # .groups = "drop" returns an ungrouped table and silences the
    # "summarise() has grouped output" message.
    summarize(n = n(), np = sum(np), .groups = "drop") %>%
    pivot_wider(
        names_from = "primer",
        values_from = "n",
        values_fill = 0,       # a cluster with no reads for a primer gets 0
        names_sort = TRUE,
        id_cols = "cluster_id",
        unused_fn = sum        # np is not pivoted; add it up across primers
    )
fl_counts

[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


cluster_id,bc1006_5p--bc1006_3p,bc1008_5p--bc1008_3p,np
<fct>,<int>,<int>,<int>
transcript/0,0,2,5
transcript/1,0,2,8
transcript/2,0,2,23
transcript/3,0,2,7
transcript/4,1,1,23
transcript/5,0,4,31
transcript/6,1,8,77
transcript/7,0,2,21
transcript/8,0,2,19
transcript/9,1,2,38


In [11]:
# Keep clusters whose summed np exceeds 7, drop the helper column, rename the
# columns for export, and write the table to R_output_tmp.csv.
fl_counts %>%
    filter(np > 7) %>%
    select(-np) %>%
    dplyr::rename(
        name = cluster_id,
        BioSample_1 = `bc1006_5p--bc1006_3p`,
        BioSample_2 = `bc1008_5p--bc1008_3p`
    ) %>%
    write_csv(file = "R_output_tmp.csv")

## Generate for all samples

In [29]:
# Sample sheet (all four columns read as character). The *_path columns are
# stored relative to data_dir, so they are prefixed with it here.
samples = file.path(data_dir, "samples.csv") %>%
    read_csv(col_types = "cccc") %>%
    mutate(across(ends_with("_path"), function(rel_path) file.path(data_dir, rel_path)))
samples

outputs_path,subreads_path,donor_id,sample_name
<chr>,<chr>,<chr>,<chr>
/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool1_Human_IsoSeq/r64069_20220303_201517/F1/outputs,/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool1_Human_IsoSeq/r64069_20220303_201517/F1/6_F01_Primary,209,209_1
/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/F1/outputs,/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/F1/6_F01_Primary,209,209_2
/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/G1/outputs,/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/G1/7_G01_Primary,209,209_3
/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/H1/outputs,/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/H1/8_H01_Primary,209,209_4
/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool2_Human_IsoSeq/r64069_20220303_201517/G1/outputs,/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool2_Human_IsoSeq/r64069_20220303_201517/G1/7_G01_Primary,334,334_1
/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool2_Human_IsoSeq/r64069_20220525_225441/C1/outputs,/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool2_Human_IsoSeq/r64069_20220525_225441/C1/3_C01_Primary,334,334_2
/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool2_Human_IsoSeq/r64069_20220525_225441/D1/outputs,/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool2_Human_IsoSeq/r64069_20220525_225441/D1/4_D01_Primary,334,334_3
/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool2_Human_IsoSeq/r64069_20220525_225441/E1/outputs,/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool2_Human_IsoSeq/r64069_20220525_225441/E1/5_E01_Primary,334,334_4
/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool3_Human_IsoSeq/r64069_20220218_190600/B1/outputs,/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool3_Human_IsoSeq/r64069_20220218_190600/B1/2_B01_Primary,336,336_1
/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool3_Human_IsoSeq/r64069_20220603_212701/F1/outputs,/u/project/gandalm/shared/isoSeq/UCDavis/PB707_2plex_Pool3_Human_IsoSeq/r64069_20220603_212701/F1/6_F01_Primary,336,336_2


In [91]:
# Return a tibble with one row per BAM record: the read name (`qname`) plus
# the `rq` and `np` tags.
#   x  path to a BAM file
# NOTE(review): this repeats the definition from the proof-of-concept section;
# it is kept so this section can be run without the cells above.
get_bam_tags = function(x) {
    wanted = ScanBamParam(what = "qname", tag = c("rq", "np"))
    # Only the first element of scanBam()'s result list is used.
    records = scanBam(BamFile(x), param = wanted)[[1]]
    bind_cols(
        tibble(qname = records$qname),
        # Splice the tag list so that each tag becomes its own column.
        tibble(!!!records$tag)
    )
}

# Build and write the per-cluster read-count table for one sample.
#
# Arguments:
#   outputs_path  directory containing flnc.report-N.csv, flnc-N.bam and
#                 unpolished.cluster_report.csv for the sample
#   sample_name   label printed as progress and used in the output file name
#   ...           ignored; lets pwalk() pass the remaining columns of `samples`
#
# Side effect: writes R_out/<sample_name>.fl_counts.csv, creating R_out/ when
# it does not exist yet. Returns whatever write_csv() returns.
generate_fl = function(outputs_path, sample_name, ...) {
    cat(sprintf("%s\n", sample_name))

    # Patterns are anchored with `$` so that sidecar files (flnc-1.bam.pbi,
    # flnc.report-1.csv.bak, ...) are not picked up by mistake.
    report_files = list.files(
        outputs_path,
        pattern = "flnc\\.report-[0-9]+\\.csv$",
        full.names = TRUE
    )
    bam_files = list.files(
        outputs_path,
        pattern = "flnc-[0-9]+\\.bam$",
        full.names = TRUE
    )
    # Fail with a clear message instead of an obscure read/join error later on.
    if (length(report_files) == 0) {
        stop("no flnc.report-N.csv files found in ", outputs_path)
    }
    if (length(bam_files) == 0) {
        stop("no flnc-N.bam files found in ", outputs_path)
    }

    flnc_report = read_csv(report_files, col_types = "cfiiiif")

    cluster_report = read_csv(file.path(outputs_path, "unpolished.cluster_report.csv"), col_types = "fcf")

    bam_tags = map_dfr(
        bam_files,
        get_bam_tags
    )

    # One row per read, with its cluster (NA when absent from the cluster
    # report) and its rq / np tags.
    flnc_report = flnc_report %>%
        left_join(select(cluster_report, -read_type), by = c("id" = "read_id")) %>%
        left_join(bam_tags, by = c("id" = "qname"))

    fl_counts = flnc_report %>%
        # Drops every row with a missing value, including unclustered reads.
        drop_na() %>%
        group_by(cluster_id, primer) %>%
        # .groups = "drop" silences the per-sample "grouped output" message.
        summarize(n = n(), np = sum(np), .groups = "drop") %>%
        pivot_wider(
            names_from = "primer",
            values_from = "n",
            values_fill = 0,
            names_sort = TRUE,
            id_cols = "cluster_id",
            unused_fn = sum   # np is not pivoted; add it up across primers
        )

    # write_csv() does not create missing directories.
    dir.create("R_out", showWarnings = FALSE, recursive = TRUE)

    fl_counts %>%
        filter(np > 7) %>%   # keep clusters whose summed np exceeds 7
        select(-np) %>%
        dplyr::rename(name = "cluster_id") %>%
        write_csv(str_glue("R_out/{sample_name}.fl_counts.csv"))
}

In [92]:
# Call generate_fl() once per row of `samples`. Columns are matched to its
# arguments by name; the columns it does not name are absorbed by `...`.
pwalk(samples, generate_fl)

209_1


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


209_2


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


209_3


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


209_4


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


334_1


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


334_2


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


334_3


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


334_4


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


336_1


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


336_2


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


336_3


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


336_4


[1m[22m`summarise()` has grouped output by 'cluster_id'. You can override using the `.groups` argument.


## Count reads by RQ

In [134]:
# Per-BAM read table. Column types are stated explicitly (matching what readr
# guessed previously) so the result does not depend on type guessing and the
# column-specification message is not printed.
reads = read_csv(
    file.path(data_dir, "reads_flnc.csv"),
    col_types = cols(
        flnc_path   = col_character(),
        primer      = col_character(),
        read_count  = col_double(),
        donor_id    = col_double(),
        region      = col_character(),
        sample_name = col_character(),
        reads_name  = col_character()
    )
)
reads

[1mRows: [22m[34m24[39m [1mColumns: [22m[34m7[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (5): flnc_path, primer, region, sample_name, reads_name
[32mdbl[39m (2): read_count, donor_id

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


flnc_path,primer,read_count,donor_id,region,sample_name,reads_name
<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220303_201517/F1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,1019085,209,VZ,209_1,209_1_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220303_201517/F1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1972961,209,CP,209_1,209_1_CP
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/F1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,895373,209,VZ,209_2,209_2_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/F1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1799193,209,CP,209_2,209_2_CP
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/G1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,813346,209,VZ,209_3,209_3_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/G1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1638032,209,CP,209_3,209_3_CP
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/H1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,872050,209,VZ,209_4,209_4_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/H1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1714296,209,CP,209_4,209_4_CP
PB707_2plex_Pool2_Human_IsoSeq/r64069_20220303_201517/G1/outputs/flnc-1.bam,bc1012_5p--bc1012_3p,1708324,334,VZ,334_1,334_1_VZ
PB707_2plex_Pool2_Human_IsoSeq/r64069_20220303_201517/G1/outputs/flnc-2.bam,bc1018_5p--bc1018_3p,1605255,334,CP,334_1,334_1_CP


In [124]:
# Count the reads in one FLNC BAM file, in total and at three `rq` thresholds.
#   flnc_path  BAM path relative to the global `data_dir`
#   ...        ignored
# Returns a one-row tibble: read_count_Q30, read_count_Q20, read_count_Q10 and
# read_count.
get_read_counts = function(flnc_path, ...) {
    wanted = ScanBamParam(what = "qname", tag = "rq")
    # Only the first element of scanBam()'s result list is used.
    records = scanBam(BamFile(file.path(data_dir, flnc_path)), param = wanted)[[1]]
    per_read = bind_cols(
        tibble(qname = records$qname),
        tibble(!!!records$tag)
    )
    summarize(
        per_read,
        read_count_Q30 = sum(rq >= 0.999), # 99.9%
        read_count_Q20 = sum(rq >= 0.99),  # 99%
        read_count_Q10 = sum(rq >= 0.90),  # 90%
        read_count     = n()
    )
}

In [140]:
# Replace the read_count column with the counts recomputed from each BAM file:
# get_read_counts() yields a one-row tibble per file, which unnest() spreads
# into the four count columns.
reads2 = reads %>%
    mutate(read_count = map(flnc_path, function(bam_path) get_read_counts(bam_path))) %>%
    unnest(cols = read_count)
reads2

flnc_path,primer,read_count_Q30,read_count_Q20,read_count_Q10,read_count,donor_id,region,sample_name,reads_name
<chr>,<chr>,<int>,<int>,<int>,<int>,<dbl>,<chr>,<chr>,<chr>
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220303_201517/F1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,712706,898802,1019085,1019085,209,VZ,209_1,209_1_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220303_201517/F1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1357229,1728915,1972961,1972961,209,CP,209_1,209_1_CP
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/F1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,718216,895373,895373,895373,209,VZ,209_2,209_2_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/F1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1428665,1799193,1799193,1799193,209,CP,209_2,209_2_CP
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/G1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,647775,813346,813346,813346,209,VZ,209_3,209_3_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/G1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1293234,1638032,1638032,1638032,209,CP,209_3,209_3_CP
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/H1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,684427,872050,872050,872050,209,VZ,209_4,209_4_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/H1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1332133,1714296,1714296,1714296,209,CP,209_4,209_4_CP
PB707_2plex_Pool2_Human_IsoSeq/r64069_20220303_201517/G1/outputs/flnc-1.bam,bc1012_5p--bc1012_3p,1182708,1502194,1708324,1708324,334,VZ,334_1,334_1_VZ
PB707_2plex_Pool2_Human_IsoSeq/r64069_20220303_201517/G1/outputs/flnc-2.bam,bc1018_5p--bc1018_3p,1098086,1406984,1605255,1605255,334,CP,334_1,334_1_CP


UC Davis used three different read quality cutoffs...

In [142]:
# Label each file with the strictest threshold that all of its reads pass
# (checked from Q30 down to Q10), or "none" when some reads fall below Q10.
# The new column is placed right after read_count.
reads3 = mutate(
    reads2,
    cutoff = case_when(
        read_count_Q30 == read_count ~ "Q30",
        read_count_Q20 == read_count ~ "Q20",
        read_count_Q10 == read_count ~ "Q10",
        TRUE ~ "none"
    ),
    .after = read_count
)
reads3

flnc_path,primer,read_count_Q30,read_count_Q20,read_count_Q10,read_count,cutoff,donor_id,region,sample_name,reads_name
<chr>,<chr>,<int>,<int>,<int>,<int>,<chr>,<dbl>,<chr>,<chr>,<chr>
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220303_201517/F1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,712706,898802,1019085,1019085,Q10,209,VZ,209_1,209_1_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220303_201517/F1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1357229,1728915,1972961,1972961,Q10,209,CP,209_1,209_1_CP
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/F1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,718216,895373,895373,895373,Q20,209,VZ,209_2,209_2_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/F1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1428665,1799193,1799193,1799193,Q20,209,CP,209_2,209_2_CP
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/G1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,647775,813346,813346,813346,Q20,209,VZ,209_3,209_3_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/G1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1293234,1638032,1638032,1638032,Q20,209,CP,209_3,209_3_CP
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/H1/outputs/flnc-1.bam,bc1006_5p--bc1006_3p,684427,872050,872050,872050,Q20,209,VZ,209_4,209_4_VZ
PB707_2plex_Pool1_Human_IsoSeq/r64069_20220505_190448/H1/outputs/flnc-2.bam,bc1008_5p--bc1008_3p,1332133,1714296,1714296,1714296,Q20,209,CP,209_4,209_4_CP
PB707_2plex_Pool2_Human_IsoSeq/r64069_20220303_201517/G1/outputs/flnc-1.bam,bc1012_5p--bc1012_3p,1182708,1502194,1708324,1708324,Q10,334,VZ,334_1,334_1_VZ
PB707_2plex_Pool2_Human_IsoSeq/r64069_20220303_201517/G1/outputs/flnc-2.bam,bc1018_5p--bc1018_3p,1098086,1406984,1605255,1605255,Q10,334,CP,334_1,334_1_CP


In [143]:
# Save the annotated table to reads_flnc.csv in the working directory.
write_csv(reads3, "reads_flnc.csv")