In [1]:
# writing a script to count barcodes in a fasta file

from glob import glob
from Bio import SeqIO
import pandas as pd

In [None]:
# inputs:
# sample name
# fasta file


SAMPLE_TO_REF_SEQ_LEFT = {
    "2A": "GAACTATACAAATAA",
    "2B": "GAACTATACAAATAA",
    "2C": "GAACTATACAAATAA",
    "2D": "GAACTATACAAATAA",
    "3A": "ACTATATATAATCCAATC", #supposedul
    "3B": "GCTTGGTTTTGGAGA", 
    "3C": "TGAGTAGTACTGTGCGC",
}
SAMPLE_TO_REF_SEQ_RIGHT = {
    "2A": "CTCGAGGTTCGAGTA",
    "2B": "CTCGAGGTTCGAGTA",
    "2C": "CTCGAGGTTCGAGTA",
    "2D": "CTCGAGGTTCGAGTA",
    "3A": "AGATCGGAAGAGCGT", # supposedly
    "3B": "AGATCGGAAGAGCGTCGT",
    "3C": "AGATCGGAAGAGCGTCGT",
}


def count_barcodes(fastx_in, ref_seq_left, ref_seq_right, ftype=None, n_lim = 1000, look_only_5flank = False, len_bc = 15):
    """extract barcodes from each read in the FASTQ or FASTA file and returns df with columns 'bc' and 'bc_count'."""

    bc_to_count = {}  # dictionary mapping barcode to number of occurences
    bc_to_count["XXX"] = 0  # for reads where flanking sequences don't match
    c= 0
    for r in SeqIO.parse(fastx_in, ftype):
        c+=1
        if c>n_lim:
            break
        
        seq = r.seq
        #print('seq',seq)

        # in case the barcode is found on the other strand.
        seq_with_rc = seq + seq.reverse_complement()

        left_pos = seq_with_rc.find(ref_seq_left)
        right_pos = seq_with_rc.find(ref_seq_right)

        # looking for both 5' and 3' flank being right for samples 2
        if not look_only_5flank:
            if left_pos != -1 and right_pos != -1:
                bc_start = left_pos + len(ref_seq_left)  # index of first letter of bc
                bc_end = right_pos  # +1 of index of last letter of bc
                # if 13 <= (bc_end-bc_start) <= 17:
                bc = ''.join(seq_with_rc[bc_start:bc_end])
                # assert 13 <= len(bc) <= 17, f'bc of length {len(bc)} not reasonable'
                if bc not in bc_to_count:
                    bc_to_count[bc] = 1
                else:
                    bc_to_count[bc] += 1
            else:
                
                bc_to_count["XXX"] += 1

        # if only looking for the 5' flank, ie. for full length samples 3
        # can fetch some examples later to demonstrate this in the 3 samples.
        else:
            if left_pos == -1:
                bc_to_count["XXX"] += 1
            else:
                bc_start = left_pos + len(ref_seq_left)
                bc_end = bc_start + len_bc
                bc = ''.join(seq_with_rc[bc_start:bc_end])
                if bc not in bc_to_count:
                    bc_to_count[bc] = 1
                else:
                    bc_to_count[bc] += 1
    df = pd.DataFrame.from_dict(bc_to_count, orient="index").reset_index()
    df.columns = ["bc", "bc_count"]
    print('counted',c,'reads')
    display(df)

    return df


In [4]:
# for 2A
illumina_dir = "/data/davidding/dms/illumina_data/missing2a/"

fa_files = [f for f in glob(illumina_dir+'**/*extendedFrags_filtered.fasta', recursive=True)]
f_to_path = dict([(f.split('/')[-1], f) for f in fa_files])
print(len(f_to_path))

# create bc df for each fasta file
for f in sorted(f_to_path.keys()):
    print(f, f_to_path[f])
    f_path = f_to_path[f]
    
    df_bc = count_barcodes(
        f_path, 
        SAMPLE_TO_REF_SEQ_LEFT['2A'], 
        SAMPLE_TO_REF_SEQ_RIGHT['2A'], 
        ftype='fasta', 
        n_lim=10000000000, 
        look_only_5flank =False)
    df_bc.to_csv(f_path.replace('.fasta', '_bc.csv'), index=False)



6
rice2A1_RNA_S1_L001_.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/missing2a/rice/2A/rice2A1_RNA_S1_L001_.extendedFrags_filtered.fasta
counted 3476590 reads


Unnamed: 0,bc,bc_count
0,XXX,539891
1,TCTTGTTACAACCAT,25
2,TTTTTTGTGGCAATC,1479
3,GGGCTGATCCGAGTT,153
4,CGCAAGCTTTGATTG,2
...,...,...
84928,CTTCAATGTCCTGAT,1
84929,CTTCCTCCCTCAGTT,1
84930,TAAAATCGTTGTTTT,1
84931,GAGAAACTTATTAAT,1


rice2A2_RNA_S2_L001_.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/missing2a/rice/2A/rice2A2_RNA_S2_L001_.extendedFrags_filtered.fasta
counted 4980317 reads


Unnamed: 0,bc,bc_count
0,XXX,774349
1,TTTTCGTGAGAGCCA,114
2,ATCGACATGGAGGGC,53
3,GTAGATTATCTTTAC,76
4,GTATTCTCACTTTAT,86
...,...,...
93000,TATTGGACGTCACTT,1
93001,ATTCATCACGTTTTA,1
93002,CTGGAACTTATTGAG,1
93003,ATGGGATGCTTTTAC,1


rice2A_DNA_S3_L001_.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/missing2a/rice/2A/rice2A_DNA_S3_L001_.extendedFrags_filtered.fasta
counted 4469257 reads


Unnamed: 0,bc,bc_count
0,XXX,693625
1,TGTATATTATTCAGG,112
2,TAAGATTTTTGATAT,118
3,TCGTGGTAAAATTCG,110
4,TTCGCATGATGACTA,315
...,...,...
84517,TTTGATCAAAATTCT,1
84518,TAGAACATTCAGAAG,1
84519,TAAGTACAGGTGACT,1
84520,TCATTTATATGATCCGT,1


sorghum2A1_RNA_S4_L001_.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/missing2a/sorghum/2A/sorghum2A1_RNA_S4_L001_.extendedFrags_filtered.fasta
counted 6384572 reads


Unnamed: 0,bc,bc_count
0,XXX,1010581
1,TGTTTTGAAAGAAGA,45
2,TTAAAATGGACTTAT,85
3,TTCTCGCTCTATGGT,38
4,GTTTTTATTATAAAT,121
...,...,...
102887,GGTCAATTCCGCTAA,1
102888,TTATCGGCTGAAAAC,1
102889,GCAGCATGTAACTAG,1
102890,TATATCAATGTGTTT,1


sorghum2A2_RNA_S5_L001_.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/missing2a/sorghum/2A/sorghum2A2_RNA_S5_L001_.extendedFrags_filtered.fasta
counted 3593664 reads


Unnamed: 0,bc,bc_count
0,XXX,572322
1,TACAATTTTCTAAAT,58
2,TAATTGCCACATTAT,11
3,TTCAGTCAAAATTGG,7
4,GTAGCGATCTTCTCG,37
...,...,...
90879,GTTGGGGGAGGTCTT,1
90880,AACTTTTTACGTGTT,1
90881,TTCATATACAAATGC,1
90882,AAGTTCATGGATTGG,1


sorghum2A_DNA_S6_L001_.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/missing2a/sorghum/2A/sorghum2A_DNA_S6_L001_.extendedFrags_filtered.fasta
counted 7852720 reads


Unnamed: 0,bc,bc_count
0,XXX,1194419
1,TACTATTCTGTTTTT,250
2,TTGCCCCTGCGTGAT,157
3,GAATTAATGTCGTCT,174
4,ATCGCTGTGATAACC,41
...,...,...
97184,CAAATCTCTTTAGAA,1
97185,GGTACACCGTGCTTC,1
97186,ACTCCTGCCTATTGG,1
97187,CCATTTGCTCAACTT,1


In [68]:
# for all samplles apart from 2A
# find all fasta files
illumina_dir = '/data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/'

fa_files = [f for f in glob(illumina_dir+'**/*extendedFrags_filtered.fasta', recursive=True)]
f_to_path = dict([(f.split('/')[-1], f) for f in fa_files])
print(len(f_to_path))

# create bc df for each fasta file
for f in sorted(f_to_path.keys()):
    print(f, f_to_path[f])
    f_path = f_to_path[f]
    

    sample_name = f_path.split('/')[-1][:2].upper()
    print(sample_name)

    if sample_name.startswith('3'):
        look_only_5flank = True
    else:
        look_only_5flank = False

    df_bc = count_barcodes(
        f_path, 
        SAMPLE_TO_REF_SEQ_LEFT[sample_name], 
        SAMPLE_TO_REF_SEQ_RIGHT[sample_name], 
        ftype='fasta', 
        n_lim=10000000000, 
        look_only_5flank =look_only_5flank)
    df_bc.to_csv(f_path.replace('.fasta', '_bc.csv'), index=False)





32
2b_dna_rep12_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_3-ds.9df57b00ae2a48c1a756dff099b4a610/2b_dna_rep12_sorghum.extendedFrags_filtered.fasta
2B
counted 9213455 reads


Unnamed: 0,bc,bc_count
0,XXX,1231698
1,TCATGCAATGGTTTA,3194
2,TGAAGTCGCGGAGGT,12221
3,ATATTAACGGATAGA,32757
4,ACCGTGAGATTTTTT,63
...,...,...
85531,CCGCAATCATTCCAC,1
85532,TCAAGTCAAACTTGG,1
85533,GTTGATACATTTTCA,1
85534,ATACATACGACTCTG,1


2b_dna_rep34_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_12-ds.0b361dc0124b4d378f01a022858a9df5/2b_dna_rep34_sorghum.extendedFrags_filtered.fasta
2B
counted 28270633 reads


Unnamed: 0,bc,bc_count
0,XXX,3774655
1,GTGCTGACGGGTGTG,64432
2,CTATATTAACTCTTTA,622
3,TTATCAAATTTTCTT,92
4,GGCGTATCTGGAGTG,44807
...,...,...
115416,TTCTGTTATCAAATAA,1
115417,TAAATATAACATATC,1
115418,ACATCTAGTTGCTTG,2
115419,AATATTGCTGGTTTT,1


2b_rep1_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_1-ds.a852c77554964539afce4473a203c368/2b_rep1_sorghum.extendedFrags_filtered.fasta
2B
counted 5433470 reads


Unnamed: 0,bc,bc_count
0,XXX,715797
1,CATCCGTTTTTACCA,70907
2,GTTGCAGCTAGGTGT,23419
3,GGAACTGGTGGGAGA,603
4,GTGTAATCGAATGGC,29
...,...,...
63076,TTACACCGTTGATTG,1
63077,TGGCATCCGATACAG,1
63078,ATTGAAATATCTTTT,2
63079,TAGAATCTAGCTTATT,1


2b_rep2_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_2-ds.eb1a62282d314975b39ed8ae73f3c0cd/2b_rep2_sorghum.extendedFrags_filtered.fasta
2B
counted 9548636 reads


Unnamed: 0,bc,bc_count
0,XXX,1254981
1,AATGTTTCGACTACT,250
2,TAATTTTTTTTACGG,22
3,TACGCGATTTAGTG,139
4,TTGCTCCCCTTTTAT,323
...,...,...
73601,GTCGTCCGGTTGGTG,1
73602,ACGCTAGCTCTTTTG,1
73603,TCTTATAGTGAGAAT,2
73604,TCATATTCAACTACA,1


2b_rep3_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_10-ds.b99df2fd20b848eaaa5a6b18631220c5/2b_rep3_sorghum.extendedFrags_filtered.fasta
2B
counted 42031580 reads


Unnamed: 0,bc,bc_count
0,XXX,5495451
1,AAAGATTTCACAGAT,161
2,TTAATTTTAAGTAAT,2343
3,TCTTTCCTTCTAGCC,100224
4,TGTTTACTAAAAAGT,509
...,...,...
110400,CTTCATACGCATTGC,1
110401,GGCTCATTATATGT,1
110402,TTTTCAGCGTGCCTC,1
110403,TCCTTTTTTTCCTAA,1


2b_rep4_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_11-ds.3c9db5dd456a4823bea81a6330eeabae/2b_rep4_sorghum.extendedFrags_filtered.fasta
2B
counted 34440296 reads


Unnamed: 0,bc,bc_count
0,XXX,4481439
1,TAACTGGCAAGAGCG,34493
2,CAATTGTTTATACTT,36946
3,TTATTTTGTACATTT,44450
4,CATCCGTTTTTACCA,508123
...,...,...
104148,CACATTGTGTCATGA,1
104149,TTAGACACGTGCGTT,1
104150,GTGACTAGCTCTTT,1
104151,ACTTAACGTTATCTT,1


2c_dna_rep12_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_6-ds.7745abd5889b44fc94378bc1923e306b/2c_dna_rep12_sorghum.extendedFrags_filtered.fasta
2C
counted 11146899 reads


Unnamed: 0,bc,bc_count
0,XXX,1466093
1,GTATCGGGAATTTAT,760
2,GTTTCTCGGTTGGTG,942
3,TCAATGGAGCCTGCA,745
4,AAATACCATTGTTTT,731
...,...,...
88038,TGCATAATTTTTTAT,1
88039,CAACACGTAAGCTGT,1
88040,ATACCAGTAGAGCTA,1
88041,TGCTACTACTTTGGC,1


2c_dna_rice.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_28-ds.e18da3cd1d4b408c96508cdd518496da/2c_dna_rice.extendedFrags_filtered.fasta
2C
counted 37665233 reads


Unnamed: 0,bc,bc_count
0,XXX,4870647
1,GCTCGAGGTGACTCA,5355
2,GCGTTTCTTTCTAGA,295
3,AACAGGAAAATTCTT,2089
4,TGATTTTATCTATTT,2788
...,...,...
147161,GTGTACAGACGTGTT,1
147162,AACGGCTAAAACATT,1
147163,CATCTGTAGCATATC,1
147164,TGTTCTCTGACGGTT,1


2c_rep1_rice.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_26-ds.f3bfebb8ae764be482b6a1c8dce82a49/2c_rep1_rice.extendedFrags_filtered.fasta
2C
counted 27043577 reads


Unnamed: 0,bc,bc_count
0,XXX,3597547
1,AGTTGGGTAATCATA,969
2,AATTAAATTTGCCTT,2533
3,AATTCAACCATTTAA,3941
4,AGTTATTAATCTATA,509
...,...,...
125284,TGCTTTATCCCGTGC,1
125285,TTAATTATCGAATTT,1
125286,CGCTTTATTGTCCTG,1
125287,CTTTGTAGCGAGGCA,2


2c_rep1_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_4-ds.6834f82a83a34cefae5f066d0817f237/2c_rep1_sorghum.extendedFrags_filtered.fasta
2C
counted 5020270 reads


Unnamed: 0,bc,bc_count
0,XXX,653992
1,GGTACATTACGCTGT,412
2,TTTTCCCTTTTACTT,268
3,CCCGGGACTAATCAT,43
4,TCACTGTCTAATATA,197
...,...,...
57408,TCGATTTCGTTCCTT,1
57409,TCAACGATACCAATA,1
57410,TATTAAATCGTCCTC,1
57411,ATTGAATATAATAAT,1


2c_rep2_rice.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_27-ds.ab5dbaffd39d40a3b8362c03237bf9b1/2c_rep2_rice.extendedFrags_filtered.fasta
2C
counted 27042455 reads


Unnamed: 0,bc,bc_count
0,XXX,3564488
1,TTTACGATTTCGTAT,579
2,GTAGGAAGTATTGCC,2176
3,GGAGTTTTATGTACC,721
4,TATAAAGGGAATGCT,3304
...,...,...
107694,TATTCAACTAAATTG,1
107695,ATGTATTAACTCACT,1
107696,CCTAACTGTATACTC,1
107697,GTTGTACTTGACATT,1


2c_rep2_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_5-ds.8703e2b144d44711ac10e5de8125f4b3/2c_rep2_sorghum.extendedFrags_filtered.fasta
2C
counted 5265651 reads


Unnamed: 0,bc,bc_count
0,XXX,680598
1,CGAACGGCGGATCCC,398
2,TCGGATCTTGCGCGA,314
3,TTCAATGTATGGGCG,459
4,GCGTGCCGCTTTGTT,257
...,...,...
51877,TACTACCGAAGATTG,1
51878,TTGTATGTTGGCATC,1
51879,TATTATAAATTTCTA,1
51880,ATTCCATCTCTTACT,1


2d_dna_rep12_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_9-ds.6fdb10a1831945ba9df471f944c01e2f/2d_dna_rep12_sorghum.extendedFrags_filtered.fasta
2D
counted 35005442 reads


Unnamed: 0,bc,bc_count
0,XXX,4247731
1,TTCTCGTTTTATGGG,1249
2,ACAGGGAACCTGGTT,747
3,CCCTCTTTAATACCT,635
4,ACGGTTATAGCCTTT,808
...,...,...
288810,ATTGGGTATTTAATA,1
288811,TGCATTCTTTTCAGA,1
288812,CAGGGTATCACAGAC,1
288813,ACTTTTTATTCCAGC,1


2d_rep1_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_7-ds.17a4ec2cc0a6497eaf491459f347c21f/2d_rep1_sorghum.extendedFrags_filtered.fasta
2D
counted 9959512 reads


Unnamed: 0,bc,bc_count
0,XXX,1235435
1,TAGAAGTTAAATTTA,130
2,CTTTGAGTATTAATT,136
3,ATATAGAATTAACAG,86
4,TTATTCTATTCTACT,92
...,...,...
165924,TTCGAGGTTGCTGCA,1
165925,ATGACTATCTACTAC,1
165926,TAGACGCTAGGGTTT,1
165927,GCTTTATGATTTTAT,1


2d_rep2_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_8-ds.a7777d478b2f4cf891bc47e7ae2902e4/2d_rep2_sorghum.extendedFrags_filtered.fasta
2D
counted 13310764 reads


Unnamed: 0,bc,bc_count
0,XXX,1615478
1,CGCATTCGTTAATTT,186
2,GGCAGATCCCGCTAT,295
3,AACTTATCTTCATTT,320
4,TCTGTTTAAGTAAAA,97
...,...,...
177377,AGGGTGGTCCACAAT,1
177378,TCCGTTTTTATCCCT,1
177379,TGTCGCTTCTTCGTT,1
177380,TTATCAACGGGAGAA,1


3a_dna_rep12_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_15-ds.d772429cd72746b2bef338095eae3069/3a_dna_rep12_sorghum.extendedFrags_filtered.fasta
3A
counted 59387 reads


Unnamed: 0,bc,bc_count
0,XXX,58084
1,CGTATATAATCCAAT,287
2,CGTCCTCTCGCAGAT,1
3,TATCCGCTACTATAT,42
4,ACATCTCGAAATATA,270
...,...,...
223,TAAGTGAGCTTACTA,1
224,AATATTCATTGGTAG,1
225,TCTAGAAACTTATTA,1
226,CTTATATAATCCAAT,2


3a_rep1_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_13-ds.50504cfe1a864515be2fc7198d74a69f/3a_rep1_sorghum.extendedFrags_filtered.fasta
3A
counted 3368 reads


Unnamed: 0,bc,bc_count
0,XXX,1656
1,ACATCTCGAAATATA,214
2,GATACACGGATACAC,18
3,CGTTATCTGTTCTTA,148
4,CTTTTACAAGCTTAG,230
...,...,...
440,CTCTAAAATGACACT,1
441,AGCCTATACAGTCAT,1
442,TATTAGCATTTGTTC,1
443,GGATAATAAATTTAC,1


3a_rep2_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_14-ds.35e1f6053a64426d89fca6e67024c496/3a_rep2_sorghum.extendedFrags_filtered.fasta
3A
counted 1869 reads


Unnamed: 0,bc,bc_count
0,XXX,919
1,CGTATATAATCCAAT,184
2,GTCATTTAGCCGTGA,1
3,GCTGTTAATGGATTC,2
4,GTCATCTTCATAAGG,1
...,...,...
247,TGAGCGCGGTTGGAG,1
248,GCAATGCGCCTTTAG,2
249,GTATGGCTTTACTTC,1
250,CAGAATTTTCATATT,1


3b_dna_rep12_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_18-ds.43caa805ab1947b6ba00c21ba3ef9fd8/3b_dna_rep12_sorghum.extendedFrags_filtered.fasta
3B
counted 41324958 reads


Unnamed: 0,bc,bc_count
0,XXX,9645356
1,TTATTAAACCACTTT,2141
2,GTATTGGTATTCACC,4817
3,ACTTTTCTCTGAATG,414
4,CTGTTTTTTTACTTC,1890
...,...,...
393085,TTACTAAAAGATTTG,1
393086,TTTCTAGATTGTTAA,1
393087,TCGACTTCACCATAT,1
393088,TTCTAAGTAAAGGTT,1


3b_dna_rep3_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_23-ds.50d034ddf1c04b7e84754c8b954c3246/3b_dna_rep3_sorghum.extendedFrags_filtered.fasta
3B
counted 51456833 reads


Unnamed: 0,bc,bc_count
0,XXX,11929229
1,GGTTTTGATTGTAGC,1797
2,TCCTAAATCAGCTTA,1432
3,ATATCTTACTGAGGT,3948
4,CGTTCGGCGACGTTG,2391
...,...,...
421020,AAAGTATTTGAGTTT,2
421021,TATTCGTTTTAGGAA,1
421022,TACTAGTTATAAGTT,1
421023,TCTATTTGTCCGATG,1


3b_dna_rice.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_32-ds.384fb54ab1c84e6aabbf780d46d78cdc/3b_dna_rice.extendedFrags_filtered.fasta
3B
counted 47831792 reads


Unnamed: 0,bc,bc_count
0,XXX,11553816
1,TTCAGATGGAACTTA,19
2,ATGTCGGACAAGGAT,408
3,CTATGGGACTGAGGG,1340
4,CATGAGTAGTTGATA,3026
...,...,...
379182,GAATGTGCGTGCTAC,1
379183,TGGCTCCGATTTGGC,1
379184,GGGGGGTCTGTGATT,1
379185,TTTGTCTAGCTGTTA,1


3b_rep1_rice.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_29-ds.be642e42315f4f7a9d539733e201a209/3b_rep1_rice.extendedFrags_filtered.fasta
3B
counted 25566685 reads


Unnamed: 0,bc,bc_count
0,XXX,6013298
1,ATTTCTCTCATTATC,1517
2,GTCTTAATCGCACAG,1634
3,TTGGCCGATTTGGTT,548
4,TTAGAAGACGGGAGC,1011
...,...,...
154201,GAGTTCAACTTGGTC,1
154202,TTGTCGAATGGTAAA,1
154203,CTTATTGGTCGGCCC,1
154204,CTACAATCCCTATTA,1


3b_rep1_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_16-ds.9f78070f379749f189f11365c2e56c54/3b_rep1_sorghum.extendedFrags_filtered.fasta
3B
counted 39617583 reads


Unnamed: 0,bc,bc_count
0,XXX,9634876
1,GTCTTTCTTAAACAT,46
2,AAAGACCGATTGCAA,3680
3,TAAGCATACTCGTTA,1268
4,GCTTTCAATGATATA,1447
...,...,...
328514,AGTACCTATTGGCTT,1
328515,GCCGTTACCTCATTC,1
328516,GGCTCAGATCAAGAA,1
328517,TTTACGTTTAGCTT,1


3b_rep2_rice.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_30-ds.b475a51120c14b4f97c6bc89f08ed8e8/3b_rep2_rice.extendedFrags_filtered.fasta
3B
counted 22075419 reads


Unnamed: 0,bc,bc_count
0,XXX,5155203
1,TGGGAACCGCGTTAC,1
2,ATTTATACGTGAACC,2
3,AGCCATAGGCGATCC,376
4,TGCTATTAATAATAA,1
...,...,...
143052,TGGTGGATCCGCTTG,1
143053,AATACGTGAAGCATA,1
143054,ATGTCTTAAATACAG,1
143055,TCGAGACTGTTACTT,1


3b_rep2_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_17-ds.4170a39172b94c10b08c7bb16c30d7f1/3b_rep2_sorghum.extendedFrags_filtered.fasta
3B
counted 33362280 reads


Unnamed: 0,bc,bc_count
0,XXX,7883891
1,GAACTTACTATTCTG,130
2,CTTAATTCCGATTTG,987
3,AGTTGAAGCCTAACG,590
4,AGAAGTTAGTATGGC,718
...,...,...
268529,ATGGGACATGCCTGA,1
268530,CACGTGCCTAGGCGT,1
268531,GAAGTGCAATTTGCC,1
268532,GCAACGCCAACACTG,1


3b_rep3_rice.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_31-ds.cb19ff1f130749b6b8cb918a59359f7d/3b_rep3_rice.extendedFrags_filtered.fasta
3B
counted 28499415 reads


Unnamed: 0,bc,bc_count
0,XXX,6704969
1,AAGCCTCATAATTAA,1
2,AGCCAATCTTCTCCC,1
3,TCCTTAGTTATAAAA,1
4,TCGTGCGTGTCTAAA,1
...,...,...
156731,AAACGCGTTTTCTCC,1
156732,TTTTGAATTTATTCA,1
156733,TTCAAGTGTATGACT,1
156734,CTAGCGCTACTTTAG,1


3b_rep3_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_22-ds.72d1c9ca73634d14ac2a92ec88afbf44/3b_rep3_sorghum.extendedFrags_filtered.fasta
3B
counted 19214977 reads


Unnamed: 0,bc,bc_count
0,XXX,4589255
1,ATTTGATTAATGGGC,15
2,AGATTTATATTTGTA,1010
3,TATAGGGAAATAAAC,1002
4,CTTCAAGCTGGTTGG,1078
...,...,...
152284,TATCATATTGCATGT,2
152285,CCCAGTTATGGTGGG,1
152286,TATTTTGGCCAAATC,1
152287,TCGTAAAACGTTTCC,1


3c_dna_rep12_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_21-ds.c832b83dda3343088319ecae557bbe1c/3c_dna_rep12_sorghum.extendedFrags_filtered.fasta
3C
counted 37518602 reads


Unnamed: 0,bc,bc_count
0,XXX,10116114
1,TAGAATATGCCGAAA,742
2,AGAGCATTATTAACA,1
3,TTCGTTATTTTAGCC,1562
4,GGGTGTCCTATATAA,1
...,...,...
179782,TATGGTCATTCTGTG,1
179783,TCTTTTTTGTTCTAT,1
179784,TTCTTAATGATCTCC,1
179785,ACCGGTTTGTTTTAG,1


3c_dna_rep3_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_25-ds.1b3639cc99cb4837998072c8bbc13a7f/3c_dna_rep3_sorghum.extendedFrags_filtered.fasta
3C
counted 64740288 reads


Unnamed: 0,bc,bc_count
0,XXX,18090415
1,CATGTAGGTTTGTTC,1892
2,GATAGATATCATTTA,4755
3,TGTATCGAATCATGT,2828
4,AATAGGCGGGTTATT,2402
...,...,...
199912,CGATTACATTAAAAA,1
199913,CGTTCTTCGTCCATC,1
199914,AAGTTAGCCGATATT,1
199915,TATAGCACTTCACCT,1


3c_rep1_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_19-ds.bcf9f1c0fc61433da1f57fea0373a244/3c_rep1_sorghum.extendedFrags_filtered.fasta
3C
counted 42560026 reads


Unnamed: 0,bc,bc_count
0,XXX,10364582
1,GATTCTTCTTTATAA,3
2,GGAGAGGTATAAACC,1
3,GTATAACTACTGAAA,1
4,TGCGTAGGTATGAAC,1
...,...,...
161190,GGGTATAAACTGCAT,1
161191,GGTGGCTGTACAAAT,1
161192,TTTAACGCTTCTATC,1
161193,AAATTATGGCCATAA,1


3c_rep2_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_20-ds.0a2ba1e8bc0241508806d879e68a9b12/3c_rep2_sorghum.extendedFrags_filtered.fasta
3C
counted 43181408 reads


Unnamed: 0,bc,bc_count
0,XXX,9074038
1,GCTGCTTGCCAAGAA,1
2,TCAGTAGGTCAACCA,1
3,GAGTAGTGCGTCGAC,226
4,TGCGCTATTCAATAA,1
...,...,...
171354,ATCCATGTATTAGAA,1
171355,TATTTCTTTGGATAT,1
171356,TGACTATAGTTGTAC,1
171357,TTCAGCAGACCATGC,1


3c_rep3_sorghum.extendedFrags_filtered.fasta /data/davidding/dms/illumina_data/PROTOPLASTSUMMER24-425876467/BCLConvert_07_28_2024_02_06_09Z-753277525/EG_protoplast_24-ds.d0c5a57bc091412f999121d1f5e066cf/3c_rep3_sorghum.extendedFrags_filtered.fasta
3C
counted 36871291 reads


Unnamed: 0,bc,bc_count
0,XXX,8028994
1,TCGAGCAGGAATGCA,1
2,GTTCTAGTGTGGGAC,1227
3,GACGGAATGCACAAA,2
4,ACGGATCGACGTTAA,2
...,...,...
136679,GTTCTTAAGGTTGGG,1
136680,GATCATAGTGGATCC,1
136681,ACCGAATCAAAAGAC,1
136682,GCTTTAATAAATGAT,1
