In [1]:
pip install biopython

Collecting biopython
  Downloading biopython-1.79-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 36.0 MB/s eta 0:00:01
Installing collected packages: biopython
Successfully installed biopython-1.79
Note: you may need to restart the kernel to use updated packages.


### BWT ###

In [2]:
'''Class used for performing Burrows-Wheeler transformation.'''
from datetime import datetime

def print_curr_datetime(message):
    '''Print message followed by " = " and the current local date and time.

    The timestamp is formatted as day/month/year hours:minutes:seconds.
    message is converted with str(), so any object may be passed.
    '''
    timestamp = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    label = str(message) + " = "
    print(label, timestamp)

class BWT:
    '''Burrows-Wheeler transform of a text, computed by way of its suffix array.

    Attributes (reset by clear_parameters, filled in by transform):
        transformedText: BWT(T) as a string.
        saIndexes: suffix array of T, i.e. the start offsets of the suffixes
            of T in sorted order.
        tots: map from character to the number of times it occurs in
            transformedText.
        ranks: for each position of transformedText, the number of earlier
            occurrences of the same character in transformedText.
        firstCol: map from character to the half-open (start, end) range of
            rows that begin with that character.
    '''

    def __init__(self):
        # State is created per instance instead of being declared as mutable
        # class attributes that every BWT object would share.
        self.clear_parameters()


    def clear_parameters(self):
        ''' Resets every attribute to its empty value. '''
        self.transformedText = ''
        self.saIndexes = []
        self.tots = dict()
        self.ranks = []
        self.firstCol = dict()


    def transform(self, text):
        '''Performs BW transformation and keeps the result in transformedText and saIndexes.
        Also, initializes tots (pairs char:#), ranks (for transformedText) and firstCol.

        text is expected to end with the '$' terminator: bwt_via_sa emits '$'
        for the suffix starting at offset 0 and original_text stops when it
        reaches '$'.'''
        print_curr_datetime('BWT transform Begin')
        self.clear_parameters()
        self.transformedText = self.bwt_via_sa(text) # Initializes transformedText, saIndexes
        self.rank_bwt()  # Initializes tots, ranks
        self.calculate_first_col() # Initializes firstCol
        print_curr_datetime('BWT transform End')


    def suffix_array(self, t):
        """ Given T return suffix array SA(T).  We use Python's sorted
            function here for simplicity, but we can do better. """
        # Every suffix is materialised as a slice, so memory grows
        # quadratically with len(t).
        suffixes = sorted((t[i:], i) for i in range(len(t)))
        # Extract and return just the offsets
        return [offset for _, offset in suffixes]


    def bwt_via_sa(self, t):
        """ Given T, returns BWT(T) by way of the suffix array.
        Also stores the suffix array in saIndexes. """
        self.saIndexes = self.suffix_array(t)
        # The character preceding each sorted suffix; the suffix at offset 0
        # has no predecessor and contributes the terminator.
        return ''.join('$' if si == 0 else t[si - 1] for si in self.saIndexes)


    def rank_bwt(self):
        ''' Computes the parallel list of B-ranks for transformedText and
        tots: map from character to # times it appears.
        Initializes tots and ranks (nothing is returned).'''
        self.tots = dict()
        self.ranks = []
        for c in self.transformedText:
            seen = self.tots.get(c, 0)
            self.ranks.append(seen)
            self.tots[c] = seen + 1


    def calculate_first_col(self):
        ''' Builds the map from character to the range of rows prefixed by
        the character. Initializes first column (nothing is returned).'''
        self.firstCol = {}
        totc = 0
        for c, count in sorted(self.tots.items()):
            self.firstCol[c] = (totc, totc + count)
            totc += count


    def last_col_with_ranks(self):
        ''' Returns list with tuples (rank, char) for lastColumn.'''
        return list(zip(self.ranks, self.transformedText))


    def original_text(self):
        ''' Make T from BWT(T).

        Requires transform to have been called; raises IndexError while
        transformedText is still empty. '''
        rowi = 0 # Start in first row
        t = '$' # Start in rightmost character
        while self.transformedText[rowi] != '$':
            c = self.transformedText[rowi]
            t = c + t # Prepend to answer
            # Jump to row that starts with c of same rank
            rowi = self.firstCol[c][0] + self.ranks[rowi]
        return t

In [3]:
# TESTS
text = 'abaaba$'
bwt = BWT()
bwt.transform(text)
original = bwt.original_text()

print("Original: " + original)
print()
# Inverting the transform must give back the input text.
assert original == 'abaaba$'

print('First col')
print(bwt.firstCol)
print()
assert bwt.firstCol == {'$': (0, 1), 'a': (1, 5), 'b': (5, 7)}

print('SaIndexes')
print(bwt.saIndexes)
print()
assert bwt.saIndexes == [6, 5, 2, 3, 0, 4, 1]

print('Last col with ranks')
print(bwt.last_col_with_ranks())
print()
assert bwt.last_col_with_ranks() == [(0, 'a'), (0, 'b'), (1, 'b'), (1, 'a'), (0, '$'), (2, 'a'), (3, 'a')]


print('Tots.items()')
print(bwt.tots.items())
print()
assert dict(bwt.tots) == {'a': 4, 'b': 2, '$': 1}

BWT transform Begin =  30/05/2022 19:09:32
BWT transform End =  30/05/2022 19:09:32
Original: abaaba$

First col
{'$': (0, 1), 'a': (1, 5), 'b': (5, 7)}

SaIndexes
[6, 5, 2, 3, 0, 4, 1]

Last col with ranks
[(0, 'a'), (0, 'b'), (1, 'b'), (1, 'a'), (0, '$'), (2, 'a'), (3, 'a')]

Tots.items()
dict_items([('a', 4), ('b', 2), ('$', 1)])



### FM Index ###

In [4]:
class FMIndex:
    '''FM index over a text, used to look up exact occurrences of a pattern.

    Attributes (reset by clear_parameters, filled in by __init__):
        bwt: BWT object built for text + '$'; its transformedText, firstCol
            and saIndexes are used here. None until the index is built.
        tally: Key = character, Value = list whose entry i is the number of
            occurrences of the character in transformedText[0..i] (inclusive).
        c: Key = character, Value = tuple (index of the character in the
            iteration order of bwt.firstCol, first row starting with it).
    '''

    def __init__(self, text):
        ''' Builds the index for text; the '$' terminator is appended here. '''
        text = text + '$'
        self.clear_parameters()
        print('Perform BWT')
        self.bwt = BWT() # We only care about L and SA
        self.bwt.transform(text)
        print('End BWT')

        print('Init. tally')
        self.init_tally() # Initialize tally
        print('Finish init. tally')

        print('Init. c')
        self.init_c() # Initialize c
        print('Finish init. c')


    def clear_parameters(self):
        ''' Resets every attribute to its empty value. '''
        self.bwt = None
        self.tally = dict()
        self.c = dict()


    def init_tally(self):
        ''' Initializes tally: for every character of the first column, the
        running number of occurrences in transformedText up to each row.'''
        counts = {char: 0 for char in self.bwt.firstCol}
        self.tally = {char: [] for char in counts}
        for symbol in self.bwt.transformedText:
            counts[symbol] += 1
            # Every character gets one entry per row, not just the one seen.
            for char, column in self.tally.items():
                column.append(counts[char])


    def init_c(self):
        ''' Initializes c dictionary, where key = character
        and value = tuple (index, value for first row occurence)'''
        self.c = {
            char: (index, rows[0])
            for index, (char, rows) in enumerate(self.bwt.firstCol.items())
        }


    def query(self, pattern):
        ''' Returns sorted list of found positions for pattern in the text.
        Rows are counted from 0 to n, not from 1.

        Returns an empty list when the pattern does not occur, including when
        it contains a character that is not part of the indexed text. An empty
        pattern keeps the initial row range and therefore returns the text
        positions of every row except row 0.'''
        last_row = len(self.bwt.transformedText) - 1
        start, end = 1, last_row
        # Backward search: process the pattern from its last character to its
        # first, narrowing the inclusive row range [start, end].
        for k in range(len(pattern) - 1, -1, -1):
            ch = pattern[k]
            if ch not in self.c:
                return []
            index, first_row = self.c[ch]
            if k == len(pattern) - 1:
                # Rows starting with ch: from C[ch] up to the row before the
                # next character's first row (or the last row when ch is the
                # largest character). The search then continues with the
                # remaining pattern characters in either case.
                start = first_row
                next_first_row = next(
                    (row for idx, row in self.c.values() if idx == index + 1),
                    None)
                end = last_row if next_first_row is None else next_first_row - 1
            else:
                # Occurrences of ch strictly before row start; nothing
                # precedes row 0, so avoid wrapping around to tally[ch][-1].
                before_start = self.tally[ch][start - 1] if start > 0 else 0
                through_end = self.tally[ch][end]
                start = first_row + before_start
                end = first_row + through_end - 1
            if start > end:
                return []
        return sorted(self.bwt.saIndexes[start:end + 1])

In [5]:
# TESTS
# Other texts to try: 'abaaba', 'GATGCGAGAGATG'
text = 'BANANA'

fmIndex = FMIndex(text)

print('Tally:')
print(fmIndex.tally)
print()
assert fmIndex.tally == {'$': [0, 0, 0, 0, 1, 1, 1], 'A': [1, 1, 1, 1, 1, 2, 3],
                         'B': [0, 0, 0, 1, 1, 1, 1], 'N': [0, 1, 2, 2, 2, 2, 2]}

print('C')
print(fmIndex.c)
print()
assert fmIndex.c == {'$': (0, 0), 'A': (1, 1), 'B': (2, 4), 'N': (3, 5)}

# Other patterns to try: 'aba' (text 'abaaba'), 'GAGA' (text 'GATGCGAGAGATG', expected [5, 7])
pattern = 'ANA'
print('Query for pattern: ' + pattern)
print(fmIndex.query(pattern))
print()
# 'ANA' occurs in 'BANANA' at positions 1 and 3.
assert fmIndex.query(pattern) == [1, 3]

Perform BWT
BWT transform Begin =  30/05/2022 19:09:39
BWT transform End =  30/05/2022 19:09:39
End BWT
Init. tally
Finish init. tally
Init. c
Finish init. c
Tally:
{'$': [0, 0, 0, 0, 1, 1, 1], 'A': [1, 1, 1, 1, 1, 2, 3], 'B': [0, 0, 0, 1, 1, 1, 1], 'N': [0, 1, 2, 2, 2, 2, 2]}

C
{'$': (0, 0), 'A': (1, 1), 'B': (2, 4), 'N': (3, 5)}

Query for pattern: ANA
[1, 3]



### Global alignment ###

In [6]:
import numpy

class Aligner:
    ''' Global alignment of two sequences by dynamic programming, with a
    linear gap penalty.

    Attributes:
        match: score added when two aligned characters are equal.
        mismatch: score added when two aligned characters differ.
        gap: score added when a character is aligned against a gap ('_').
    '''

    def __init__(self, match, mismatch, gap):
        ''' Match, mismatch, gap '''
        self.match = match
        self.mismatch = mismatch
        self.gap = gap


    def scoring_matrix(self, a, b):
        ''' Returns the score of aligning a with b; '_' stands for a gap. '''
        if a == b: return self.match
        if a == '_' or b == '_' : return self.gap
        return self.mismatch


    def global_alignment(self, x, y):
        ''' Accepts x, y.

        Returns (D, score): D is the (len(x)+1) x (len(y)+1) integer table in
        which D[i,j] is the best score of aligning x[:i] with y[:j], and score
        is D[len(x), len(y)]. '''
        rows, cols = len(x) + 1, len(y) + 1
        D = numpy.zeros((rows, cols), dtype=int)

        # First column / first row: a prefix aligned against gaps only
        for i in range(1, rows):
            D[i,0] = D[i-1,0] + self.scoring_matrix(x[i-1], '_')
        for j in range(1, cols):
            D[0,j] = D[0,j-1] + self.scoring_matrix('_', y[j-1])

        for i in range(1, rows):
            for j in range(1, cols):
                D[i,j] = max(D[i-1,j]   + self.scoring_matrix(x[i-1], '_'),
                             D[i,j-1]   + self.scoring_matrix('_', y[j-1]),
                             D[i-1,j-1] + self.scoring_matrix(x[i-1], y[j-1]))

        # function returns table and global alignment score
        # alignment score is in cell (n,m) of the matrix
        return D, D[len(x),len(y)]


    def traceback(self, x, y, V):
        ''' Accepts x, y, V (the table returned by global_alignment for x, y).

        Returns (alignment, transcript). alignment is three lines joined by
        newlines: x with gaps, a marker line with '|' under every match, and
        y with gaps. transcript has one letter per column: M = match,
        R = mismatch, D = character of x against a gap, I = character of y
        against a gap. Ties prefer the diagonal, then the vertical move. '''
        # initializing starting position cell(n,m)
        i = len(x)
        j = len(y)

        # A move that would leave the table must never win the comparison.
        # A finite sentinel fails once real scores drop below it, so use -inf.
        unreachable = float('-inf')

        # columns are collected back to front and reversed at the end
        ax, ay, am, tr = [], [], [], []

        # exit condition is when we reach cell (0,0)
        while i > 0 or j > 0:
            # calculating diagonal, vertical and horizontal scores for current cell
            d = v = h = unreachable
            if i > 0 and j > 0:
                d = V[i-1,j-1] + self.scoring_matrix(x[i-1], y[j-1])  # diagonal movement
            if i > 0:
                v = V[i-1,j] + self.scoring_matrix(x[i-1], '_')  # vertical movement
            if j > 0:
                h = V[i,j-1] + self.scoring_matrix('_', y[j-1])  # horizontal movement

            # backtracing to next (previous) cell
            if i > 0 and j > 0 and d >= v and d >= h:
                is_match = x[i-1] == y[j-1]
                ax.append(x[i-1])
                ay.append(y[j-1])
                tr.append('M' if is_match else 'R')
                am.append('|' if is_match else ' ')
                i -= 1
                j -= 1
            elif i > 0 and v >= h:
                ax.append(x[i-1])
                ay.append('_')
                tr.append('D')
                am.append(' ')
                i -= 1
            else:
                ay.append(y[j-1])
                ax.append('_')
                tr.append('I')
                am.append(' ')
                j -= 1

        alignment = '\n'.join(''.join(reversed(line)) for line in (ax, am, ay))
        return alignment, ''.join(reversed(tr))


In [7]:
# TESTS

x = 'TACGTCAGC'
y = 'TATGTCATGC'


match = 2 # 0 1 2
mismatch = -3 # -3 -2
gap = -7 # -5, -7

aligner = Aligner(match, mismatch, gap)

D, alignmentScore = aligner.global_alignment(x, y)
alignment, transcript = aligner.traceback(x, y, D)

print(alignment)
print(transcript)
print(D)
print(alignmentScore)
print(transcript)

# These checks hold for match = 2, mismatch = -3, gap = -7 only.
assert alignment == 'TACGTCA_GC\n|| |||| ||\nTATGTCATGC'
assert transcript == 'MMRMMMMIMM'
assert alignmentScore == 6
assert D.shape == (len(x) + 1, len(y) + 1)

'''
# Expected for match = 2, mismatch = -3, gap = -7: 

TACGTCA_GC
|| |||| ||
TATGTCATGC
MMRMMMMIMM
[[  0  -7 -14 -21 -28 -35 -42 -49 -56 -63 -70]
 [ -7   2  -5 -12 -19 -26 -33 -40 -47 -54 -61]
 [-14  -5   4  -3 -10 -17 -24 -31 -38 -45 -52]
 [-21 -12  -3   1  -6 -13 -15 -22 -29 -36 -43]
 [-28 -19 -10  -6   3  -4 -11 -18 -25 -27 -34]
 [-35 -26 -17  -8  -4   5  -2  -9 -16 -23 -30]
 [-42 -33 -24 -15 -11  -2   7   0  -7 -14 -21]
 [-49 -40 -31 -22 -18  -9   0   9   2  -5 -12]
 [-56 -47 -38 -29 -20 -16  -7   2   6   4  -3]
 [-63 -54 -45 -36 -27 -23 -14  -5  -1   3   6]]
6
MMRMMMMIMM
'''

TACGTCA_GC
|| |||| ||
TATGTCATGC
MMRMMMMIMM
[[  0  -7 -14 -21 -28 -35 -42 -49 -56 -63 -70]
 [ -7   2  -5 -12 -19 -26 -33 -40 -47 -54 -61]
 [-14  -5   4  -3 -10 -17 -24 -31 -38 -45 -52]
 [-21 -12  -3   1  -6 -13 -15 -22 -29 -36 -43]
 [-28 -19 -10  -6   3  -4 -11 -18 -25 -27 -34]
 [-35 -26 -17  -8  -4   5  -2  -9 -16 -23 -30]
 [-42 -33 -24 -15 -11  -2   7   0  -7 -14 -21]
 [-49 -40 -31 -22 -18  -9   0   9   2  -5 -12]
 [-56 -47 -38 -29 -20 -16  -7   2   6   4  -3]
 [-63 -54 -45 -36 -27 -23 -14  -5  -1   3   6]]
6
MMRMMMMIMM


'\n# Expected for match = 2, mismatch = -3, gap = -7: \n\nTACGTCA_GC\n|| |||| ||\nTATGTCATGC\nMMRMMMMIMM\n[[  0  -7 -14 -21 -28 -35 -42 -49 -56 -63 -70]\n [ -7   2  -5 -12 -19 -26 -33 -40 -47 -54 -61]\n [-14  -5   4  -3 -10 -17 -24 -31 -38 -45 -52]\n [-21 -12  -3   1  -6 -13 -15 -22 -29 -36 -43]\n [-28 -19 -10  -6   3  -4 -11 -18 -25 -27 -34]\n [-35 -26 -17  -8  -4   5  -2  -9 -16 -23 -30]\n [-42 -33 -24 -15 -11  -2   7   0  -7 -14 -21]\n [-49 -40 -31 -22 -18  -9   0   9   2  -5 -12]\n [-56 -47 -38 -29 -20 -16  -7   2   6   4  -3]\n [-63 -54 -45 -36 -27 -23 -14  -5  -1   3   6]]\n6\nMMRMMMMIMM\n'

### Seed&Extend ###

In [7]:
def reverse_complement(read):
    ''' Returns the reverse complement of read as a list of characters.

    A/T and C/G are swapped. Any other symbol (for example 'N') is kept
    unchanged rather than dropped, so the result always has the same length
    as read. '''
    complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    reversedRead = []
    # Walk the read from its last position to its first
    for i in range(len(read) - 1, -1, -1):
        base = read[i]
        reversedRead.append(complement.get(base, base))
    return reversedRead


def seed_and_extend(referenceGenome, readId, isReverseComplement, read, seedLength, margin, aligner, fmIndex):
    ''' referenceGenome, readId, isReverseComplement, read, seedLength, margin, aligner, fmIndex

    Looks up the first seedLength characters of read with fmIndex.query and,
    for every hit, globally aligns the rest of the read against the reference
    window that follows the seed (length of the rest of the read + margin).

    Returns a list of tuples
    (readId, isReverseComplement, seed position, alignment score, transcript)
    sorted by alignment score, best first. Hits whose window would run past
    the end of referenceGenome are skipped. '''
    results = []
    seed = read[0:seedLength]
    # The part of the read after the seed is the same for every hit
    alignedRead = read[seedLength:]

    for position in fmIndex.query(seed):
        start = position + seedLength
        end = start + len(read) - seedLength + margin # end is not counted in slicing
        # end is exclusive, so a window ending exactly at the end of the
        # reference (end == len) is still complete and must be kept.
        if end > len(referenceGenome):
            continue

        alignedRef = referenceGenome[start:end]

        D, alignmentScore = aligner.global_alignment(alignedRef, alignedRead)
        alignment, transcript = aligner.traceback(alignedRef, alignedRead, D)

        results.append((readId, isReverseComplement, position, alignmentScore, transcript))

    results.sort(key=lambda hit: hit[3], reverse=True)
    return results

In [10]:
# TESTS

referenceGenome = 'AAGAAGTCAGGGAGCAAGCAGAGTCAGGGAGCAAGCCACCAC'
read = 'AGTCAGGGAGCAAGC'
reversedRead = reverse_complement(read)
seek_length = 10
margin = 2
aligner = Aligner(1, -3, -7)
fmIndex = FMIndex(referenceGenome)
results = seed_and_extend(referenceGenome, 'a', True, read, seek_length, margin, aligner, fmIndex)
print(results)
print(reversedRead)

# The 10-character seed 'AGTCAGGGAG' is found at positions 4 and 21.
assert results == [('a', True, 4, -9, 'MMMMMDD'), ('a', True, 21, -9, 'MMMMDMD')]
assert reversedRead == ['G', 'C', 'T', 'T', 'G', 'C', 'T', 'C', 'C', 'C', 'T', 'G', 'A', 'C', 'T']

Perform BWT
BWT transform Begin =  29/05/2022 15:54:30
BWT transform End =  29/05/2022 15:54:30
End BWT
Init. tally
Finish init. tally
Init. c
Finish init. c
[('a', True, 4, -9, 'MMMMMDD'), ('a', True, 21, -9, 'MMMMDMD')]
['G', 'C', 'T', 'T', 'G', 'C', 'T', 'C', 'C', 'C', 'T', 'G', 'A', 'C', 'T']


### Main program ###

In [8]:
from Bio import SeqIO
import sys
import pandas as pd
from datetime import datetime
import os


def readFASTA(fasta_file):
    '''Parse fasta_file as FASTA and return the sequence of its first record.

    All records are read before the first one is returned; a file without
    records raises IndexError.'''
    sequences = [seq_record.seq for seq_record in SeqIO.parse(fasta_file, "fasta")]
    return sequences[0]


def readFASTQ(fastq_file):
    '''Parse fastq_file as FASTQ and return a list with one [id, sequence]
    pair per record, in file order.'''
    return [[seq_record.id, seq_record.seq]
            for seq_record in SeqIO.parse(fastq_file, "fastq")]


# fasta = 'reference'
# fastq = 'reads'

# print(readFASTA(fasta + '.fasta'))
# print(readFASTQ(fastq + '.fastq')[0])


def store_to_csv(data, exportFileName):
    '''Write data to exportFileName as CSV.

    data is a sequence of 5-item rows; the columns are named read_id,
    is_rev_comp, position, alignment_score and transcript. The DataFrame
    index is written as the first, unnamed column.'''
    column_names = ['read_id', 'is_rev_comp', 'position', 'alignment_score', 'transcript']
    pd.DataFrame(data, columns=column_names).to_csv(exportFileName)


# Main program
# def main():
def main(fasta, fastq, match, mismatch, gap, seedlength, margin, fmIndex, referenceGenome, reads, outputdir='results_ours'):
    '''Aligns every read and its reverse complement against the reference and
    stores the best hit of each in a CSV file.

    Args:
        fasta, fastq: names of the input files. They are not used here because
            the caller passes the already loaded referenceGenome and reads.
        match, mismatch, gap: scoring parameters for Aligner (converted with int()).
        seedlength: number of leading read characters used as the seed
            (converted with int()).
        margin: extra reference characters given to the alignment window
            (converted with int()).
        fmIndex: index passed to seed_and_extend for the seed lookup.
        referenceGenome: reference sequence the reads are aligned against.
        reads: iterable of (read id, read sequence) pairs.
        outputdir: directory that receives the result file; created when
            missing. It is a parameter so that the function no longer depends
            on a global variable of the same name; the default is the
            directory the notebook has been writing to.

    The result file is named 'our_results' + match + mismatch + gap + '.csv',
    for example 'our_results1-3-7.csv'.
    '''
    match = int(match)
    mismatch = int(mismatch)
    gap = int(gap)
    seedlength = int(seedlength)
    margin = int(margin)

    # Suffix shared by the log messages and the output file name
    params = str(match) + str(mismatch) + str(gap)

    print_curr_datetime("ANALYSIS START" + params)

    print('Begin aligner init.')
    aligner = Aligner(match, mismatch, gap)
    print('End aligner init.')
    print()

    print("Start reads")

    step = 20  # timestamps are printed for every step-th read only
    bestResults = []
    for i, read in enumerate(reads, start=1):
        readId, readSeq = read[0], read[1]
        logProgress = i % step == 0

        # Forward strand first, then the reverse complement
        for isReverse, label in ((False, "read "), (True, "reversed read ")):
            if logProgress:
                print_curr_datetime("Start datetime for " + label + str(i))

            candidate = reverse_complement(readSeq) if isReverse else readSeq
            results = seed_and_extend(referenceGenome, readId, isReverse, candidate, seedlength, margin, aligner, fmIndex)
            if len(results) > 0:
                bestResults.append(results[0]) # results are sorted, best first

            if logProgress:
                print_curr_datetime("End datetime for " + label + str(i))

    os.makedirs(outputdir, exist_ok=True)
    store_to_csv(bestResults, os.path.join(outputdir, 'our_results' + params + '.csv'))

    print("Finish reads and all")

    print_curr_datetime("ANALYSIS FINISHED" + params)

if __name__ == "__main__":
    fasta = 'example_human_reference.fasta'
    fastq = "medium_example_human_Illumina.pe_1.fastq"
    seed = 10
    margin = 2

    outputdir = 'results_ours'
    # exist_ok avoids the separate existence check
    os.makedirs(outputdir, exist_ok=True)

    print('Read files')
    referenceGenome = readFASTA(fasta)
    reads = readFASTQ(fastq)
    print('Files read')
    print()

    # The index depends only on the reference, so it is built once and
    # shared by every run of the parameter sweep below.
    print('Begin fm index init.')
    fmIndex = FMIndex(referenceGenome)
    print('End fm index init.')
    print()

    # Parameter sweep: match in {1, 2}, mismatch in {-3, -2}, gap in {-7, -5}
    for match in range(1, 3):
        for mismatch in range(-3, -1):
            for gap in (-7, -5):
                main(fasta, fastq, match, mismatch, gap, seed, margin, fmIndex, referenceGenome, reads)


Read files
Files read

Begin fm index init.
Perform BWT
BWT transform Begin =  29/05/2022 16:37:28
BWT transform End =  29/05/2022 16:37:28
End BWT
Init. tally
Finish init. tally
Init. c
Finish init. c
End fm index init.

ANALYSIS START1-3-7 =  29/05/2022 16:37:28
Begin aligner init.
End aligner init.

Start reads
Start datetime for read 20 =  29/05/2022 16:41:42
End datetime for read 20 =  29/05/2022 16:41:42
Start datetime for reversed read 20 =  29/05/2022 16:41:42
End datetime for reversed read 20 =  29/05/2022 16:41:42
Start datetime for read 40 =  29/05/2022 16:45:51
End datetime for read 40 =  29/05/2022 16:46:25
Start datetime for reversed read 40 =  29/05/2022 16:46:25
End datetime for reversed read 40 =  29/05/2022 16:46:25
Finish reads and all
ANALYSIS FINISHED1-3-7 =  29/05/2022 16:46:26
ANALYSIS START1-3-5 =  29/05/2022 16:46:26
Begin aligner init.
End aligner init.

Start reads
Start datetime for read 20 =  29/05/2022 16:50:38
End datetime for read 20 =  29/05/2022 16:50:

In [8]:
# Shell escape: list the files available under /sbgenomics/project-files.
!ls /sbgenomics/project-files

example_human_Illumina.pe_1.fastq  example_human_reference.fasta
example_human_Illumina.pe_2.fastq
