__Analysis of ribosome profiling__

These data were downloaded from Trips-Viz on 10-1-19 using the following link (https://trips.ucc.ie/homo_sapiens/Gencode_v25/interactive_plot/?files=&ribo_studies=18,20,21,23,24,27,28,29,31,32,33,34,35,38,39,42,43,44,45,56,58,60,62,63,64,67,89,90,99,101,102,103,107,113,122,124,130,134,138,141,144,150,152,153,165,171,172,176,177,178,179,181,183,190,191,192,201,204,212,&tran=ENST00000268124&minread=25&maxread=150&user_dir=fiveprime&ambig=F&cov=F&lg=T&nuc=F&rs=0&crd=F&short=mcr) for the transcript with identifier ENST00000268124.

Set `riboseq_table` in the configuration cell below to the path of the counts CSV file you downloaded.

In [40]:
from Bio.Seq import Seq
from Bio import Entrez
from Bio import SeqIO
from Bio.Alphabet import IUPAC
import matplotlib
import matplotlib.pyplot as plt
% matplotlib inline
import numpy as np
from Bio.Align.Applications import MuscleCommandline
import os
from Bio import SeqIO
import csv
from itertools import islice



In [41]:
# Location of the counts table, resolved relative to the current working
# directory. The path is assembled with os.path.join rather than a hard-coded
# backslash: '\C' is an invalid string escape, and a literal backslash is only
# a path separator on Windows.
cwd = os.getcwd() + os.sep
riboseq_table = os.path.join(cwd, 'Representative_Species', 'Counts.csv')

In [42]:
def processRiboSeqcsv(riboseq_file):
    """Read a counts CSV into a dictionary of column lists.

    The file must have a header row containing the columns ``Position``,
    ``Sequence``, ``Frame 1``, ``Frame 2``, ``Frame 3`` and ``RNA-Seq``.

    Parameters
    ----------
    riboseq_file : str
        Path to the comma-separated counts file.

    Returns
    -------
    dict
        Keys ``'Position'``, ``'Sequence'``, ``'Frame1'``, ``'Frame2'``,
        ``'Frame3'`` and ``'RNA_Seq'``; each value is a list with one entry
        per data row, in file order. Entries are the raw strings read from
        the file (no numeric conversion is done here).

    Raises
    ------
    KeyError
        If one of the expected header columns is missing.
    """
    # Output key -> header name as it appears in the CSV file.
    column_headers = {
        'Position': 'Position',
        'Sequence': 'Sequence',
        'Frame1': 'Frame 1',
        'Frame2': 'Frame 2',
        'Frame3': 'Frame 3',
        'RNA_Seq': 'RNA-Seq',
    }
    Riboseq_dict = {key: [] for key in column_headers}

    # 'utf-8-sig' strips a leading UTF-8 byte-order mark if present (and reads
    # plain UTF-8 unchanged), so the first header is always 'Position' instead
    # of a platform-dependent mis-decoding such as 'ï»¿Position'.
    # newline='' is what the csv module expects for files passed to a reader.
    with open(riboseq_file, encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f, delimiter=",")
        for row in reader:
            for key, header in column_headers.items():
                Riboseq_dict[key].append(row[header])

    return Riboseq_dict

In [43]:
def queryRiboSeq(sequence,Riboseq_dict):
    """Collect the per-nucleotide counts for a unique subsequence.

    The entries of ``Riboseq_dict['Sequence']`` are concatenated into one
    string and searched for ``sequence``. If ``sequence`` occurs exactly once,
    the 1-based start index and the (exclusive) end index are printed and the
    counts for every position of the match are returned.

    Parameters
    ----------
    sequence : str
        Subsequence to look up.
    Riboseq_dict : dict
        Mapping with the keys ``'Sequence'``, ``'Frame1'``, ``'Frame2'``,
        ``'Frame3'`` and ``'RNA_Seq'``, each a list indexed by position
        (the structure returned by ``processRiboSeqcsv``).

    Returns
    -------
    dict or None
        ``{position: {'Nucleotide': {nt}, 'Frame1': {int}, 'Frame2': {int},
        'Frame3': {int}, 'RNASeq': {int}}}`` keyed by 1-based position. Every
        value is wrapped in a single-element set, which is the shape
        ``printRiboSeqAnalysis`` iterates over. ``None`` is returned (after
        printing a message) when the subsequence is absent or not unique.
    """
    full_length_sequence = ''.join(Riboseq_dict['Sequence'])

    first_hit = full_length_sequence.find(sequence)
    if first_hit == -1:
        print('Sequence was not found')
        return None

    # Search again starting one character after the first hit. str.count()
    # only counts non-overlapping matches, so it would report e.g. 'AA' in
    # 'CAAAG' as unique even though it occurs at two positions.
    if full_length_sequence.find(sequence, first_hit + 1) != -1:
        print('This sequence occurs more than once, try again with a unique sequence')
        return None

    beg_index = first_hit + 1              # 1-based position of the first nucleotide
    end_index = beg_index + len(sequence)  # one past the last nucleotide
    print(beg_index)
    print(end_index)

    subsequence_dict = {}
    for position, nt in enumerate(sequence, start=beg_index):
        row = position - 1  # back to the 0-based index used by the count lists
        subsequence_dict[position] = {
            'Nucleotide': {nt},
            'Frame1': {int(Riboseq_dict['Frame1'][row])},
            'Frame2': {int(Riboseq_dict['Frame2'][row])},
            'Frame3': {int(Riboseq_dict['Frame3'][row])},
            'RNASeq': {int(Riboseq_dict['RNA_Seq'][row])},
        }
    return subsequence_dict
        
        
    

In [44]:
def printRiboSeqAnalysis(sequence_dict):
    """Print the mean RNA-Seq-normalised count per nucleotide for each frame.

    For every position the Frame 1/2/3 counts are divided by that position's
    RNA-Seq count (a count of 0 is replaced by 1 to avoid dividing by zero).
    The per-frame sums are then divided by the length of the subsequence, and
    the three per-frame means plus their total are printed. Note that the
    printed message calls these values "reads" although they are normalised
    per-nucleotide means.

    Parameters
    ----------
    sequence_dict : dict or None
        Mapping of position -> ``{'Nucleotide', 'Frame1', 'Frame2', 'Frame3',
        'RNASeq'}`` where each value is a single-element collection, as
        produced by ``queryRiboSeq``. An empty dict or ``None`` (what
        ``queryRiboSeq`` returns when the lookup fails) prints a short notice
        instead of raising.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If one of the per-position values does not hold exactly one element.
    """
    if not sequence_dict:
        # Nothing to average over; previously this ended in a TypeError (None)
        # or ZeroDivisionError (empty dict).
        print('No ribosome profiling data to analyse: the subsequence is empty or was not found.')
        return

    nucleotides = []
    x_total_fr1_counts = 0.0
    x_total_fr2_counts = 0.0
    x_total_fr3_counts = 0.0

    for counts in sequence_dict.values():
        # Each value is wrapped in a one-element set; unpack the lone element.
        (nt,) = counts['Nucleotide']
        (fr1_count,) = counts['Frame1']
        (fr2_count,) = counts['Frame2']
        (fr3_count,) = counts['Frame3']
        (RNASeq_count,) = counts['RNASeq']

        # A position with no RNA-Seq coverage is normalised by 1 instead of 0.
        RNASeq_count = float(RNASeq_count) or 1.0

        nucleotides.append(nt)
        x_total_fr1_counts += float(fr1_count) / RNASeq_count
        x_total_fr2_counts += float(fr2_count) / RNASeq_count
        x_total_fr3_counts += float(fr3_count) / RNASeq_count

    full_subsequence = ''.join(nucleotides)
    subsequence_length = float(len(full_subsequence))

    total_fr1_counts = x_total_fr1_counts / subsequence_length
    total_fr2_counts = x_total_fr2_counts / subsequence_length
    total_fr3_counts = x_total_fr3_counts / subsequence_length
    total_all_counts = total_fr1_counts + total_fr2_counts + total_fr3_counts

    print('For the sequence %s, Frame 1 has %f reads, Frame 2 has %f reads, Frame 3 has %f reads, and there are a total of %f reads. '%(full_subsequence,total_fr1_counts,total_fr2_counts,total_fr3_counts,total_all_counts))

In [45]:
# Query sequences for Homo sapiens. If you analyse a different organism (or a
# different transcript), these sequences need to be replaced accordingly.
#
# The two longest sequences are written as adjacent string literals inside
# parentheses: Python joins them into one string, so no newline ends up inside
# the sequence and the literal stays syntactically valid.
# NOTE(review): CTG_sequence looks like ORFY_only_nonoverlap followed by
# Overlap_ORF, and Main_ORF looks like Overlap_ORF followed by Nonoverlap_ORF
# -- confirm if these are ever edited independently.

uORF_sequence = 'ATGGTCAAACCCATTTCACTGACAGGAGAGCAGAGACAGGACGTGTCTCTCTCCACGTCTTCCAGCCAGTAAA'
CTG_sequence = 'CTGGAGCCCAAAGCCAGGTGTTCTGACTCCCAGCGTGGGGGTCCCTGCACCAACCATGAGCCGCCTGCTCTGGAGGAAGGTGGCCGGCGCCACCGTCGGGCCAGGGCCGGTTCCAGCTCCGGGGCGCTGGGTCTCCAGCTCCGTCCCCGCGTCCGACCCCAGCGACGGGCAGCGGCGGCGGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAACAGCAGCCTCAGCAGCCGCAAGTGCTATCCTCGGAGGGCGGGCAGCTGCGGCACAACCCATTGGACATCCAGATGCTCTCGAGAGGGCTGCACGAGCAAATCTTCGGGCAAGGAGGGGAGATGCCTGGCGAGGCCGCGGTGCGCCGCAGCGTCGAGCACCTGCAGAAGCACGGGCTCTGGGGGCAGCCAGCCGTGCCCTTGCCCGACGTGGAGCTGCGCCTGCCGCCCCTCTACGGGGACAACCTGGACCAGCACTTCCGCCTCCTGGCCCAGAAGCAGAGCCTGCCCTACCTGGAGGCGGCCAACTTGCTGTTGCAGGCCCAGCTGCCCCCGAAGCCCCCGGCTTGGGCCTGGGCGGAGGGCTGGACCCGGTACGGCCCCGAGGGGGAGGCCGTACCCGTGGCCATCCCCGAGGAGCGGGCCCTGGTGTTCGACGTGGAGGTCTGCTTGGCAGAGGGAACTTGCCCCACATTGGCGGTGGCCATATCCCCCTCGGCCTGGTATTCCTGGTGCAGCCAGCGGCTGGTGGAAGAGCGTTACTCTTGGACCAGCCAGCTGTCGCCGGCTGA'
Main_ORF = (
    'ATGAGCCGCCTGCTCTGGAGGAAGGTGGCCGGCGCCACCGTCGGGCCAGGGCCGGTTCCAGCTCCGGGGCGCTGGGTCTCCAGCTCCGTCCCCGCGTCCGACCCCAGCGACGGGCAGCGGCGGCGGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAACAGCAGCCTCAGCAGCCGCAAGTGCTATCCTCGGAGGGCGGGCAGCTGCGGCACAACCCATTGGACATCCAGATGCTCTCGAGAGGGCTGCACGAGCAAATCTTCGGGCAAGGAGGGGAGATGCCTGGCGAGGCCGCGGTGCGCCGCAGCGTCGAGCACCTGCAGAAGCACGGGCTCTGGGGGCAGCCAGCCGTGCCCTTGCCCGACGTGGAGCTGCGCCTGCCGCCCCTCTACGGGGACAACCTGGACCAGCACTTCCGCCTCCTGGCCCAGAAGCAGAGCCTGCCCTACCTGGAGGCGGCCAACTTGCTGTTGCAGGCCCAGCTGCCCCCGAAGCCCCCGGCTTGGGCCTGGGCGGAGGGCTGGACCCGGTACGGCCCCGAGGGGGAGGCCGTACCCGTGGCCATCCCCGAGGAGCGGGCCCTGGTGTTCGACGTGGAGGTCTGCTTGGCAGAGGGAACTTGCCCCACATTGGCGGTGGCCATATCCCCCTCGGCCTGGTATTCCTGGTGCAGCCAGCGGCTGGTGGAAGAGCGTTACTCTTGGACCAGCCAGCTGTCGCCGGCTGACCTCATCCCCCTGGAGGTCCCTACTGGTGCCAGCAGCCCCACCCAGAGAGACTGGCAGGAGCAGTTAGTGGTGGGGCACAATGTTTCCTTTGACCGAGCTCATATCAGGGAGCAGTACCTGATCCAGGGTTCCCGCATGCGTTTCCTGGACACCATGAGCATGCACATGGCCATCTCAGGGCTAAGCAGCTTCCAGCGCAGTCTGTGGATAGCAGCCAAGCAGGGCAAACACAAGGTCCAGCCCCCCACAAAGCAAGGCCAGAAGTCCCAGAGGAAAGCCAGAAGAGGCCCAGCGATCTCATCCTGGGACTGGCTGGACATCAGCAGTGTCAACAGTCTGGCAGAGGTGCACAGACTTTATGTAGGGGGGCCTCCCTTAGAGAAGGAGCCTCGAGAACTGTTTGTGAAGGGCACCATGAAGGACATTCGTGAGAACTTCCAGGACCTGATGCAGTACTGTGCCCAGGACGTGTGGGCCACCCATGAGGTTTTCCAGCAGCAGCTACCGCTCTTCTTGGAGAGGTGTCCCCACCCAGTGACTCTGGCCGGCATGCTGGAGATGGGTGTCTCCTACCTGCCTGTCAACCAGAACTGGGAGCGTTACCTGGCAGAGGCACAGGGCACTTATGAGGAGCTCCAGCGGGAGATGAAGAAGTCGTTGATGGATCTGGCCAATGATGCCTGCCAGCTGCTCTCAGGAGAGAGGTACAAAGAAGACCCCTGGCTCTGGGACCTGGAGTGGGACCTGCAAGAATTTAAGCAGAAGAAAGCTAAGAAGGTGAAGAAGGAACCAGCCACAGCCAGCAAGTTGCCCATCGAGGGGGCTGGGGCCCCTGGTGATCCCATGGATCAGGAAGACCTCGGCCCCTGCAGTGAGGAGGAGGAGTTTCAACAAGATGTCATGGCCCGCGCCTGCTTGCAGAAGCTGAAGGGGACCACAGAGCTCCTGCCCAAGCGGCCCCAGCACCTTCCTGGACACCCTGGATGGTACCGGAAGCTCTGCCCCCGGCTAGACGACCCTGCATGGACCCCGGGCCCCAGCCTCCTCAGCCTGCAGATGCGGGTCACACCTAAACTCATGGCACTTACCTGGGATGGCTTCCCTCTGCACTACTCAGAGCGTCATGGCTGGGGCTACTTGGTGCCTGGGCGGCGGGACAACCTGGCCAAGCTGCCGACAGGTACCACCCTGGAGTCAGCTGGGGTGGTCTGCCCCTACAGAGCCATCGAGTCCCTGTACAGGAAGCACTGTCTCGAACAG'
    'GGGAAGCAGCAGCTGATGCCCCAGGAGGCCGGCCTGGCGGAGGAGTTCCTGCTCACTGACAATAGTGCCATATGGCAAACGGTAGAAGAACTGGATTACTTAGAAGTGGAGGCTGAGGCCAAGATGGAGAACTTGCGAGCTGCAGTGCCAGGTCAACCCCTAGCTCTGACTGCCCGTGGTGGCCCCAAGGACACCCAGCCCAGCTATCACCATGGCAATGGACCTTACAACGACGTGGACATCCCTGGCTGCTGGTTTTTCAAGCTGCCTCACAAGGATGGTAATAGCTGTAATGTGGGAAGCCCCTTTGCCAAGGACTTCCTGCCCAAGATGGAGGATGGCACCCTGCAGGCTGGCCCAGGAGGTGCCAGTGGGCCCCGTGCTCTGGAAATCAACAAAATGATTTCTTTCTGGAGGAACGCCCATAAACGTATCAGCTCCCAGATGGTGGTGTGGCTGCCCAGGTCAGCTCTGCCCCGTGCTGTGATCAGGCACCCCGACTATGATGAGGAAGGCCTCTATGGGGCCATCCTGCCCCAAGTGGTGACTGCCGGCACCATCACTCGCCGGGCTGTGGAGCCCACATGGCTCACCGCCAGCAATGCCCGGCCTGACCGAGTAGGCAGTGAGTTGAAAGCCATGGTGCAGGCCCCACCTGGCTACACCCTTGTGGGTGCTGATGTGGACTCCCAAGAGCTGTGGATTGCAGCTGTGCTTGGAGACGCCCACTTTGCCGGCATGCATGGCTGCACAGCCTTTGGGTGGATGACACTGCAGGGCAGGAAGAGCAGGGGCACTGATCTACACAGTAAGACAGCCACTACTGTGGGCATCAGCCGTGAGCATGCCAAAATCTTCAACTACGGCCGCATCTATGGTGCTGGGCAGCCCTTTGCTGAGCGCTTACTAATGCAGTTTAACCACCGGCTCACACAGCAGGAGGCAGCTGAGAAGGCCCAGCAGATGTACGCTGCCACCAAGGGCCTCCGCTGGTATCGGCTGTCGGATGAGGGCGAGTGGCTGGTGAGGGAGTTGAACCTCCCAGTGGACAGGACTGAGGGTGGCTGGATTTCCCTGCAGGATCTGCGCAAGGTCCAGAGAGAAACTGCAAGGAAGTCACAGTGGAAGAAGTGGGAGGTGGTTGCTGAACGGGCATGGAAGGGGGGCACAGAGTCAGAAATGTTCAATAAGCTTGAGAGCATTGCTACGTCTGACATACCACGTACCCCGGTGCTGGGCTGCTGCATCAGCCGAGCCCTGGAGCCCTCGGCTGTCCAGGAAGAGTTTATGACCAGCCGTGTGAATTGGGTGGTACAGAGCTCTGCTGTTGACTACTTACACCTCATGCTTGTGGCCATGAAGTGGCTGTTTGAAGAGTTTGCCATAGATGGGCGCTTCTGCATCAGCATCCATGACGAGGTTCGCTACCTGGTGCGGGAGGAGGACCGCTACCGCGCTGCCCTGGCCTTGCAGATCACCAACCTCTTGACCAGGTGCATGTTTGCCTACAAGCTGGGTCTGAATGACTTGCCCCAGTCAGTCGCCTTTTTCAGTGCAGTCGATATTGACCGGTGCCTCAGGAAGGAAGTGACCATGGATTGTAAAACCCCTTCCAACCCAACTGGGATGGAAAGGAGATACGGGATTCCCCAGGGTGAAGCGCTGGATATTTACCAGATAATTGAACTCACCAAAGGCTCCTTGGAAAAACGAAGCCAGCCTGGACCATAG'
)
Overlap_ORF = 'ATGAGCCGCCTGCTCTGGAGGAAGGTGGCCGGCGCCACCGTCGGGCCAGGGCCGGTTCCAGCTCCGGGGCGCTGGGTCTCCAGCTCCGTCCCCGCGTCCGACCCCAGCGACGGGCAGCGGCGGCGGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAACAGCAGCCTCAGCAGCCGCAAGTGCTATCCTCGGAGGGCGGGCAGCTGCGGCACAACCCATTGGACATCCAGATGCTCTCGAGAGGGCTGCACGAGCAAATCTTCGGGCAAGGAGGGGAGATGCCTGGCGAGGCCGCGGTGCGCCGCAGCGTCGAGCACCTGCAGAAGCACGGGCTCTGGGGGCAGCCAGCCGTGCCCTTGCCCGACGTGGAGCTGCGCCTGCCGCCCCTCTACGGGGACAACCTGGACCAGCACTTCCGCCTCCTGGCCCAGAAGCAGAGCCTGCCCTACCTGGAGGCGGCCAACTTGCTGTTGCAGGCCCAGCTGCCCCCGAAGCCCCCGGCTTGGGCCTGGGCGGAGGGCTGGACCCGGTACGGCCCCGAGGGGGAGGCCGTACCCGTGGCCATCCCCGAGGAGCGGGCCCTGGTGTTCGACGTGGAGGTCTGCTTGGCAGAGGGAACTTGCCCCACATTGGCGGTGGCCATATCCCCCTCGGCCTGGTATTCCTGGTGCAGCCAGCGGCTGGTGGAAGAGCGTTACTCTTGGACCAGCCAGCTGTCGCCGGCTGA'
Nonoverlap_ORF = (
    'CCTCATCCCCCTGGAGGTCCCTACTGGTGCCAGCAGCCCCACCCAGAGAGACTGGCAGGAGCAGTTAGTGGTGGGGCACAATGTTTCCTTTGACCGAGCTCATATCAGGGAGCAGTACCTGATCCAGGGTTCCCGCATGCGTTTCCTGGACACCATGAGCATGCACATGGCCATCTCAGGGCTAAGCAGCTTCCAGCGCAGTCTGTGGATAGCAGCCAAGCAGGGCAAACACAAGGTCCAGCCCCCCACAAAGCAAGGCCAGAAGTCCCAGAGGAAAGCCAGAAGAGGCCCAGCGATCTCATCCTGGGACTGGCTGGACATCAGCAGTGTCAACAGTCTGGCAGAGGTGCACAGACTTTATGTAGGGGGGCCTCCCTTAGAGAAGGAGCCTCGAGAACTGTTTGTGAAGGGCACCATGAAGGACATTCGTGAGAACTTCCAGGACCTGATGCAGTACTGTGCCCAGGACGTGTGGGCCACCCATGAGGTTTTCCAGCAGCAGCTACCGCTCTTCTTGGAGAGGTGTCCCCACCCAGTGACTCTGGCCGGCATGCTGGAGATGGGTGTCTCCTACCTGCCTGTCAACCAGAACTGGGAGCGTTACCTGGCAGAGGCACAGGGCACTTATGAGGAGCTCCAGCGGGAGATGAAGAAGTCGTTGATGGATCTGGCCAATGATGCCTGCCAGCTGCTCTCAGGAGAGAGGTACAAAGAAGACCCCTGGCTCTGGGACCTGGAGTGGGACCTGCAAGAATTTAAGCAGAAGAAAGCTAAGAAGGTGAAGAAGGAACCAGCCACAGCCAGCAAGTTGCCCATCGAGGGGGCTGGGGCCCCTGGTGATCCCATGGATCAGGAAGACCTCGGCCCCTGCAGTGAGGAGGAGGAGTTTCAACAAGATGTCATGGCCCGCGCCTGCTTGCAGAAGCTGAAGGGGACCACAGAGCTCCTGCCCAAGCGGCCCCAGCACCTTCCTGGACACCCTGGATGGTACCGGAAGCTCTGCCCCCGGCTAGACGACCCTGCATGGACCCCGGGCCCCAGCCTCCTCAGCCTGCAGATGCGGGTCACACCTAAACTCATGGCACTTACCTGGGATGGCTTCCCTCTGCACTACTCAGAGCGTCATGGCTGGGGCTACTTGGTGCCTGGGCGGCGGGACAACCTGGCCAAGCTGCCGACAGGTACCACCCTGGAGTCAGCTGGGGTGGTCTGCCCCTACAGAGCCATCGAGTCCCTGTACAGGAAGCACTGTCTCGAACAGGGGAAGCAGCAGCTGATGCCCCAGGAGGCCGGCCTGGCGGAGGAGTTCCTGCTCACTGACAATAGTGCCATATGGCAAACGGTAGAAGAACTGGATTACTTAGAAGTGGAGGCTGAGGCCAAGATGGAGAACTTGCGAGCTGCAGTGCCAGGTCAACCCCTAGCTCTGACTGCCCGTGGTGGCCCCAAGGACACCCAGCCCAGCTATCACCATGGCAATGGACCTTACAACGACGTGGACATCCCTGGCTGCTGGTTTTTCAAGCTGCCTCACAAGGATGGTAATAGCTGTAATGTGGGAAGCCCCTTTGCCAAGGACTTCCTGCCCAAGATGGAGGATGGCACCCTGCAGGCTGGCCCAGGAGGTGCCAGTGGGCCCCGTGCTCTGGAAATCAACAAAATGATTTCTTTCTGGAGGAACGCCCATAAACGTATCAGCTCCCAGATGGTGGTGTGGCTGCCCAGGTCAGCTCTGCCCCGTGCTGTGATCAGGCACCCCGACTATGATGAGGAAGGCCTCTATGGGGCCATCCTGCCCCAAGTGGTGACTGCCGGCACCATCACTCGCCGGGCTGTGGAGCCCACATGGCTCACCGCCAGCAATGCCCGGCCTGACCGAGTAGGCAGTGAGTTGAAAGCCATGGTGCAGGCCCCACCTGGCTACACCCTTGTGGGTGCTGATGTGGACTCCCAAGAGCTGTGGATTGCAGCTGTGCTTGGAG'
    'ACGCCCACTTTGCCGGCATGCATGGCTGCACAGCCTTTGGGTGGATGACACTGCAGGGCAGGAAGAGCAGGGGCACTGATCTACACAGTAAGACAGCCACTACTGTGGGCATCAGCCGTGAGCATGCCAAAATCTTCAACTACGGCCGCATCTATGGTGCTGGGCAGCCCTTTGCTGAGCGCTTACTAATGCAGTTTAACCACCGGCTCACACAGCAGGAGGCAGCTGAGAAGGCCCAGCAGATGTACGCTGCCACCAAGGGCCTCCGCTGGTATCGGCTGTCGGATGAGGGCGAGTGGCTGGTGAGGGAGTTGAACCTCCCAGTGGACAGGACTGAGGGTGGCTGGATTTCCCTGCAGGATCTGCGCAAGGTCCAGAGAGAAACTGCAAGGAAGTCACAGTGGAAGAAGTGGGAGGTGGTTGCTGAACGGGCATGGAAGGGGGGCACAGAGTCAGAAATGTTCAATAAGCTTGAGAGCATTGCTACGTCTGACATACCACGTACCCCGGTGCTGGGCTGCTGCATCAGCCGAGCCCTGGAGCCCTCGGCTGTCCAGGAAGAGTTTATGACCAGCCGTGTGAATTGGGTGGTACAGAGCTCTGCTGTTGACTACTTACACCTCATGCTTGTGGCCATGAAGTGGCTGTTTGAAGAGTTTGCCATAGATGGGCGCTTCTGCATCAGCATCCATGACGAGGTTCGCTACCTGGTGCGGGAGGAGGACCGCTACCGCGCTGCCCTGGCCTTGCAGATCACCAACCTCTTGACCAGGTGCATGTTTGCCTACAAGCTGGGTCTGAATGACTTGCCCCAGTCAGTCGCCTTTTTCAGTGCAGTCGATATTGACCGGTGCCTCAGGAAGGAAGTGACCATGGATTGTAAAACCCCTTCCAACCCAACTGGGATGGAAAGGAGATACGGGATTCCCCAGGGTGAAGCGCTGGATATTTACCAGATAATTGAACTCACCAAAGGCTCCTTGGAAAAACGAAGCCAGCCTGGACCATAG'
)
ORFY_only_nonoverlap = 'CTGGAGCCCAAAGCCAGGTGTTCTGACTCCCAGCGTGGGGGTCCCTGCACCAACC'

In [46]:
# Load the counts table into a dict of per-column lists (one entry per CSV row).
Riboseq_dict = processRiboSeqcsv(riboseq_table)

In [47]:
# Look up the region of interest; on a unique match this prints the 1-based
# start index and the exclusive end index, and returns the per-position counts.
subsequence_dict = queryRiboSeq(ORFY_only_nonoverlap,Riboseq_dict)

280
335


In [48]:
# Report, per reading frame, the RNA-Seq-normalised counts averaged over the
# length of the selected subsequence.
printRiboSeqAnalysis(subsequence_dict)

For the sequence CTGGAGCCCAAAGCCAGGTGTTCTGACTCCCAGCGTGGGGGTCCCTGCACCAACC, Frame 1 has 41.109091 reads, Frame 2 has 16.545455 reads, Frame 3 has 13.781818 reads, and there are a total of 71.436364 reads. 
