<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.1d1 20130915//EN" "JATS-archivearticle1.dtd"><article article-type="research-article" dtd-version="1.1d1" xmlns:xlink="http://www.w3.org/1999/xlink"><front><journal-meta><journal-id journal-id-type="nlm-ta">elife</journal-id><journal-id journal-id-type="hwp">eLife</journal-id><journal-id journal-id-type="publisher-id">eLife</journal-id><journal-title-group><journal-title>eLife</journal-title></journal-title-group><issn publication-format="electronic">2050-084X</issn><publisher><publisher-name>eLife Sciences Publications, Ltd</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">01179</article-id><article-id pub-id-type="doi">10.7554/eLife.01179</article-id><article-categories><subj-group subj-group-type="display-channel"><subject>Research article</subject></subj-group><subj-group subj-group-type="heading"><subject>Cell biology</subject></subj-group><subj-group subj-group-type="heading"><subject>Genomics and evolutionary biology</subject></subj-group></article-categories><title-group><article-title>Ribosome profiling reveals pervasive and regulated stop codon readthrough in <italic>Drosophila melanogaster</italic></article-title></title-group><contrib-group><contrib contrib-type="author" id="author-6339"><name><surname>Dunn</surname><given-names>Joshua G</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="aff" rid="aff3"/><xref ref-type="aff" rid="aff4"/><xref ref-type="other" rid="par-1"/><xref ref-type="other" rid="par-2"/><xref ref-type="other" rid="par-4"/><xref ref-type="fn" rid="con1"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/><xref ref-type="other" rid="dataro2"/></contrib><contrib contrib-type="author" id="author-6343"><name><surname>Foo</surname><given-names>Catherine K</given-names></name><xref 
ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="aff" rid="aff3"/><xref ref-type="other" rid="par-1"/><xref ref-type="other" rid="par-4"/><xref ref-type="fn" rid="con2"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro2"/></contrib><contrib contrib-type="author" id="author-6341"><name><surname>Belletier</surname><given-names>Nicolette G</given-names></name><xref ref-type="aff" rid="aff5"/><xref ref-type="other" rid="par-3"/><xref ref-type="fn" rid="con3"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro2"/></contrib><contrib contrib-type="author" id="author-6342"><name><surname>Gavis</surname><given-names>Elizabeth R</given-names></name><xref ref-type="aff" rid="aff5"/><xref ref-type="other" rid="par-3"/><xref ref-type="fn" rid="con4"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro2"/></contrib><contrib contrib-type="author" corresp="yes" id="author-6320"><name><surname>Weissman</surname><given-names>Jonathan S</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="aff" rid="aff3"/><xref ref-type="aff" rid="aff4"/><xref ref-type="corresp" rid="cor1">*</xref><xref ref-type="other" rid="par-1"/><xref ref-type="other" rid="par-4"/><xref ref-type="fn" rid="con5"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/><xref ref-type="other" rid="dataro2"/></contrib><aff id="aff1"><institution>California Institute of Quantitative Biosciences</institution>, <addr-line><named-content content-type="city">San Francisco</named-content></addr-line>, <country>United States</country></aff><aff id="aff2"><institution content-type="dept">Department of Cellular and Molecular Pharmacology</institution>, <institution>University of California, San Francisco</institution>, <addr-line><named-content content-type="city">San Francisco</named-content></addr-line>, <country>United States</country></aff><aff id="aff3"><institution>Howard Hughes 
Medical Institute, University of California, San Francisco</institution>, <addr-line><named-content content-type="city">San Francisco</named-content></addr-line>, <country>United States</country></aff><aff id="aff4"><institution>Center for RNA Systems Biology</institution>, <addr-line><named-content content-type="city">Berkeley</named-content></addr-line>, <country>United States</country></aff><aff id="aff5"><institution content-type="dept">Department of Molecular Biology</institution>, <institution>Princeton University</institution>, <addr-line><named-content content-type="city">Princeton</named-content></addr-line>, <country>United States</country></aff></contrib-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Sonenberg</surname><given-names>Nahum</given-names></name><role>Reviewing editor</role><aff><institution>McGill University</institution>, <country>Canada</country></aff></contrib></contrib-group><author-notes><corresp id="cor1"><label>*</label>For correspondence: <email>weissman@cmp.ucsf.edu</email></corresp></author-notes><pub-date date-type="pub" publication-format="electronic"><day>03</day><month>12</month><year>2013</year></pub-date><pub-date pub-type="collection"><year>2013</year></pub-date><volume>2</volume><elocation-id>e01179</elocation-id><history><date date-type="received"><day>04</day><month>07</month><year>2013</year></date><date date-type="accepted"><day>13</day><month>10</month><year>2013</year></date></history><permissions><copyright-statement>© 2013, Dunn et al</copyright-statement><copyright-year>2013</copyright-year><copyright-holder>Dunn et al</copyright-holder><license xlink:href="http://creativecommons.org/licenses/by/3.0/"><license-p>This article is distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/3.0/">Creative Commons Attribution License</ext-link>, which permits unrestricted use and redistribution provided that the original 
author and source are credited.</license-p></license></permissions><self-uri content-type="pdf" xlink:href="elife01179.pdf"/><abstract><object-id pub-id-type="doi">10.7554/eLife.01179.001</object-id><p>Ribosomes can read through stop codons in a regulated manner, elongating rather than terminating the nascent peptide. Stop codon readthrough is essential to diverse viruses, and phylogenetically predicted to occur in a few hundred genes in <italic>Drosophila melanogaster</italic>, but the importance of regulated readthrough in eukaryotes remains largely unexplored. Here, we present a ribosome profiling assay (deep sequencing of ribosome-protected mRNA fragments) for <italic>Drosophila melanogaster</italic>, and provide the first genome-wide experimental analysis of readthrough. Readthrough is far more pervasive than expected: the vast majority of readthrough events evolved within <italic>D. melanogaster</italic> and were not predicted phylogenetically. The resulting C-terminal protein extensions show evidence of selection, contain functional subcellular localization signals, and their readthrough is regulated, arguing for their importance. We further demonstrate that readthrough occurs in yeast and humans. Readthrough thus provides general mechanisms both to regulate gene expression and function, and to add plasticity to the proteome during evolution.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.001">http://dx.doi.org/10.7554/eLife.01179.001</ext-link></p></abstract><abstract abstract-type="executive-summary"><object-id pub-id-type="doi">10.7554/eLife.01179.002</object-id><title>eLife digest</title><p>For a gene to give rise to a protein, its DNA is first used as a template to produce a messenger RNA molecule. Each group of three nucleotides within the messenger RNA encodes an amino acid, and structures called ribosomes assemble the protein by joining together amino acids in the correct order. 
The nucleotide triplets are called codons, and some are known as stop codons because they typically instruct the ribosome to stop adding amino acids.</p><p>Sometimes ribosomes interpret stop codons as amino acid insertion signals, giving rise to an extended protein with a modified structure or function. This phenomenon is known as stop codon readthrough, and is required for many viruses to complete their reproductive cycles. However, much less is known about stop codon readthrough in other organisms.</p><p>Now, Dunn et al. have used a technique called ribosome profiling to analyze stop codon readthrough across the entire genome of the fruit fly <italic>Drosophila melanogaster</italic>. An enzyme was used to fragment messenger RNA, and those fragments that were specifically engaged by ribosomes—and thus likely to encode protein—were sequenced. Stop codon readthrough occurred much more often than had been expected based on previous studies. Indeed, computational analysis strongly suggests that evolution has favored this process for certain fruit fly genes. Moreover, stop codon readthrough was also observed in yeast and human cells, suggesting that it is important in many organisms, not just the fruit fly.</p><p>Stop codon readthrough thus provides a novel way for organisms to tune the expression levels and functions of their genes, both throughout the lifetime of an individual, and the evolution of a species.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.002">http://dx.doi.org/10.7554/eLife.01179.002</ext-link></p></abstract><kwd-group kwd-group-type="author-keywords"><title>Author keywords</title><kwd>ribosome</kwd><kwd>translation</kwd><kwd>readthrough</kwd><kwd>stop codon</kwd><kwd>evolution</kwd><kwd>ribosome profiling</kwd></kwd-group><kwd-group kwd-group-type="research-organism"><title>Research organism</title><kwd><italic>D. melanogaster</italic></kwd><kwd>Human</kwd><kwd><italic>S. 
cerevisiae</italic></kwd></kwd-group><funding-group><award-group id="par-1"><funding-source><institution-wrap><institution>Howard Hughes Medical Institute</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Dunn</surname><given-names>Joshua G</given-names></name><name><surname>Foo</surname><given-names>Catherine K</given-names></name><name><surname>Weissman</surname><given-names>Jonathan S</given-names></name></principal-award-recipient></award-group><award-group id="par-2"><funding-source><institution-wrap><institution>National Science Foundation</institution></institution-wrap></funding-source><award-id>Graduate Research Fellowship</award-id><principal-award-recipient><name><surname>Dunn</surname><given-names>Joshua G</given-names></name></principal-award-recipient></award-group><award-group id="par-3"><funding-source><institution-wrap><institution>National Institutes of Health</institution></institution-wrap></funding-source><award-id>GM061107</award-id><principal-award-recipient><name><surname>Belletier</surname><given-names>Nicolette G</given-names></name><name><surname>Gavis</surname><given-names>Elizabeth R</given-names></name></principal-award-recipient></award-group><award-group id="par-4"><funding-source><institution-wrap><institution>National Institutes of Health</institution></institution-wrap></funding-source><award-id>P50 GM102706</award-id><principal-award-recipient><name><surname>Dunn</surname><given-names>Joshua G</given-names></name><name><surname>Foo</surname><given-names>Catherine K</given-names></name><name><surname>Weissman</surname><given-names>Jonathan S</given-names></name></principal-award-recipient></award-group><funding-statement>The funders had no role in study design, data collection and interpretation, or the decision to submit the work for 
publication.</funding-statement></funding-group><custom-meta-group><custom-meta><meta-name>elife-xml-version</meta-name><meta-value>2</meta-value></custom-meta><custom-meta specific-use="meta-only"><meta-name>Author impact statement</meta-name><meta-value>Ribosomes translate through stop codons far more often than previously thought, yielding C-terminally extended proteins in a variety of eukaryotes.</meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Upon encountering a stop codon, ribosomes can terminate translation with remarkable fidelity, yet they do not always do so. Stop codon readthrough, the decoding of a stop codon as a sense codon by the ribosome, plays important regulatory roles. Most immediately, readthrough diversifies the proteome by creating a pool of C-terminally extended proteins. In this capacity, it is essential to a variety of plant and animal viruses (<xref ref-type="bibr" rid="bib9">Cimino et al., 2011</xref>; <xref ref-type="bibr" rid="bib47">Li and Rice, 1989</xref>; <xref ref-type="bibr" rid="bib58">Napthine et al., 2012</xref>; <xref ref-type="bibr" rid="bib67">Skuzeski et al., 1991</xref>; <xref ref-type="bibr" rid="bib79">Yoshinaka et al., 1985</xref>; reviewed in <xref ref-type="bibr" rid="bib1">Beier and Grimm, 2001</xref>; <xref ref-type="bibr" rid="bib16">Firth and Brierley, 2012</xref>). 
In eukaryotic host genes, readthrough is functionally important insofar as it may suppress pathological phenotypes caused by premature stop codons (<xref ref-type="bibr" rid="bib40">Kopczynski et al., 1992</xref>; <xref ref-type="bibr" rid="bib14">Fearon et al., 1994</xref>), antagonize nonsense-mediated decay (<xref ref-type="bibr" rid="bib38">Keeling et al., 2004</xref>), and, by changing the C-terminal sequence of a given protein, modulate its activity (<xref ref-type="bibr" rid="bib73">Torabi and Kruglyak, 2012</xref>), stability (<xref ref-type="bibr" rid="bib57">Namy et al., 2002</xref>), and/or localization (<xref ref-type="bibr" rid="bib18">Freitag et al., 2012</xref>). In yeast, the efficiency of translation termination is modulated by [<italic>PSI</italic><sup>+</sup>], an epigenetic state resulting from prion-like aggregates of Sup35p, the yeast homologue of the translation termination factor eRF3 (reviewed in <xref ref-type="bibr" rid="bib75">Tuite and Cox, 2007</xref>). Various yeast strains exhibit [<italic>PSI</italic><sup>+</sup>]-dependent fitness advantages, implying that increased readthrough activates useful genetic diversity that is ordinarily masked by stop codons (<xref ref-type="bibr" rid="bib74">True and Lindquist, 2000</xref>; <xref ref-type="bibr" rid="bib24">Halfmann et al., 2012</xref>). In addition, a small baseline level of readthrough appears to be beneficial in wild [<italic>psi<sup>−</sup></italic>] yeast strains, as alleles of various factors controlling termination efficiency are under balancing selection (<xref ref-type="bibr" rid="bib72">Torabi and Kruglyak, 2011</xref>).</p><p>However, a broad understanding of the biological roles of readthrough in eukaryotes remains elusive due to a lack of experimental data. 
To date, only a handful of eukaryotic host genes have been experimentally demonstrated to undergo readthrough in wild-type or prion-free organisms (<xref ref-type="bibr" rid="bib21">Geller and Rich, 1980</xref>; <xref ref-type="bibr" rid="bib77">Xue and Cooley, 1993</xref>; <xref ref-type="bibr" rid="bib39">Klagges et al., 1996</xref>; <xref ref-type="bibr" rid="bib68">Steneberg et al., 1998</xref>; <xref ref-type="bibr" rid="bib57">Namy et al., 2002</xref>; <xref ref-type="bibr" rid="bib34">Jungreis et al., 2011</xref>; <xref ref-type="bibr" rid="bib18">Freitag et al., 2012</xref>; <xref ref-type="bibr" rid="bib73">Torabi and Kruglyak, 2012</xref>; <xref ref-type="bibr" rid="bib78">Yamaguchi et al., 2012</xref>). Compelling evidence that readthrough is broadly important in eukaryotes came with the development of algorithms (CSF and PhyloCSF) that use orthologous nucleotide sequences from related organisms to identify protein-coding regions of a reference genome based upon signatures of amino acid conservation (<xref ref-type="bibr" rid="bib48">Lin et al., 2007</xref>, <xref ref-type="bibr" rid="bib49">2011</xref>). Using this approach, 283 readthrough events were predicted in <italic>Drosophila melanogaster</italic>, six of which they confirmed experimentally (<xref ref-type="bibr" rid="bib48">Lin et al., 2007</xref>; <xref ref-type="bibr" rid="bib34">Jungreis et al., 2011</xref>). While these algorithms provide a powerful means to identify ancient and phylogenetically conserved readthrough events, they are limited in their ability to detect evolutionarily recent events. 
Nor can bioinformatic approaches identify a priori the tissues or cell types in which readthrough occurs, measure the fraction of ribosomes that read through a given stop codon, or determine whether any of these processes are regulated: such questions demand experimental approaches.</p><p>To this end, we present a modified ribosome profiling protocol—based on the deep sequencing of ribosome-protected footprint fragments (<xref ref-type="bibr" rid="bib30">Ingolia et al., 2009</xref>)—that enables analysis of translation at a genome-wide level in <italic>D. melanogaster</italic>. Application of the <italic>Drosophila</italic> ribosome profiling strategy allows annotation of the <italic>Drosophila</italic> proteome using empirical data. By examining the physical locations of ribosomes along mRNAs, we discover that readthrough is far more pervasive than expected: we identify more than 300 readthrough events not predicted by phylogenetic approaches. We provide evidence that these novel extensions are of recent evolutionary origin, and show using specific examples that both the novel and conserved extensions can produce stable protein products, be produced in a regulated manner, and contain functional subcellular localization signals. We further demonstrate that readthrough occurs at many loci in [<italic>psi<sup>−</sup></italic>] yeast and in primary human foreskin fibroblasts, arguing that readthrough is both a ubiquitous feature of eukaryotic translation and a novel mechanism to regulate gene expression. Stop codon readthrough thus adds plasticity to the proteome during development, and provides an evolutionary mechanism for extant genes to acquire new functions.</p></sec><sec id="s2" sec-type="results"><title>Results</title><sec id="s2-1"><title>Development of a ribosome profiling assay for cultured <italic>Drosophila</italic> cells</title><p>In order to study translation and, more specifically, stop codon readthrough in <italic>D. 
melanogaster,</italic> we sought to develop a robust ribosome profiling assay for this organism. We initially developed our protocol in S2 cells, a macrophage-like lineage derived from late-stage <italic>Drosophila</italic> embryos.</p><p>In previous studies, ribosome-protected fragments or ‘footprints’ were generated by digesting eukaryotic polysome lysates with RNase I (<xref ref-type="bibr" rid="bib30">Ingolia et al., 2009</xref>, <xref ref-type="bibr" rid="bib31">2011</xref>). In contrast to yeast and mammalian cell lines, we found that <italic>Drosophila</italic> ribosomes are highly sensitive to RNase I, potentially due to their unusual rRNA sequences and structures (<xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1A</xref>; <xref ref-type="bibr" rid="bib25">Hancock et al., 1988</xref>; <xref ref-type="bibr" rid="bib32">Jordan, 1975</xref>; <xref ref-type="bibr" rid="bib33">Jordan et al., 1976</xref>; <xref ref-type="bibr" rid="bib61">Pavlakis et al., 1979</xref>). By contrast, we found that <italic>Drosophila</italic> ribosomes tolerate micrococcal nuclease (MNase) over a wide range of concentrations (<xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1B–D</xref>). In contrast to RNase I, MNase has a strong 3′ A/T bias. 
This gives rise to a small amount of positional uncertainty with P-site mapping in MNase datasets, and prevents us from achieving the sort of sub-codon resolution seen in ribosome profiling datasets generated with RNase I.</p><p>Nonetheless, replicate experiments established that our measure of translation rate (the <italic>ribosome footprint density,</italic> defined as the number of ribosome-protected fragments per kilobase of coding region per million aligning reads in the dataset; RPKM), is highly reproducible and insensitive to changes in buffer conditions (<xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1E</xref>, <xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2A,B</xref>; full data in supplementary table 1 at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>). Focusing on coding regions that had a minimum of 128 reads, we observed strong correlation between replicates (r<sup>2</sup> = 0.998; <xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2</xref>) and an inter-replicate standard deviation of 1.07-fold, comparable to our protocols in yeast and mammalian cells. Furthermore, our measurements are robust to the number of isoforms per gene, the fraction of sequence-degenerate positions in a gene, gene length, A/T content, and distribution of ribosome density within a gene (<xref ref-type="fig" rid="fig1s3">Figure 1—figure supplement 3</xref>).</p></sec><sec id="s2-2"><title>Development of a ribosome profiling protocol for <italic>Drosophila</italic> embryos</title><p>In early (0–2 hr) <italic>Drosophila</italic> embryos, the vast majority of transcripts are maternally supplied and therefore regulated by post transcriptional processes, such as poly- or deadenylation, capping or de-capping, localization, degradation, and control of translation initiation. 
The early <italic>Drosophila</italic> embryo has thus been an important system for the study of post-transcriptional and specifically translational regulation (reviewed in <xref ref-type="bibr" rid="bib45">Lasko, 2011</xref>).</p><p>To enable the broad analysis of these processes, we developed a sample harvesting strategy that captures the translational state of early embryos with minimal perturbation. Specifically, we developed a cryolysis protocol in which embryos are collected directly from egg-laying dishes into liquid nitrogen, homogenized while frozen, and thawed in the presence of translation inhibitors to prevent post-lysis translation. Notably, we omit dechorionation and rinsing, steps which could induce cold shock, anoxia, and related translational artifacts.</p><p>We collected replicate samples of 0–2 hr embryos, and subjected them to ribosome profiling and RNA-seq of poly(A)-selected mRNA. A subset of ribosomes partition into heavy polysomes (<xref ref-type="fig" rid="fig1">Figure 1A</xref>), consistent with reports that a distinct subset of messages is well-translated at this stage (<xref ref-type="bibr" rid="bib63">Qin et al., 2007</xref>). Ribosome density measurements from replicate embryo collections are correlated nearly as well (r<sup>2</sup> = 0.984; <xref ref-type="fig" rid="fig1">Figure 1B</xref>; supplementary table 1 at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>) as measurements from technical replicates from a single culture of S2 cells (r<sup>2</sup> = 0.998; <xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1E</xref>). 
The <italic>Drosophila</italic> embryo thus provides a system in which experimental noise approaches the precision of our measurements, a fact that will facilitate detection of even small expression differences between wild-type and mutant fly strains.<fig-group><fig id="fig1" position="float"><object-id pub-id-type="doi">10.7554/eLife.01179.003</object-id><label>Figure 1.</label><caption><title>Development and validation of a ribosome profiling assay for <italic>Drosophila melanogaster</italic>.</title><p>(<bold>A</bold>) Aliquots of polysome lysate from 0–2 hr embryos were fractionated on 10–50% sucrose gradients with or without prior micrococcal nuclease digestion. Digestion of exposed mRNA between ribosomes collapses the polysome peaks into the monosomal (80S) peak. The area under the monosome peak in the digested sample is 1.04-fold the combined area under the monosome and polysome peaks in the undigested sample, indicating quantitative recovery. (<bold>B</bold> and <bold>C</bold>) Measurements of translation are reproducible between replicate samples of 0–2 hr embryos. Pearson correlation coefficients (r<sup>2</sup>) are shown for total ribosome-protected footprint counts in coding regions for all genes sharing at least 128 summed footprint counts between replicates (<bold>B</bold>), or translation efficiency measurements for all genes sharing 128 summed mRNA fragment counts between replicates (<bold>C</bold>). Histogram of log<sub>10</sub> fold-changes in translational efficiency for each gene between two embryo replicates, along with normal error curve (<bold>C</bold>, inset). (<bold>D</bold>–<bold>F</bold>) Pooled data for genes containing at least 128 summed mRNA counts between both embryo replicates. Median-centered histograms of translation efficiency (pink) and mRNA abundance (blue) (<bold>D</bold>). Translational efficiency vs mRNA abundance for each gene (<bold>E</bold>). Ribosome density vs mRNA abundance for each gene (<bold>F</bold>). 
Source data may be found in supplementary table 1 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.003">http://dx.doi.org/10.7554/eLife.01179.003</ext-link></p></caption><graphic xlink:href="elife01179f001"/></fig><fig id="fig1s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01179.004</object-id><label>Figure 1—figure supplement 1.</label><caption><title>Digestion with micrococcal nuclease yields a robust ribosome profiling assay.</title><p>(<bold>A</bold>) Digestion of polysomes with RNase I degrades ribosomes. A lysate was made from S2 cells using a previous version of our protocol. Aliquots of this lysate were digested with increasing amounts of RNase I, and resolved on 10–50% sucrose gradients. As amounts of RNase I increase, the heights of all peaks—including the monosomal (80S) peak—decrease before polysomes are fully resolved to monosomes. (<bold>B</bold>) as in (<bold>A</bold>), but using micrococcal nuclease (MNase) and our current protocol. From 0.5 to 2 U MNase/μg total RNA, monosomes are resolved with no reduction in the size of the monosome peak. This result indicates that <italic>Drosophila</italic> ribosomes are stable to MNase over a broad range of concentrations, whereas the mRNA between ribosomes is digested. (<bold>C</bold>) Ribosome protection assay. A 320 nucleotide fragment of enolase (FlyBase accession: FBgn0000579) was amplified using oligos oJGD123 & oJGD124 (<xref ref-type="supplementary-material" rid="SD2-data">Supplementary file 2</xref>). A body-labeled probe against this sequence was transcribed from this template using α32P-UTP and the T7 MaxiScript kit (Ambion). S2 cell lysates were prepared as in methods and aliquoted. Aliquots were digested as in methods, except with 0, 0.5, 1, 2, 3 or 4 U MNase/μg total RNA. 
Monosomes were sedimented through a sucrose cushion, resuspended in 600 μl 10 mM Tris pH 7.0, and their RNAs extracted as in ‘Materials and methods’. Concentrations were determined using a NanoDrop spectrophotometer. 5 μg of each sample was hybridized to 50,000 CPM of probe overnight at 42°C. Single-stranded regions were digested with RNase A/T1 and the remaining footprint: probe duplexes detected using the mirVana micro-RNA detection kit (Ambion), resolved on a 15% TBE-urea gel (Invitrogen), and visualized on a Storm phosphorimager (Molecular Dynamics by GE Healthcare Bio-Sciences, Pittsburgh, PA). For size markers, we end-labeled the Novex 10 bp dsDNA ladder (Invitrogen) with 32P. Over a two-fold range of nuclease concentrations, the ∼30 nt peak corresponding to ribosome-protected footprints remains constant in size and intensity, indicating a lack of degradation consistent with the unchanged monosome peak height across this range of digestion conditions in (<bold>B</bold>). Also visible is a roughly 60 nt band which we infer to be protected by adjacent ribosomes (disomes) that sterically exclude the nuclease. This interpretation is consistent with the presence of a small disome peak in digested samples (c.f. panels <bold>B</bold> and <bold>D</bold>, and <xref ref-type="fig" rid="fig1">Figure 1A</xref>). (<bold>D</bold>) A polysome lysate was prepared from S2 cells and resolved in 10–50% sucrose gradients, with or without prior digestion with 3 U MNase/μg total RNA. (<bold>E</bold>) A culture of S2 cells was split into aliquots and processed using our current protocol as if they were independent samples. Total counts aligning to the coding region of each gene were tabulated in each replicate. Genes sharing at least 128 footprint counts between replicates (red) are well-correlated, demonstrating the assay is robust (see full discussion in <xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2</xref>). 
Source data may be found in supplementary table 1 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.004">http://dx.doi.org/10.7554/eLife.01179.004</ext-link></p></caption><graphic xlink:href="elife01179fs001"/></fig><fig id="fig1s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01179.005</object-id><label>Figure 1—figure supplement 2.</label><caption><title>Effects of buffer conditions upon reproducibility.</title><p>A culture of S2 cells was divided into four aliquots, and each aliquot carried through the entire ribosome profiling procedure as an independent sample. Two aliquots (‘150a’ and ‘150b’) were processed using our standard lysis buffer with 150 mM Na<sup>+</sup> and 5 mM Mg<sup>++</sup> and digested with 3 U MNase/μg total RNA as described in ‘Materials and methods’. The other two (‘250a’ and ‘250b’) were processed using an earlier version of our protocol, in which our lysis buffer contained 250 mM Na<sup>+</sup> and 15 mM Mg<sup>++</sup>, and in which we digested lysates with 30 U MNase/μg total RNA. We then calculated ribosome density for each gene over coding regions (<bold>A</bold>), 5′ UTRs (<bold>C</bold>) and 3′ UTRs (<bold>D</bold>), and performed pairwise comparisons between samples. For each comparison, we binned genes based upon the summed number of reads in samples A and B, and calculated the correlation coefficients (Pearson's r) for the RPKM values for each gene in each bin (left column). The number of genes in each bin is also shown (right column). Correlations between samples for coding regions are robust across buffer conditions (<bold>A</bold>), though some salt-dependence is visible in 5′ and 3′ UTRs (<bold>C</bold> and <bold>D</bold>). (<bold>B</bold>) As in (<bold>A</bold>), but using only 10% of the reads. 
The high correlation observed at our 128-minimum-count threshold is therefore not a function of the number of genes in each bin. Source data may be found in supplementary table 1 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.005">http://dx.doi.org/10.7554/eLife.01179.005</ext-link></p></caption><graphic xlink:href="elife01179fs002"/></fig><fig id="fig1s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01179.006</object-id><label>Figure 1—figure supplement 3.</label><caption><title>Variability in ribosome footprint density measurements is not correlated with isoform number, sequence degeneracy in the locus of interest, locus length, A/T content, or evenness of coverage.</title><p>Comparisons are made between S2 cell technical replicates 150a and 150b (<xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2</xref>). (<bold>A</bold>) Variability of log<sub>2</sub> fold-changes in ribosome footprint densities is no greater for multi-isoform loci (pink) than it is for single-isoform loci (blue). (<bold>B</bold>) Correlation of the fraction of degenerate positions in each locus (‘Materials and methods’) with fold-changes in ribosome density between replicates at that locus. Loci with at least 128 counts between replicates are shown in black, those with fewer in red. (<bold>C</bold>) as in (<bold>B</bold>), but correlation of length with inter-replicate fold-changes. (<bold>D</bold>) as in (<bold>B</bold>), but correlation of A/T content with inter-replicate fold-changes. 
(<bold>E</bold>) as in (<bold>B</bold>), but correlation of area under Lorenz curve with inter-replicate fold-changes. Source data may be found in supplementary table 1 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.006">http://dx.doi.org/10.7554/eLife.01179.006</ext-link></p></caption><graphic xlink:href="elife01179fs003"/></fig><fig id="fig1s4" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01179.007</object-id><label>Figure 1—figure supplement 4.</label><caption><title>Measurements of translation efficiency obtained via ribosome profiling are consistent with those made using semiquantitative polysome gradients.</title><p>Histograms of translation efficiency for genes labeled by <xref ref-type="bibr" rid="bib63">Qin et al. (2007)</xref> as active (blue) or inactive (yellow) in 0–2 hr embryos. All genes are shown in gray. Source data may be found in supplementary table 1 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.007">http://dx.doi.org/10.7554/eLife.01179.007</ext-link></p></caption><graphic xlink:href="elife01179fs004"/></fig></fig-group></p><p>Translational control is measured by a gene’s <italic>translation efficiency,</italic> estimated as the ratio of ribosome footprint density (from ribosome profiling) to mRNA abundance (from mRNA-seq) for each gene. Translation efficiency measurements between replicate embryo collections are highly reproducible (r<sup>2</sup> = 0.946; <xref ref-type="fig" rid="fig1">Figure 1C</xref>) and consistent with prior measurements made by semiquantitative methods (<xref ref-type="fig" rid="fig1s4">Figure 1—figure supplement 4</xref>). 
The standard deviation of fold-changes between biological replicates is 1.19-fold (<xref ref-type="fig" rid="fig1">Figure 1C</xref>, inset), allowing detection of even modest changes in translation efficiency.</p><p>Remarkably, we find that the range of translation efficiencies for different messages spans four orders of magnitude, a range comparable to that observed for mRNA abundance of well-counted genes (<xref ref-type="fig" rid="fig1">Figure 1D</xref>). Moreover, translation efficiency is uncorrelated with mRNA abundance (r<sup>2</sup> = 8.29 × 10<sup>−5</sup>; <xref ref-type="fig" rid="fig1">Figure 1E</xref>) and mRNA abundance predicts only one third of the variance in the rate of protein production as measured by ribosome footprint density (<xref ref-type="fig" rid="fig1">Figure 1F</xref>). Translational regulation is therefore a major determinant of gene expression in the early embryo (supplementary table 1 at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>), and ribosome profiling provides a quantitative and robust means to monitor translational regulation during development.</p></sec><sec id="s2-3"><title>Ribosome density on 5′ UTRs is similar to that of coding regions</title><p>In addition to measuring gene expression, ribosome profiling maps the physical positions of ribosomes on each transcript, and thus provides a powerful tool to annotate which portions of mRNAs are translated. 
Consistent with our previous work in mammals (<xref ref-type="bibr" rid="bib31">Ingolia et al., 2011</xref>) and yeast (<xref ref-type="bibr" rid="bib30">Ingolia et al., 2009</xref>; <xref ref-type="bibr" rid="bib4">Brar et al., 2012</xref>), many 5′ UTRs in <italic>Drosophila</italic> contain substantial footprint density (<xref ref-type="fig" rid="fig2">Figure 2A</xref>, <xref ref-type="fig" rid="fig2s1">Figure 2—figure supplement 1</xref>; <xref ref-type="supplementary-material" rid="SD1-data">Supplementary file 1A</xref>) covering sequences that appear to be upstream open reading frames (uORFs; example in <xref ref-type="fig" rid="fig2">Figure 2C</xref>).<fig-group><fig id="fig2" position="float"><object-id pub-id-type="doi">10.7554/eLife.01179.008</object-id><label>Figure 2.</label><caption><title>5’ UTRs are translated.</title><p>(<bold>A</bold>) Histograms of ribosome footprint density, corrected by mRNA abundance, for 5’ UTRs, coding regions (CDS), and 3’ UTRs in 0–2 hr embryos. (<bold>B</bold>) Measurements of ribosome footprint densities of 5’ UTRs agree comparably well across a range of sequencing depths, regardless of whether 80S monosomes are specifically isolated on a sucrose gradient or enriched in a cushion. For each pair of sequencing samples, Pearson correlation coefficients (r) of ribosome footprint density measurements for 5’ UTRs are plotted as a function of sequencing depth. (<bold>C</bold>) Example of ribosome density in 5’ UTRs corresponding to the locations of uORFs. Roughly 200 nt of the genomic locus <italic>Ino80</italic> covering portions of the 5’ UTR (thin gray box) and CDS (thick gray box) are shown. In both 0–2 hr embryos and S2 cells, initiation peaks are visible at the starts of uORFs starting with an ATG codon (green box) and a near-cognate TTG codon (yellow box) as well as at the annotated start codon (beginning of thick gray box). 
Source data for panels (<bold>A</bold>) and (<bold>B</bold>) may be found in supplementary table 1 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.008">http://dx.doi.org/10.7554/eLife.01179.008</ext-link></p></caption><graphic xlink:href="elife01179f002"/></fig><fig id="fig2s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01179.009</object-id><label>Figure 2—figure supplement 1.</label><caption><title>Ribosome density over start and stop codons.</title><p>Ribosome density across the average gene or ‘metagene’ reveals peaks of ribosome density at start and stop codons. For this analysis we included all genes that met the following criteria: (<bold>a</bold>) all transcripts deriving from that gene had one annotated start codon (left panel) or stop codon (right panel), (<bold>b</bold>) all transcripts deriving from that locus covered identical genomic positions over the region of interest (ROI) shown, (<bold>c</bold>) all positions within the ROI were non-degenerate (‘Materials and methods’), and (<bold>d</bold>) at least 10 reads were present in the coding subregion of the ROI. For each ROI meeting these criteria (2800–3200 ROI per sample), we generated a ‘coverage vector’ tallying ribosome density at each nucleotide position. We then normalized each coverage vector to the mean number of footprint reads covering the annotated coding region in the ROI, excluding a 3-codon buffer flanking the start or stop codon to avoid bleedthrough from initiation or termination peaks. We then plotted the median value across all normalized coverage vectors at each position. Peaks are visible in the start and stop codons of embryo samples. Consistent with our previous work, stop codon peaks are missing from S2 cell samples because terminating ribosomes release during our 2-min treatment with translation inhibitors. 
They are present in our embryo samples, because these are flash-frozen and lysed in the presence of translation inhibitors, which block termination as well as initiation and elongation.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.009">http://dx.doi.org/10.7554/eLife.01179.009</ext-link></p></caption><graphic xlink:href="elife01179fs005"/></fig><fig id="fig2s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01179.010</object-id><label>Figure 2—figure supplement 2.</label><caption><title>Read lengths are similar in 5’ UTRs and coding regions.</title><p>We aggregated all ribosome-protected reads aligning to all genes with a single initiation codon, and in which all annotated isoforms cover the same genomic positions in the ROI shown. We plotted the following statistics as a function of the reads whose 5' end mapped to each position on the x-axis. Top: number of reads (y-axis) aligning at each position. Because the 5' end, rather than the P-site, is plotted, the peak of ribosome density is approximately 13 nucleotides 5' of the start codon (position 0, x-axis). Middle: heatmap of read lengths (y-axis) as a function of position. Bottom: median read length (y-axis) at each position.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.010">http://dx.doi.org/10.7554/eLife.01179.010</ext-link></p></caption><graphic xlink:href="elife01179fs006"/></fig><fig id="fig2s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01179.011</object-id><label>Figure 2—figure supplement 3.</label><caption><title>The choice of monosome enrichment technique—sedimentation through sucrose cushions or by fractionation on sucrose gradients—minimally affects measurements of ribosome density across 5’ UTRs and coding regions. 
3’ UTR measurements are noisier in samples prepared on cushions rather than gradients.</title><p>A polysome lysate was made from collected 0–2 hr embryos, digested with MNase, and split into four aliquots. Monosomes from two aliquots were sedimented through a sucrose cushion and recovered. Monosomes from the remaining two aliquots were fractionated on 10–50% sucrose gradients and collected. All four samples were then independently carried through our protocol, and footprint density was calculated over coding regions, 5' UTRs, and 3' UTRs. Pairwise comparisons were made for each sample as in <xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2</xref> over coding regions (<bold>A</bold>), 5' UTRs (<bold>B</bold>), or 3' UTRs (<bold>C</bold>). Pearson correlations (r) for the regions are plotted as a function of sequencing depth. Source data may be found in supplementary table 1 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.011">http://dx.doi.org/10.7554/eLife.01179.011</ext-link></p></caption><graphic xlink:href="elife01179fs007"/></fig></fig-group></p><p>We attribute this density to translating 80S ribosomes rather than 48S preinitiation complexes for three reasons: first, the length distribution of protected fragments in 5′ UTRs (25–35 nt) is indistinguishable from the length distribution of ribosome-protected fragments in coding regions (<xref ref-type="fig" rid="fig2s2">Figure 2—figure supplement 2</xref>), while the protected footprint of a preinitiation complex is reported to be larger (40–70 nt; <xref ref-type="bibr" rid="bib46">Lazarowitz and Robertson, 1977</xref>; <xref ref-type="bibr" rid="bib62">Pisarev et al., 2008</xref>). 
Second, our measurements of 5′ UTR density are indistinguishable whether we enrich digested monosomes by sedimentation through a sucrose cushion (which collects all heavy particles) or specifically separate them from preinitiation complexes by fractionation on a sucrose gradient (<xref ref-type="fig" rid="fig2">Figure 2B</xref>, <xref ref-type="fig" rid="fig2s3">Figure 2—figure supplement 3</xref>). Thus, the dominant signal contributing to our measurement of footprint density in 5′ UTRs is derived from fragments protected by 80S ribosomes. Third, because initiation and termination of translation are slow compared to elongation, initiation and termination events produce peaks of ribosome density (<xref ref-type="fig" rid="fig2s1">Figure 2—figure supplement 1</xref>; <xref ref-type="bibr" rid="bib31">Ingolia et al., 2011</xref>). Such peaks are frequently visible at the boundaries of predicted uORF sequences (example in <xref ref-type="fig" rid="fig2">Figure 2C</xref>), again arguing that reads aligning to 5′ UTRs represent translation events. Given the known roles of uORFs in regulating both the translation and the stability of mRNAs (reviewed in <xref ref-type="bibr" rid="bib53">Meijer and Thomas, 2002</xref>) we anticipate that our methods will facilitate future analyses of the contributions of uORFs to control of gene expression throughout fly development.</p></sec><sec id="s2-4"><title>A subset of genes exhibit stop codon readthrough, resulting in C-terminal protein extensions</title><p>Comparative analysis of the genomes of 12 sequenced <italic>Drosophila</italic> species has provided a powerful strategy for annotating protein-coding regions in <italic>Drosophila</italic> genomes (<xref ref-type="bibr" rid="bib48">Lin et al., 2007</xref>, <xref ref-type="bibr" rid="bib49">2011</xref>). Using this approach, 283 transcripts in <italic>D. 
melanogaster</italic> were demonstrated to contain clear phylogenetic signatures of amino acid conservation in the region between the annotated and next in-frame stop codons. It was therefore concluded that these regions encode C-terminal protein extensions (hereon called ‘predicted extensions’), produced by stop codon readthrough events (<xref ref-type="bibr" rid="bib48">Lin et al., 2007</xref>; <xref ref-type="bibr" rid="bib34">Jungreis et al., 2011</xref>).</p><p>In our data, the density of ribosomes on 3′ UTRs is several orders of magnitude lower than in coding regions and 5′ UTRs (<xref ref-type="fig" rid="fig2">Figure 2A</xref>, <xref ref-type="supplementary-material" rid="SD1-data">Supplementary file 1A</xref>), and many genes show highly efficient termination (example in <xref ref-type="fig" rid="fig3">Figure 3B</xref>). However, a subset of transcripts exhibit high footprint density within the predicted extensions. To determine whether the footprint density was consistent with stop codon readthrough (as opposed to alternate explanations, like frameshift), we manually scored each predicted extension whose corresponding structural gene was sufficiently expressed in our embryo sample (158 in total). An extension was scored positively if there existed ribosome density in the extension, ribosome density vanished or unambiguously decreased following the first in-frame stop codon, and positions occupied by ribosomes in the putative extension evenly covered the majority of the extension’s length (see ‘Materials and methods’ for further details). By these criteria, 43 of the 283 transcripts predicted to undergo stop codon readthrough contained ribosome density consistent with a readthrough event (example <xref ref-type="fig" rid="fig3">Figure 3C</xref>, full data in supplementary table 2 at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>), including one example of double readthrough (<xref ref-type="fig" rid="fig3">Figure 3D</xref>). 
We expect that many of the remaining 240 transcripts also undergo readthrough, either at levels too low to detect at our sequencing depth, or at other developmental stages (discussed further below).<fig-group><fig id="fig3" position="float"><object-id pub-id-type="doi">10.7554/eLife.01179.012</object-id><label>Figure 3.</label><caption><title>A subset of genes exhibit apparent stop codon readthrough.</title><p>(<bold>A</bold>) Venn diagram summarizing readthrough events. Of 283 predicted extensions, 256 were consistent with FlyBase genome annotation revision 5.43. For 158 of these, the corresponding coding regions were expressed in 0–2 hr embryos. Of this subset, 43 exhibited clear signs of readthrough. Others were ambiguous, untranslated, or could be explained by other mechanisms (<xref ref-type="fig" rid="fig3s1">Figure 3—figure supplement 1</xref>). In addition, we identified 307 examples of readthrough that were not phylogenetically predicted. (<bold>B</bold>) Example of a gene that does not exhibit readthrough. Top: genomic locus with UTRs (thin boxes), introns (line), and coding regions (thick boxes). Middle: normalized footprint density covering the locus in 0–2 hr embryos (blue) and S2 cells (red) in reads per million. Bottom: magnification of region where a putative C-terminal extension would be found. Dashed lines: annotated and next in-frame stop codons. (<bold>C</bold>) as in (<bold>B</bold>), except stop codon readthrough creates a C-terminal protein extension in <italic>RanBPM</italic>, a gene phylogenetically predicted to undergo readthrough. (<bold>D</bold>) as in (<bold>B</bold>), but an example of phylogenetically predicted double-readthrough. (<bold>E</bold>) Ratios of the ribosome footprint density in putative extensions to corresponding coding regions. Blue: extensions predicted to undergo readthrough. Yellow: all other possible extensions. Extensions that overlapped any annotated CDS, snoRNA, or snRNA were excluded. Boxes: IQR. 
Whiskers: 1.5*IQR. (<bold>F</bold>) as in (<bold>C</bold>), except this transcript was not predicted to undergo readthrough. (<bold>G</bold>) as in (<bold>D</bold>), except this transcript was not predicted to undergo single or double readthrough. Source data may be found in supplementary table 2 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.012">http://dx.doi.org/10.7554/eLife.01179.012</ext-link></p></caption><graphic xlink:href="elife01179f003"/></fig><fig id="fig3s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01179.013</object-id><label>Figure 3—figure supplement 1.</label><caption><title>Examples of footprint density in 3’ UTRs attributed to sources other than readthrough.</title><p>(<bold>A</bold> and <bold>B</bold>) Sample transcripts exhibiting translation in alternate frames. (<bold>C</bold>) Footprint density, potentially caused by RNA binding proteins or structures, coats the 3' UTR of <italic>EF1gamma</italic>, passing through stop codons (red triangles) in all three frames reaching the 3' end of the transcript. Colors as in (<bold>A</bold> and <bold>B</bold>), but additionally showing RNA-seq data in gray. (<bold>D</bold>) The 3' UTR of HIS3.3B contains highly localized read density consistent with the presence of an RNA binding protein or mRNA structure, but not with translation of an open reading frame. Colors as in (<bold>C</bold>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.013">http://dx.doi.org/10.7554/eLife.01179.013</ext-link></p></caption><graphic xlink:href="elife01179fs008"/></fig></fig-group></p><p>Surprisingly, we observed that a distinct set of transcripts not predicted to undergo readthrough also exhibits substantial footprint density between the annotated and next in-frame stop codons (<xref ref-type="fig" rid="fig3">Figure 3E</xref>). 
We therefore searched for C-terminal extensions among all transcripts that met the following criteria: (a) a minimum of 128 footprint reads in the corresponding CDS, (b) a minimum footprint read density of 0.2 RPKM in the extension, (c) a minimum readthrough rate of 0.001, and (d) a lack of methionine codons in the first three codons of the extension, as this latter group could be explained by initiation within the extension rather than readthrough of the upstream stop codon. We additionally excluded extensions whose translation could be explained by alternatively spliced transcript isoforms that omit the stop codon. Scoring this group by the same criteria used for the predicted extensions, we identified 307 additional examples of stop codon readthrough (hereon referred to as ‘novel extensions’; see example <xref ref-type="fig" rid="fig3">Figure 3F</xref>), including another example of double readthrough (<xref ref-type="fig" rid="fig3">Figure 3G</xref>). In addition, we identified several transcripts that contained 3′ UTR footprint density more consistent with ribosomal frameshift (<xref ref-type="fig" rid="fig3s1">Figure 3—figure supplement 1A,B</xref>), or the presence of additional downstream cistrons, RNA structure, or protein binding (<xref ref-type="fig" rid="fig3s1">Figure 3—figure supplement 1C,D</xref>). These were excluded from further analysis.</p></sec><sec id="s2-5"><title>Ribosome-protected footprints in C-terminal extensions show signatures of translation</title><p>Because footprint density generally is far lower in 3′ UTRs than in 5′ UTRs or coding regions (<xref ref-type="fig" rid="fig2">Figure 2A</xref>), it is possible that various sources of noise (e.g. regions of mRNA protected by RNA structures or by RNA-binding proteins) might contribute more substantially to this density than to the density in coding regions. 
We therefore asked whether footprints in 3′ UTRs exhibited behaviors specific to footprints protected by 80S ribosomes.</p><p>In order to distinguish whether reads mapping to extensions were either protected by ribosomes or derived from alternate sources, we compared the total number of reads aligning to extensions in samples prepared from sucrose cushions, which collect all heavy macromolecular complexes, to those in which we specifically isolated 80S ribosomes on sucrose gradients. Footprint count measurements for each extension are highly correlated between libraries made using these two sample preparation methods, indicating that these footprints are either protected by 80S ribosomes, or by another RNA binding protein that co-sediments with 80S ribosomes (<xref ref-type="fig" rid="fig4">Figure 4A</xref>; r<sup>2</sup> = 0.945).<fig-group><fig id="fig4" position="float"><object-id pub-id-type="doi">10.7554/eLife.01179.014</object-id><label>Figure 4.</label><caption><title>Translation downstream of the stop codon is due to readthrough.</title><p>(<bold>A</bold>) Ribosome footprint counts for each C-terminal extension are well correlated between samples prepared by sedimentation through sucrose cushions or by fractionation on sucrose gradients (blue). For comparison, footprint counts for annotated coding regions in each sample type are plotted (gray). The Pearson correlation coefficient (r<sup>2</sup>) for C-terminal extensions is shown. (<bold>B</bold>) Distributions of read lengths for footprints aligning to annotated coding regions (CDS, red) and to C-terminal extensions (blue) are similar, while lengths of footprints aligning to tRNAs, snRNAs, and snoRNAs are quite different. (<bold>C</bold>) Meta-gene average of ribosome density at the annotated stop codons of coding regions (red), or at the stop codons that terminate extensions (blue). Both averages show characteristic peaks of ribosome density above the stop codon, characteristic of translation termination. 
(<bold>D</bold>) Readthrough produces detectable protein products. Bottom: schema of reporters. Reporters containing the GFP variant Venus fused to the 120 C-terminal codons and entire endogenous 3’ UTR of a gene of interest were transfected into S2 cells. To facilitate detection of readthrough products, a double-FLAG epitope was inserted upstream of the stop codon (red) that terminates the putative extension. Top: reporters were immunoprecipitated with anti-GFP antibodies. Immunoprecipitates were then resolved by SDS-PAGE and western blotted with anti-FLAG antibodies to detect protein products of readthrough. Blue: names of genes containing extensions predicted to undergo readthrough. Yellow: names of genes containing novel extensions. (<bold>E</bold>) For each nucleotide in each stop codon that undergoes readthrough, we counted the fraction of reads containing nucleotide mismatches and present the data as a histogram. Transcripts containing stop codon nucleotides with significantly elevated mismatch rates are explicitly noted. Green: transcripts containing genomic polymorphisms that mutate one stop codon to another. Red: transcripts containing genomic polymorphisms that convert stop codons to sense codons. Black: other transcripts containing significantly elevated proportions of mismatches. (<bold>F</bold>) as in (<bold>E</bold>), but for ribosome-protected footprint data. 
(<bold>G</bold>) as in (<bold>F</bold>), but the analysis was restricted to the subset of footprints that both include the sequence of the stop codon and derive from ribosomes that have already translated the stop codon (top, green ribosome in cartoon).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.014">http://dx.doi.org/10.7554/eLife.01179.014</ext-link></p></caption><graphic xlink:href="elife01179f004"/></fig><fig id="fig4s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01179.015</object-id><label>Figure 4—figure supplement 1.</label><caption><title>C-terminal extensions in <italic>Drosophila melanogaster</italic> show ribosome release typical of coding regions, but not of internal codons.</title><p>For each region of interest, the total number of reads aligning to 5 codon windows immediately upstream and downstream of that codon were tabulated, and the ratio (downstream counts/upstream counts) plotted against the total number of counts in the upstream window. (<bold>A</bold>) Comparison of release scores for termination codons of annotated coding regions and for randomly-selected codons internal to (i.e., at least 10 codons from the annotated start or end) annotated coding regions. (<bold>B</bold>) as in (<bold>A</bold>), but stop codons that terminate predicted extensions are compared against those that terminate annotated coding regions. (<bold>C</bold>) as in (<bold>A</bold>) but stop codons that terminate novel extensions are compared against those that terminate annotated coding regions. 
Source data may be found in supplementary table 2 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.015">http://dx.doi.org/10.7554/eLife.01179.015</ext-link></p></caption><graphic xlink:href="elife01179fs009"/></fig></fig-group></p><p>Because various ribosome-binding proteins protect nucleotide fragments of distinct lengths, the size distribution of protected mRNA fragments provides a powerful approach for distinguishing 80S footprints from other sources (Ingolia et al., manuscript in preparation). Footprints in C-terminal extensions exhibit a length distribution very similar to footprints in coding regions, while those derived from non-coding sources, such as snoRNAs and tRNAs, show dramatically different length distributions (<xref ref-type="fig" rid="fig4">Figure 4B</xref>). Thus, footprints aligning to extensions appear to be protected by 80S ribosomes.</p><p>Finally, we sought to determine whether the ribosomes that appear to translate extensions are engaged in active translation, as opposed to some aberrant process of stalling or slippage (e.g., as described in <xref ref-type="bibr" rid="bib66">Skabkin et al., 2013</xref>). Because terminating ribosomes produce a characteristic peak of ribosome density over annotated stop codons (<xref ref-type="fig" rid="fig2s1">Figure 2—figure supplement 1</xref>; <xref ref-type="bibr" rid="bib30">Ingolia et al., 2009</xref>), we asked whether the stop codons that terminate the C-terminal extensions also showed this behavior. 
Indeed, C-terminal extensions exhibit peaks at their stop codons, clearly arguing that footprint density in C-terminal extensions is attributable to actively-translating ribosomes (<xref ref-type="fig" rid="fig4">Figure 4C</xref>).</p><p>Because this meta-gene analysis represents a group average, we also compiled individual statistics on ribosome release in a manner similar to the RRS score described by <xref ref-type="bibr" rid="bib23">Guttman et al. (2013)</xref>. Briefly, we tabulated the ratio of the total number of reads aligning within a five codon window immediately downstream of a stop codon to the number of reads aligning to the five codon window immediately upstream of that codon, with the expectation that if ribosomes terminate at a given stop codon, the score for that codon should approach zero. We performed this calculation separately for: (1) stop codons that terminate annotated coding regions, (2) stop codons that terminate C-terminal extensions, and (3) as a negative control, randomly selected codons internal to annotated coding regions. We find that the scores of stop codons that terminate C-terminal extensions fall within the distribution of scores for stop codons that terminate annotated coding regions (<xref ref-type="fig" rid="fig4s1">Figure 4—figure supplement 1</xref>), again arguing that the read density covering putative C-terminal extensions is in fact produced by ribosomes that have undergone stop codon readthrough rather than other processes.</p></sec><sec id="s2-6"><title>Readthrough produces detectable translation products</title><p>It is possible that the population of ribosomes that read through stop codons is engaged in a pathological translation process that might not produce detectable protein products. We therefore asked whether we could detect translation products by immunoprecipitation (IP) and western blotting. 
We created reporter constructs for a panel of transcripts including five predicted and 10 novel extensions that exhibited readthrough in both 0–2 hr embryos and S2 cells. In each construct, we fused Venus (a GFP variant) upstream of a portion of each transcript containing the C-terminal 120 codons of the annotated coding sequence and the entire endogenous 3′ UTR. To visualize readthrough, we fused a double FLAG epitope to the C-terminus of the putative C-terminal extension. We transfected these constructs into S2 cells, immunoprecipitated the reporter at the N-terminus using anti-GFP beads, and detected the extensions by western blotting using an anti-FLAG antibody. We detected readthrough products of the correct size for eight of the reporters, arguing that at least this subset of extensions yields C-terminally extended proteins in vivo (<xref ref-type="fig" rid="fig4">Figure 4D</xref>).</p><p>While we did not seek to detect C-terminally extended proteins generated by endogenous transcripts (e.g., through mass spectrometry), we do believe our reporter constructs to be at least as faithful as those used in earlier literature, as we included substantially more nucleotide context (120 codons upstream of stop plus the entire endogenous 3′ UTR) than other groups screening through candidate genes to find readthrough signals (2–8 codons upstream and 3–15 codons downstream of the stop codon; <xref ref-type="bibr" rid="bib14">Fearon et al., 1994</xref>; <xref ref-type="bibr" rid="bib26">Harrell et al., 2002</xref>; <xref ref-type="bibr" rid="bib57">Namy et al., 2002</xref>, <xref ref-type="bibr" rid="bib56">2003</xref>).</p></sec><sec id="s2-7"><title>Extensions are not products of selenocysteine insertion, genomic polymorphisms, or mRNA editing</title><p>The appearance of stop codon readthrough, both in ribosome profiling data and in IP-westerns, could result from several other processes, such as selenocysteine insertion, genomic mutation of stop codons to sense codons, 
or the editing of stop codons in mRNAs. We consider each of these in turn.</p><p>UGA stop codons may be decoded by specialized translation machinery as the unconventional amino acid selenocysteine if the 3′ UTR contains a selenocysteine insertion (SECIS) element. However, UGA stop codons represent only 25% of the readthrough events we report, and none of these are annotated as selenoproteins in either FlyBase (<xref ref-type="bibr" rid="bib50a">Marygold et al., 2013</xref>) or SelenoDB (<xref ref-type="bibr" rid="bib6">Castellano et al., 2008</xref>). Furthermore, we were unable to detect SECIS elements in any of their 3′ UTRs using SeciSearch 2.19 (<xref ref-type="bibr" rid="bib43">Kryukov et al., 2003</xref>). Thus, at most, even unannotated selenocysteine insertion events could only account for a small fraction of the readthrough events we report.</p><p>We also exclude the possibility that readthrough might result from genomic polymorphisms or RNA editing at the stop codon. Because both types of events would be represented in our data as mismatches between read alignments and the reference transcript sequence, we counted the total number of matching and mismatching reads covering each nucleotide position in each stop codon in our mRNA-seq and ribosome footprint datasets. For each dataset, we calculated a global average proportion of mismatching reads, and used the binomial test to identify stop codon nucleotides whose individual proportion of mismatches significantly deviated from the corresponding global average.</p><p>Together, the mRNA and footprint datasets identified a total of 10 nucleotide positions whose mismatch rates significantly exceeded the average (<xref ref-type="fig" rid="fig4">Figure 4E,F</xref>). Three positions contained genomic polymorphisms that changed one stop codon to another stop codon (<xref ref-type="fig" rid="fig4">Figure 4E,F</xref>, green). 
Two (<xref ref-type="fig" rid="fig4">Figure 4E,F</xref>, red) contained genomic polymorphisms that converted the stop codon to a sense codon. These two transcripts were therefore excluded from further study. The remaining five positions contained a variety of mismatches each occurring at low frequency. These observations are inconsistent with the presence of a genomic polymorphism at those positions, which should cause a 50% or 100% frequency of a single mismatch, depending on whether the polymorphism is hetero- or homozygous (<xref ref-type="fig" rid="fig4">Figure 4E,F</xref>, black).</p><p>An alternate explanation for a low but elevated proportion of mismatches is RNA editing, the conversion of one nucleotide to another in an mRNA. In <italic>Drosophila,</italic> the only mechanism known to edit mRNA is the deamination of adenine to inosine, which is converted to guanine by reverse transcriptase (<xref ref-type="bibr" rid="bib64">Ramaswami et al., 2013</xref>). A-to-I editing thus appears in sequencing data as a preference for A-to-G transitions among mismatches. Of the five mismatching positions we could not ascribe to genomic polymorphisms, four contain thymine or guanine rather than adenine residues in the reference sequence, and therefore cannot be edited by this pathway. We therefore attribute these mismatches to sequencing error. The majority of mismatches at the single remaining position are transversions from adenine to thymine, similarly arguing that these mismatches are more likely due to sequencing error than to A-to-I editing.</p><p>Formally, it is possible that a minor fraction of transcripts are edited, but that this fraction, even if small as measured in the RNA-seq or total footprint data, might account for all of the stop codon readthrough we observe. Analysis of the ribosome footprint data allows us to explore this possibility directly. 
Specifically, were this the case, the sequences of all the footprints deriving from ribosomes that have undergone readthrough—namely, those whose A-sites have already translated the stop codon—should contain evidence of editing (<xref ref-type="fig" rid="fig4">Figure 4G</xref>, top). We therefore separately analyzed the footprints deriving from this specific pool of ribosomes. Our dataset provided sufficient coverage to test 419 of 450 such positions (93% of the total). Of these, only four stop codon positions exhibited significantly elevated levels of mismatch (<xref ref-type="fig" rid="fig4">Figure 4G</xref>, bottom). All of these were identified in the mRNA and total footprint datasets above as having genomic polymorphisms (<xref ref-type="fig" rid="fig4">Figure 4E–F</xref>). Thus, our most stringent dataset contains no positive evidence of RNA editing.</p><p>Further, this dataset contains positive evidence against RNA editing. Under the null hypothesis that A-to-I editing drives readthrough, one would expect nearly all footprints (for our purposes, conservatively assuming 90%) in the A-site footprint dataset to contain an edited base. Under this assumption, we used a binomial test to estimate the probability of observing the proportion of A-to-G mismatches in the A-site footprint dataset at each adenine residue sufficiently covered by reads (217 positions, representing roughly 50% of A positions in all readthrough events reported). 
In this analysis, all positions contained significantly fewer A-to-G mismatches than expected under the hypothesis of A-to-I editing (Bonferroni-corrected p≪0.05 for all transcripts), indicating that A-to-I editing plays no part in any of the readthrough events we could test.</p></sec><sec id="s2-8"><title>Readthrough occurs in <italic>Saccharomyces cerevisiae</italic> and human foreskin fibroblasts</title><p>Because we detected far more readthrough events in <italic>Drosophila</italic> than were predicted from phylogenetic data, we collected yeast datasets and examined them for empirical evidence of readthrough. Importantly, because the [<italic>PSI</italic><sup><italic>+</italic></sup>] form of the yeast eRF3 homologue is known to promote readthrough, we limited our analysis to data collected from [<italic>psi</italic><sup><italic>−</italic></sup>] strains.</p><p>In contrast to MNase (which exhibits a 3′ A/T cutting bias, yielding positional uncertainty of the ribosomal P-site, see ‘Materials and methods’), RNase I shows little cutting bias. Therefore, libraries prepared with RNase I (e.g., yeast and mammalian libraries) offer superior spatial resolution along mRNAs. In such libraries, the locations of ribosome-protected footprint fragments in coding regions exhibit a characteristic three-nucleotide periodicity or <italic>phasing</italic> from which reading frames can be deduced (<xref ref-type="bibr" rid="bib30">Ingolia et al., 2009</xref>, <xref ref-type="bibr" rid="bib31">2011</xref>). We therefore tabulated the phasing of ribosome-protected footprint fragments in all annotated coding regions, putative C-terminal extensions, and the 40 codon windows downstream of the putative extensions as an approximation of the portion of the 3′ UTR distal to the putative extension (hereafter called ‘distal 3′ UTRs’). 
To control for cloning biases caused by skewed nucleotide frequencies at each phase, we tabulated the phasing of randomly-fragmented mRNA fragments that were cloned using the same protocol and aligned to the same regions. Non-random phasing consistent with translation is apparent in both the coding regions and the putative extensions, but not the distal 3′ UTR (p=3.98 × 10<sup>−26</sup>, χ<sup>2</sup> test, footprints vs mRNA fragments in extension, dof = 2; <xref ref-type="fig" rid="fig5">Figure 5A</xref>). Importantly, the major component of phasing in the putative extensions occurs in the same reading frame as that of coding regions, indicating that readthrough (as opposed to, e.g., frameshift) is a major contributor to protected fragment density in 3′ UTRs in yeast. Having found global evidence for readthrough, we manually scored a subset of yeast genes to identify individual examples of readthrough, using the same filtering and scoring criteria we used in the <italic>Drosophila</italic> datasets. We found 30 clear examples of readthrough in yeast (examples in <xref ref-type="fig" rid="fig5">Figure 5B,C</xref>; full results in supplementary table 3 at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>), demonstrating that readthrough is not unique to <italic>Drosophila.</italic><fig-group><fig id="fig5" position="float"><object-id pub-id-type="doi">10.7554/eLife.01179.016</object-id><label>Figure 5.</label><caption><title>Readthrough occurs at specific stop codons in [<italic>psi</italic><sup>-</sup>] yeast and in human foreskin fibroblasts.</title><p>(<bold>A</bold>) Triplet periodicity of 28-mers from yeast data in all non-overlapping coding regions (CDS), putative C-terminal extensions, and distal 3’ UTRs indicates that a signature of translation readthrough is visible in extensions on a bulk scale. Distal 3’ UTRs were estimated as 40 codon windows following putative extensions. 
Putative extensions and distal 3’ UTRs that overlap annotated coding regions, snoRNAs, snRNAs, tRNAs or 5’ UTRs were excluded from the analysis. (<bold>B</bold> and <bold>C</bold>) Examples of yeast transcripts that undergo readthrough, as in <xref ref-type="fig" rid="fig3">Figure 3B</xref>. (<bold>D</bold> and <bold>E</bold>) Examples of transcripts that undergo readthrough in human foreskin fibroblasts, as in <xref ref-type="fig" rid="fig3">Figure 3B</xref>. (<bold>F</bold>) Distribution of readthrough rates, by organism, for all extensions of sufficient length not to be covered by bleedthrough from termination peaks (‘Materials and methods’). Dashed line: fifth percentile of readthrough rate in conserved extensions in <italic>D. melanogaster,</italic> 1.2%. Source data may be found in supplementary tables 2, 3, and 4 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.016">http://dx.doi.org/10.7554/eLife.01179.016</ext-link></p></caption><graphic xlink:href="elife01179f005"/></fig><fig id="fig5s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01179.017</object-id><label>Figure 5—figure supplement 1.</label><caption><title>In yeast and humans, reads mapping to C-terminal extensions are drawn from the same length distribution as reads mapping to coding regions.</title><p>(<bold>A</bold>) Length distributions of reads mapping to coding regions and extensions in yeast. 
(<bold>B</bold>) Length distributions of reads mapping to coding regions and extensions in human foreskin fibroblasts.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.017">http://dx.doi.org/10.7554/eLife.01179.017</ext-link></p></caption><graphic xlink:href="elife01179fs010"/></fig></fig-group></p><p>Because readthrough has been observed in two mammalian genes (<xref ref-type="bibr" rid="bib21">Geller and Rich, 1980</xref>; <xref ref-type="bibr" rid="bib78">Yamaguchi et al., 2012</xref>), we collected data from primary human foreskin fibroblasts and sought evidence of readthrough in humans. We identified 42 readthrough events in the human data (<xref ref-type="fig" rid="fig5">Figure 5D,E</xref>; full results in supplementary table 4 at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>). These events are not explained by selenocysteine insertion, and, as in <italic>Drosophila,</italic> read lengths mapping to extensions in the yeast and human datasets are similar to those mapping to coding regions in these organisms (<xref ref-type="fig" rid="fig5s1">Figure 5—figure supplement 1</xref>). Thus, readthrough appears to be prevalent in all three organisms.</p><p>To estimate how many of the novel extensions we detected might be translated at a biologically significant level, we estimated a threshold for biological significance as the fifth percentile of readthrough rates for the phylogenetically conserved extensions that were translated in the <italic>D. melanogaster</italic> embryo, a rate of 1.2%. 
Out of all the extensions for which we could measure readthrough rates (i.e., those sufficiently long not to be covered by stop codon peaks, see ‘Materials and methods’), 61.8% of the novel extensions in <italic>Drosophila,</italic> 94.7% of the extensions in human foreskin fibroblasts, and 40.0% of the extensions in yeast exceeded this threshold, arguing that readthrough might be important in all three organisms (<xref ref-type="fig" rid="fig5">Figure 5F</xref>).</p></sec><sec id="s2-9"><title>Unpredicted C-terminal extensions show signs of recent evolutionary origin</title><p>Because 307 of the 350 readthrough events we discovered were not predicted phylogenetically, we sought to determine whether any of them showed signs of protein-coding conservation through the <italic>Drosophila</italic> phylogeny. To this end, we used PhyloCSF, which reports a log-likelihood ratio reflecting the relative probabilities of observing a given alignment of orthologous nucleotide sequences under models of protein-coding or non-coding evolution (<xref ref-type="bibr" rid="bib49">Lin et al., 2011</xref>). By this metric, only 14 of the 307 novel extensions score positively (<xref ref-type="fig" rid="fig6">Figure 6A</xref>), and their distribution of PhyloCSF scores was not markedly different from the global distribution (<xref ref-type="fig" rid="fig6s1">Figure 6—figure supplement 1A</xref>), indicating a lack of phylogenetic evidence for amino acid conservation.<fig-group><fig id="fig6" position="float"><object-id pub-id-type="doi">10.7554/eLife.01179.018</object-id><label>Figure 6.</label><caption><title>Novel C-terminal extensions in <italic>Drosophila melanogaster</italic> show signatures of selection within the melanogaster lineage.</title><p>(<bold>A</bold>) Scatter plot comparing readthrough rates for confirmed extensions against PhyloCSF scores. Blue: predicted extensions. Yellow: novel extensions. 
Datapoints with unreliably measured PhyloCSF scores or readthrough rates are not shown (‘Materials and methods’). (<bold>B</bold>) Z-curve classifier suggests that novel extensions have a nucleotide character intermediate between distal 3’ UTRs and coding regions. Histograms of Z-curve scores for 81-nucleotide windows drawn from annotated coding regions (CDS), distal 3’ UTRs, predicted extensions, and novel extensions. A single window was selected from each region 81 or more nucleotides long. Shorter regions were excluded from analysis, as they were empirically found to be noisy during classifier training. The Z-curve classifier was trained on windows drawn from CDS and distal 3’ UTRs as described in ‘Materials and methods’. (<bold>C</bold>) Novel extensions accumulate SNPs with a stronger preference than distal 3’ UTRs. Proportion of SNPs in CDS, predicted extensions, novel extensions, and distal 3’ UTRs which would be nonsynonymous if translated in frame. SNPs were obtained from wild isolates of wild-type flies by the Drosophila Population Genomics Project, and were downloaded from Ensembl (<xref ref-type="bibr" rid="bib17a">Flicek et al., 2013</xref>). Source data may be found in supplementary table 2 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.018">http://dx.doi.org/10.7554/eLife.01179.018</ext-link></p></caption><graphic xlink:href="elife01179f006"/></fig><fig id="fig6s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01179.019</object-id><label>Figure 6—figure supplement 1.</label><caption><title>Novel C-terminal extensions in <italic>Drosophila melanogaster</italic> show signatures of selection within the melanogaster lineage.</title><p>(<bold>A</bold>) Histogram of PhyloCSF scores for C-terminal extensions. Blue: phylogenetically predicted extensions that were confirmed in our datasets. 
Yellow: unpredicted extensions discovered in our datasets. Gray: global distribution of all potential extensions. The distribution of novel extensions is not substantially different from the global distribution, suggesting that many of these extensions are not phylogenetically conserved beyond <italic>melanogaster</italic>. Source data may be found in supplementary table 2 (at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>). (<bold>B</bold>) A second Z-curve classifier was trained on 81-nucleotide windows of coding regions, and 81-nucleotide windows of distal 3′ UTRs, but excluding the last 50 bases of annotated UTR to remove potential effects of polyadenylation signals upon classifier scoring. As in <xref ref-type="fig" rid="fig6">Figure 6B</xref>, predicted extensions overlay coding regions, and novel extensions display a significant shift in median from distal 3′ UTRs (p=3.81 × 10<sup>−22</sup>, Mann–Whitney U test), indicating the shift identified in <xref ref-type="fig" rid="fig6">Figure 6B</xref> is not due to polyadenylation signals.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.019">http://dx.doi.org/10.7554/eLife.01179.019</ext-link></p></caption><graphic xlink:href="elife01179fs011"/></fig></fig-group></p><p>The lack of detectable phylogenetic evidence of amino acid conservation among the novel extensions suggests two models: either (1) the novel extensions, on average, are selectively neutral, and occur only because they do not incur too great a fitness disadvantage, or (2) the novel extensions are under selection, but originated after the divergence of <italic>D. melanogaster</italic> from its closest sequenced relatives<italic>,</italic> making conservation in this group undetectable by cross-species tools such as PhyloCSF. To distinguish these possibilities, we used two tests to detect signs of selection for protein coding specifically within <italic>D. 
melanogaster.</italic></p><p>To determine whether the nucleotide sequences of novel extensions show signs of selection for protein coding potential, we implemented a Z-curve classifier, a machine-learning technique that separates coding regions from non-coding regions based upon phased differences in nucleotide <italic>k</italic>-mer frequency (<xref ref-type="bibr" rid="bib20">Gao and Zhang, 2004</xref>). We trained the classifier to distinguish annotated coding regions from distal 3′ UTRs (see ‘Materials and methods’ for details). Consistent with a long history of protein-coding selection, extensions predicted by phylogenetic conservation showed a nucleotide character indistinguishable from annotated coding regions (<xref ref-type="fig" rid="fig6">Figure 6B</xref>). By contrast, novel extensions exhibit a nucleotide character intermediate between coding regions and distal 3′ UTRs (p=1.02 × 10<sup>−23</sup>, Mann-Whitney U test, distal 3′ UTR vs novel extensions), which is consistent with an evolutionary trajectory towards coding-like character from a 3′ UTR. This effect is not due to specific nucleotide signals found in distal 3′ UTRs (p=3.81 × 10<sup>−22</sup>, <xref ref-type="fig" rid="fig6s1">Figure 6—figure supplement 1B</xref>), and was robust across Z-curve classifiers trained on different windows drawn from distal 3′ UTRs (see ‘Materials and methods’).</p><p>To obtain more direct evidence for or against protein-coding selection, we analyzed SNP data from 50 individuals of <italic>D. melanogaster</italic> from the <italic>Drosophila</italic> Population Genomics Project (<ext-link ext-link-type="uri" xlink:href="http://www.dpgp.org/">http://www.dpgp.org</ext-link>). We determined the proportion of SNPs that would be synonymous if translated in-frame in coding regions, predicted extensions, novel extensions, and distal 3′ UTRs. 
Novel extensions show a modest but significant preference for synonymous SNPs above the background level of distal 3′ UTRs (<xref ref-type="fig" rid="fig6">Figure 6C</xref>; p=1.42 × 10<sup>−5</sup>, one-sided Fisher’s exact test), but below that of the predicted extensions (p=8.42 × 10<sup>−9</sup>, one-sided Fisher’s exact test). This pattern suggests that a subset of the novel extensions is undergoing selection for protein coding, and that the contribution from this subset to the average SNP preference outweighs the contributions from other subsets of extensions that are selectively neutral or undergoing diversifying selection. Together, these results favor the hypothesis that at least a fraction of the novel extensions are of recent evolutionary origin and have come under selection within the <italic>melanogaster</italic> lineage.</p></sec><sec id="s2-10"><title>Readthrough is regulated individually for specific transcripts</title><p>In order to determine whether C-terminal extensions might be functional, we sought evidence for biological regulation of readthrough rates. We therefore queried our S2 cell and embryo datasets for evidence of differential regulation of readthrough in all genes that were sufficiently expressed in both datasets and contain only one, unique annotated coding region across all transcripts.</p><p>For each gene meeting these criteria, we tabulated the number of ribosome-protected footprints in the corresponding coding region and extension in each tissue type, and calculated a p value for the observed distribution of counts using Fisher’s exact test. Controlling the false discovery rate at 5%, we found nine of 182 testable transcripts to significantly change between samples, indicating that all nine should be true positives (<xref ref-type="table" rid="tbl1">Table 1</xref>; full data in supplementary table 2 at Dryad: <xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>). 
Thus, readthrough is differentially regulated between <italic>Drosophila</italic> cell types.<table-wrap id="tbl1" position="float"><object-id pub-id-type="doi">10.7554/eLife.01179.020</object-id><label>Table 1.</label><caption><title>Readthrough is differentially regulated between 0–2 hr embryos and S2 cells</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.020">http://dx.doi.org/10.7554/eLife.01179.020</ext-link></p></caption><table frame="hsides" rules="groups"><thead><tr><th>Gene ID</th><th>Alias</th><th>Embryo readthrough rate</th><th>S2 readthrough rate</th><th>PhyloCSF score</th><th>p value</th><th>log<sub>10</sub> fold change</th><th>Direction of change</th></tr></thead><tbody><tr><td>FBgn0036824</td><td>CG3902</td><td>7.15E−01</td><td>2.46E−03</td><td>−241.07</td><td>6.55E−10</td><td>−2.46</td><td>↓</td></tr><tr><td>FBgn0004362</td><td>HmgD</td><td>8.82E−03</td><td>1.21E−02</td><td>−747.85</td><td>7.08E−07</td><td>0.14</td><td>↑</td></tr><tr><td>FBgn0035432</td><td>ZnT63C</td><td>7.17E−03</td><td>2.71E−02</td><td>181.26</td><td>1.14E−06</td><td>0.58</td><td>↑</td></tr><tr><td>FBgn0010409</td><td>RpL18A</td><td>1.39E−02</td><td>2.08E−03</td><td>−197.78</td><td>5.85E−06</td><td>−0.83</td><td>↓</td></tr><tr><td>FBgn0039218</td><td>Rpb10</td><td>5.18E−03</td><td>2.03E−02</td><td>−333.38</td><td>8.06E−06</td><td>0.59</td><td>↑</td></tr><tr><td>FBgn0038100</td><td>Paip2</td><td>2.10E−02</td><td>4.60E−03</td><td>−497.09</td><td>3.71E−05</td><td>−0.66</td><td>↓</td></tr><tr><td>FBgn0261790</td><td>SmE</td><td>7.55E−03</td><td>7.80E−04</td><td>−530.28</td><td>9.60E−04</td><td>−0.99</td><td>↓</td></tr><tr><td>FBgn0030991</td><td>CG7453</td><td>2.18E−01</td><td>5.28E−02</td><td>−164.36</td><td>2.00E−03</td><td>−0.62</td><td>↓</td></tr><tr><td>FBgn0043796</td><td>CG12219</td><td>2.85E−01</td><td>1.90E+00</td><td>−27.83</td><td>2.11E−03</td><td>0.82</td><td>↑</td></tr></tbody></table><table-wrap-foot><fn><p>For each transcript, 
the number of reads aligning to the CDS and corresponding extension were tabulated in both embryo and S2 cell datasets. p values for significant changes were calculated using Fisher’s Exact Test. The False Discovery Rate was controlled at 5% using the procedures of Benjamini and Hochberg (‘Materials and methods’), yielding nine transcripts with significant p values.</p></fn></table-wrap-foot></table-wrap></p><p>In principle, readthrough could be regulated by: (1) changes in the expression or activities of global factors (e.g., eukaryotic release factors, charged tRNA abundance etc), (2) by gene- or transcript-specific elements, like mRNA structures, or (3) by a combination of both. In the first scenario, readthrough rates for all transcripts should increase or decrease monotonically in one cell or tissue type compared to another. In the latter two scenarios, readthrough rates should increase for some transcripts, but decrease for others. We identified four significant increases and five significant decreases in readthrough rate in embryos compared to S2 cells, indicating that readthrough is at least in part regulated on a transcript-by-transcript basis. The distribution of fold-changes in readthrough rate spans several orders of magnitude, indicating that transcripts that are robustly read through in one cell type are not necessarily read through in another (<xref ref-type="table" rid="tbl1">Table 1</xref>). This result implies that extensions function in specific cellular or developmental contexts, consistent with earlier reports that readthrough of specific genes is regulated in metazoans (<xref ref-type="bibr" rid="bib65">Robinson and Cooley, 1997</xref>; <xref ref-type="bibr" rid="bib78">Yamaguchi et al., 2012</xref>).</p><p>Because we observe such a large magnitude of regulation, we believe the 350 readthrough events we report here to represent a small subset of a larger group that occur throughout the lifetime of an individual fly. 
We therefore expect that many of the extensions that were phylogenetically predicted but not observed in our samples are in fact translated at other developmental stages in <italic>Drosophila.</italic> Finally, because transcripts with significant p values are statistically more highly counted in their extensions than those without significant p values (p=2.4 × 10<sup>−3</sup>, Mann-Whitney U test), we surmise that our ability to detect regulation was limited by sequencing depth and that the true number of transcripts whose readthrough rates are regulated in tissue- or condition-specific manners is in fact larger than we report.</p></sec><sec id="s2-11"><title>Extensions contain functional nuclear localization signals</title><p>Many peptide sequences—such as signal sequences, degrons, and phosphorylation sites—affect the localization, stability, or activity of proteins. Because these sequences are frequently short and/or degenerate, a high proportion of even random peptide sequences confer function (<xref ref-type="bibr" rid="bib36">Kaiser et al., 1987</xref>; <xref ref-type="bibr" rid="bib35">Kaiser and Botstein, 1990</xref>). Thus, a C-terminal extension produced by termination failure could purely by chance alter the function or behavior of its host protein, and thus come under selection. Indeed, <xref ref-type="bibr" rid="bib18">Freitag et al. (2012)</xref> reported two readthrough events in fungi that append peroxisomal localization signals (PTS1) to the C-termini of glyceraldehyde-3-phosphate dehydrogenase (GAPDH) and 3-phosphoglycerate kinase, enabling these typically cytosolic enzymes to function in peroxisomal metabolism. We therefore searched our full set of C-terminal extensions for short peptide signals that direct peroxisome localization, nuclear localization (NLS), prenylation, or ER retention, or that resemble transmembrane domains (see ‘Materials and methods’). PTS1 signals were detected in one extension. 
10 proteins not annotated as nuclear in FlyBase contain predicted NLSes in their extensions. Eight extensions contain predicted transmembrane domains and one contains a C-terminal prenylation signal. No extension contained an ER retention signal (<xref ref-type="table" rid="tbl2">Table 2</xref>).<table-wrap id="tbl2" position="float"><object-id pub-id-type="doi">10.7554/eLife.01179.021</object-id><label>Table 2.</label><caption><title>C-terminal extensions contain predicted functional peptide signals</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.021">http://dx.doi.org/10.7554/eLife.01179.021</ext-link></p></caption><table frame="hsides" rules="groups"><thead><tr><th>Gene ID</th><th>Alias</th><th>Extension coordinates</th><th>PhyloCSF score</th><th>Signal detected</th></tr></thead><tbody><tr><td>FBgn0000173</td><td>ben</td><td>X:13892649–13892781(+)</td><td>−302.18</td><td>NLS</td></tr><tr><td>FBgn0005278</td><td>Sam-S</td><td>2L:113542–113647(+)</td><td>−195.30</td><td>NLS</td></tr><tr><td>FBgn0026144</td><td>CBP</td><td>X:7235840–7236599(+)</td><td>128.52</td><td>NLS</td></tr><tr><td>FBgn0031897</td><td>CG13784</td><td>2L:7206347–7208015(−)</td><td>4775.49</td><td>NLS</td></tr><tr><td>FBgn0033712</td><td>CG13163</td><td>2R:8209607–8209934(+)</td><td>−675.02</td><td>NLS</td></tr><tr><td>FBgn0036272</td><td>CG4300</td><td>3L:12265284–12265557(−)</td><td>−193.87</td><td>NLS</td></tr><tr><td>FBgn0039213</td><td>atl</td><td>3R:20459429-20459720(+)</td><td>28.43</td><td>NLS</td></tr><tr><td>FBgn0260934</td><td>par-1</td><td>2R:15370912–15371608(+)</td><td>654.90</td><td>NLS</td></tr><tr><td>FBgn0261606</td><td>RpL27A</td><td>2L:4457220–4457289^4457374–4457380(−)</td><td>−148.56</td><td>NLS</td></tr><tr><td>FBgn0262114</td><td>RanBPM</td><td>2R:6322727–6323228(+)</td><td>1045.90</td><td>NLS</td></tr><tr><td>FBgn0031683</td><td>CG4230</td><td>2L:5098384–5098573(+)</td><td>−5.34</td><td>Transmembrane 
domain</td></tr><tr><td>FBgn0033712</td><td>CG13163</td><td>2R:8209607–8209934(+)</td><td>−675.02</td><td>Transmembrane domain</td></tr><tr><td>FBgn0035498</td><td>Fit1</td><td>3L:4106386–4106518(+)</td><td>−323.36</td><td>Transmembrane domain</td></tr><tr><td>FBgn0036980</td><td>RhoBTB</td><td>3L:20374798–20374821^20374891–20374982(+)</td><td>154.91</td><td>Transmembrane domain</td></tr><tr><td>FBgn0037321</td><td>CG1172</td><td>3R:1221902–1222220(+)</td><td>−624.55</td><td>Transmembrane domain</td></tr><tr><td>FBgn0040813</td><td>Nplp2</td><td>3L:13350197–13350296(+)</td><td>−242.85</td><td>Transmembrane domain</td></tr><tr><td>FBgn0053523</td><td>CG33523</td><td>3L:5922386–5922854(+)</td><td>383.85</td><td>Transmembrane domain</td></tr><tr><td>FBgn0263864</td><td>Ark</td><td>2R:12913933–12914062(+)</td><td>−123.89</td><td>Transmembrane domain</td></tr><tr><td>FBgn0039690</td><td>CG1969</td><td>3R:25567115–25567154(+)</td><td>11.52</td><td>PTS1</td></tr><tr><td>FBgn0035540</td><td>Syx17</td><td>3L:4404848–4404983(+)</td><td>290.83</td><td>Farnesyltransferase signal</td></tr></tbody></table><table-wrap-foot><fn><p>Peptide sequences of C-terminal extensions were examined using various prediction servers (see ‘Materials and methods’). Those containing predicted features are shown here. NLS: nuclear localization signal. PTS1: peroxisome localization signal. Coordinates are 0-indexed and half-open. Splice junctions are denoted with carets (‘^’). Strands are indicated in parentheses.</p></fn></table-wrap-foot></table-wrap></p><p>To determine whether any of the putative nuclear localization signals (NLSes) function in vivo<italic>,</italic> we constitutively fused C-terminal extensions containing putative NLSes to the C-terminus of a GFP-mCherry-GST reporter, which is excluded from the nucleus (<xref ref-type="fig" rid="fig7">Figure 7</xref>, left column; <xref ref-type="bibr" rid="bib7">Chan et al., 2007</xref>). 
When expressed in S2 cells, three of four NLSes relocalized the cytosolic reporter to the nucleus at levels above background (<xref ref-type="fig" rid="fig7">Figure 7</xref>, columns 3–5), arguing that these extensions can regulate the localization of their endogenous host proteins. Given the large number of short peptide signals (e.g., phosphorylation motifs, degradation motifs, ubiquitination sequences, etc) that have been discovered, and the limited number of reporters we tested here, we likely underestimate the number of extensions that confer function. Nonetheless, our results clearly establish that C-terminal extensions can alter protein function.<fig id="fig7" position="float"><object-id pub-id-type="doi">10.7554/eLife.01179.022</object-id><label>Figure 7.</label><caption><title>Extensions contain functional localization signals.</title><p>Ordinarily, a GFP-mCherry-GST reporter is excluded from the nucleus (first column). When an SV40 NLS is appended to the reporter, it is predominantly nuclear (second column). Three extensions also contain functional NLSes which at least partially relocalize the reporter to the nucleus when constitutively fused to it (remaining columns). First row: GFP reporter. Second row: nuclei stained with Hoechst. Third row: merged GFP and Hoechst. Fourth row: DIC.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.022">http://dx.doi.org/10.7554/eLife.01179.022</ext-link></p></caption><graphic xlink:href="elife01179f007"/></fig></p></sec></sec><sec id="s3" sec-type="discussion"><title>Discussion</title><p>Here we present the first comprehensive study of stop codon readthrough in a eukaryote. Using empirical data, we identified 350 readthrough events in <italic>Drosophila melanogaster,</italic> the vast majority of which were not predicted from phylogenetic signatures. We further demonstrate that readthrough occurs in yeast and humans. 
Our studies indicate that readthrough is far more pervasive than previously appreciated, is biologically regulated, and may append functional peptide signals to host proteins. Together, these results argue that stop codon readthrough provides an important mechanism to regulate gene expression and function. Our work further suggests that readthrough provides an important means for genes to acquire new functions throughout the course of evolution.</p><p>Mechanistic studies of readthrough in various systems have implicated many factors in the modulation of readthrough rates. These include the identity of the stop codon (<xref ref-type="bibr" rid="bib65">Robinson and Cooley, 1997</xref>; <xref ref-type="bibr" rid="bib8">Chao et al., 2003</xref>; <xref ref-type="bibr" rid="bib58">Napthine et al., 2012</xref>), nucleotide context surrounding the stop codon (<xref ref-type="bibr" rid="bib3">Bonetti et al., 1995</xref>; <xref ref-type="bibr" rid="bib51">McCaughan et al., 1995</xref>; <xref ref-type="bibr" rid="bib5">Cassan and Rousset, 2001</xref>; <xref ref-type="bibr" rid="bib8">Chao et al., 2003</xref>), local or distant RNA structures (<xref ref-type="bibr" rid="bib76">Wills et al., 1991</xref>; <xref ref-type="bibr" rid="bib15">Feng et al., 1992</xref>; <xref ref-type="bibr" rid="bib69">Steneberg and Samakovlis, 2001</xref>; <xref ref-type="bibr" rid="bib9">Cimino et al., 2011</xref>; <xref ref-type="bibr" rid="bib17">Firth et al., 2011</xref>; <xref ref-type="bibr" rid="bib58">Napthine et al., 2012</xref>), specific hexanucleotide sequences (<xref ref-type="bibr" rid="bib67">Skuzeski et al., 1991</xref>; <xref ref-type="bibr" rid="bib26">Harrell et al., 2002</xref>), snoRNA-mediated pseudouridylation of stop codons (<xref ref-type="bibr" rid="bib37">Karijolich and Yu, 2011</xref>), the identity of the tRNA present in the ribosomal P-site (<xref ref-type="bibr" rid="bib55">Mottagui-Tabar et al., 1998</xref>), the peptide sequence of the nascent chain (<xref 
ref-type="bibr" rid="bib55">Mottagui-Tabar et al., 1998</xref>), the concentrations of endogenous suppressor tRNAs (reviewed in <xref ref-type="bibr" rid="bib1">Beier and Grimm, 2001</xref>), and proteins that bind the ribosome or mRNA (<xref ref-type="bibr" rid="bib38">Keeling et al., 2004</xref>; <xref ref-type="bibr" rid="bib27">Hatin et al., 2007</xref>; <xref ref-type="bibr" rid="bib22">Green et al., 2012</xref>). With the exception of the readthrough signal identified in Tobacco mosaic virus (<xref ref-type="bibr" rid="bib67">Skuzeski et al., 1991</xref>), the majority of readthrough events that have been mechanistically characterized are regulated by two or more such factors, often in complex, context-specific ways. For example, downstream nucleotide contexts which promote readthrough of one stop codon can inhibit readthrough of other stop codons, and these effects can be non-linearly synergistic with upstream nucleotide contexts (<xref ref-type="bibr" rid="bib3">Bonetti et al., 1995</xref>).</p><p>Such complexity is advantageous insofar as it allows readthrough rates to be independently regulated for each transcript, consistent with our own observations. 
Unsurprisingly, however, this complexity has hindered efforts to identify simple <italic>cis</italic>-acting sequence elements that deterministically predict readthrough, and underscores the importance of having a method to measure readthrough empirically in a physiological setting in vivo<italic>.</italic> By using ribosome profiling to measure readthrough rates over a variety of tissue types and developmental stages, it may be possible to decompose the regulatory complexity into individual components, and then determine the <italic>cis</italic>-acting elements that collaborate to regulate readthrough in tissue-specific manners.</p><p>Just as alternative splicing provides a means for proteins to acquire new domains or functional modules, we propose, along with the Lindquist (<xref ref-type="bibr" rid="bib74">True and Lindquist, 2000</xref>) and Kellis (<xref ref-type="bibr" rid="bib34">Jungreis et al., 2011</xref>) groups, that stop codon readthrough can provide a mechanism for proteins to evolve at the C-terminus. In this model, transcripts that contain contexts favorable to leaky termination would yield substoichiometric, C-terminally extended populations of cellular proteins. If a particular extension is deleterious, natural selection can favor mutations in the corresponding mRNA that promote efficient termination rather than readthrough. If, instead, the extension provides a fitness advantage, selection can act upon both its amino acid sequence (to tune its function), as well as the nucleotide sequence of its mRNA (to increase or otherwise regulate its readthrough rate). In extreme cases, where an extension is universally advantageous, a mutation that changes a stop codon to a sense codon might become fixed, resulting in a constitutively extended gene. 
Conceivably, the two C-terminal extensions that we discovered to contain sense codons in place of their annotated stop codons could be the end result of this process.</p><p>Several lines of evidence are consistent with this evolutionary model. First, non-zero readthrough rates (0.02–1.4%) have been observed even for control non-readthrough reporter constructs in [<italic>psi</italic><sup><italic>−</italic></sup>] yeast (<xref ref-type="bibr" rid="bib14">Fearon et al., 1994</xref>; <xref ref-type="bibr" rid="bib3">Bonetti et al., 1995</xref>; <xref ref-type="bibr" rid="bib57">Namy et al., 2002</xref>; <xref ref-type="bibr" rid="bib38">Keeling et al., 2004</xref>; <xref ref-type="bibr" rid="bib72">Torabi and Kruglyak, 2011</xref>) and mammalian cells (<xref ref-type="bibr" rid="bib17">Firth et al., 2011</xref>; <xref ref-type="bibr" rid="bib58">Napthine et al., 2012</xref>), arguing that under typical conditions in a variety of eukaryotes, there is a small pool of C-terminally-extended proteins, originating from a wide variety of genes, available for selection to act upon.</p><p>Secondly, in specific circumstances, selection appears to favor leaky termination and its extension products. Torabi et al. (<xref ref-type="bibr" rid="bib72">Torabi and Kruglyak, 2011</xref>) reported that in a panel of wild strains of [<italic>psi</italic><sup><italic>−</italic></sup>] yeast, allelic combinations of <italic>SUP45</italic> and <italic>TRM10</italic> that promote and inhibit readthrough appear to be in balancing selection, implying that a low baseline level of readthrough is beneficial. 
Similarly, numerous reports have demonstrated that wild strains of yeast exhibit [<italic>PSI</italic><sup><italic>+</italic></sup>]-dependent fitness advantages in a variety of stress conditions, arguing that functions conferred by C-terminal extensions can provide adaptive advantages (<xref ref-type="bibr" rid="bib74">True and Lindquist, 2000</xref>; <xref ref-type="bibr" rid="bib24">Halfmann et al., 2012</xref>).</p><p>Thirdly, extensions have a high probability of conferring function without prior tuning by natural selection. This point is illustrated by the studies of Kaiser and Botstein, which demonstrated that a large proportion—roughly 30%—of randomly-generated peptide sequences are functional, insofar as they can relocalize a cytosolic form of invertase to the nucleus, mitochondrion, or endoplasmic reticulum in yeast (<xref ref-type="bibr" rid="bib36">Kaiser et al., 1987</xref>; <xref ref-type="bibr" rid="bib35">Kaiser and Botstein, 1990</xref>). Given the large number of short peptide signals now known (e.g., D-boxes, KEN-boxes, SH3 binding epitopes, phosphorylation sites, etc), it is likely that a far greater fraction of random peptide sequences contain at least one functional signal. Consistent with this hypothesis, we discovered that C-terminal extensions that are not phylogenetically conserved nonetheless contained functional NLSes in <italic>Drosophila.</italic> Furthermore, because these short signals are modular, their addition to the C-terminus of a protein can confer novel function, without requiring modification or coevolution of the host protein. In this way, even novel C-terminal extensions arising purely from termination failure can immediately alter the behavior of their host proteins, in beneficial or deleterious ways, and thus come under selection. 
Over evolutionary time, this process could yield phylogenetically conserved readthrough events or, in extreme cases, constitutively extended proteins.</p><p>Our model predicts that, at any given moment, ribosome profiling should detect a broad spectrum of conservation among readthrough events: at one extreme are ancient, phylogenetically conserved extensions, and, at the other, extensions of recent evolutionary origin. Between these, one would find extensions under varying degrees of age, conservation, and selection. This notion is borne out in our data: in <italic>Drosophila,</italic> a subset of readthrough events are well supported by conservative codon substitutions across the phylogeny (<xref ref-type="bibr" rid="bib48">Lin et al., 2007</xref>; <xref ref-type="bibr" rid="bib34">Jungreis et al., 2011</xref>), but a far larger set is not conserved between species. In aggregate, this non-conserved group shows weak but statistically significant signals of selection among fifty wild-type individuals of <italic>D. melanogaster</italic> (<xref ref-type="fig" rid="fig6">Figure 6C</xref>), suggesting that a fraction of this group is undergoing protein coding selection. The remainder might include many other different groups of extensions: namely, a group of extensions undergoing diversifying selection, a group of deleterious extensions undergoing counterselection, and a group of selectively-neutral extensions subject to genetic drift.</p><p>Finally, our model predicts that conserved extensions should on average exhibit higher readthrough rates than novel extensions, because only a subset of the latter group would have been selected for function and regulation. Our data is also consistent with this prediction: the median readthrough rate for the conserved extensions in <italic>Drosophila</italic> is 5.2%, while the median for the novel extensions is 1.7%. 
Notably, 62% of the novel extensions we identified in <italic>Drosophila,</italic> 95% of the extensions in human foreskin fibroblasts, and 40% of the extensions in yeast undergo readthrough at rates comparable to those of phylogenetically conserved extensions (<xref ref-type="fig" rid="fig5">Figure 5F</xref>), arguing for the importance of these subsets.</p><p>Broadly, our work builds upon the growing amount of evidence that eukaryotic genomes and proteomes are far more plastic than previously thought, particularly with regard to translation and coding. In addition to the large number of C-terminal extensions we report, various groups have used ribosome profiling to determine that large numbers of genes are regulated by uORFs that initiate at near-cognate start codons (<xref ref-type="bibr" rid="bib30">Ingolia et al., 2009</xref>; <xref ref-type="bibr" rid="bib4">Brar et al., 2012</xref>), that many genes can be N-terminally extended in a regulated manner (<xref ref-type="bibr" rid="bib19">Fritsch et al., 2012</xref>), and even that many parts of mammalian genomes are decoded in multiple frames (<xref ref-type="bibr" rid="bib54">Michel et al., 2012</xref>). Given the preeminence of <italic>Drosophila</italic> as a developmental model and the abundance of conditional genetic tools available, we anticipate that ribosome profiling in <italic>Drosophila</italic> will be useful in deciphering the biological roles of not only readthrough, but all non-canonical translation events, throughout development.</p></sec><sec id="s4" sec-type="materials|methods"><title>Materials and methods</title><sec id="s4-1"><title>Cell culture</title><p>Wild-type (<italic>y w</italic>) flies were cultured according to standard procedures. S2 cells were cultured in Schneider’s (Gibco by Life Technologies, Carlsbad, California) media supplemented with 10% heat-inactivated FBS (UCSF cell culture facility, San Francisco, California) and antibiotics (UCSF cell culture facility). 
S2 cells were transfected using Effectene reagent (Qiagen, the Netherlands) following the manufacturer’s instructions. For stable transfectants, the plasmid of interest was co-transfected at a 10:1 molar excess with pCoPuro. Stable integrants were selected and maintained in Schneider’s media supplemented as above, but additionally containing 10 μg/ml puromycin.</p></sec><sec id="s4-2"><title>Lysate preparation</title><sec id="s4-2-1"><title>S2 cells</title><p>16–20 hr before an experiment, cultures were diluted to 1.5–1.8 million cells/ml. To start the experiment, cells were treated for 2 min with 0.01 vol 2 mg/ml emetine (Sigma-Aldrich, St Louis, Missouri), pelleted for 2 min at 1600 rpm in a tabletop centrifuge, resuspended in 4–6 cell volumes cold polysome lysis buffer (50 mM Tris pH 7.5, 150 mM NaCl, 5 mM MgCl<sub>2</sub>, 0.5% Triton x-100, 1 mM DTT, 20 U/ml SuperaseIn (Ambion by Life Technologies), 20 μg/ml emetine), and homogenized on ice in a pre-chilled dounce homogenizer. The resulting lysate was clarified by spinning 10 min at 20,000 × <italic>g</italic> at 4°C in a microcentrifuge. Clarified lysate was aliquoted, flash-frozen in liquid nitrogen, and stored at −80°C. Experiments used 12–96 ml S2 cell culture, depending on the application. For ribosome profiling, a single 12 ml culture is sufficient.</p></sec><sec id="s4-2-2"><title>Embryos</title><p>0–2 hr old wild-type (<italic>y w</italic>) embryos were collected from egg laying dishes directly into a 50 ml conical tube full of liquid nitrogen using a rubber policeman. Multiple collections were pooled until roughly 200 µl embryos had been collected for each sample. The liquid nitrogen was then decanted, the tube capped, and the pooled embryos stored at −80°C. Frozen pellets of a modified polysome lysis buffer additionally including 50 μM GMP-PNP (Sigma-Aldrich) were prepared by dripping buffer into a conical tube of liquid nitrogen. 
The nitrogen was decanted, the tube capped, and buffer pellets stored at −80°C. Frozen embryos and 4–6 vol of frozen buffer pellets were ground together 6 times for 2 min each at 15 Hz in a TissueLyser (Qiagen), chilling the canisters in liquid nitrogen before and after each round of grinding. Grindate was either stored at −80°C, or thawed immediately under running tepid water. Thawed grindate was clarified by spinning at 3,000 × <italic>g</italic> in a tabletop centrifuge. Avoiding the wax and fat layers at the top, the supernatant was collected into pre-chilled microcentrifuge tubes, and clarified again by spinning 10 min at 20,000 × <italic>g</italic> at 4°C. Lysates were aliquoted, flash-frozen in liquid nitrogen, and stored at −80°C.</p></sec></sec><sec id="s4-3"><title>Ribosome footprinting</title><p>Concentrations of total RNA in lysates were determined using the RiboGreen kit (Molecular Probes by Life Technologies). For each sample, 35–100 μg total RNA was diluted 2:1 in digestion buffer (50 mM Tris pH 7.5, 5 mM MgCl<sub>2</sub>, 0.5% Triton x-100, 1 mM DTT, 20 U/ml SuperaseIn, 20 μg/ml emetine, 15 mM CaCl<sub>2</sub>, and 3 U micrococcal nuclease [Roche Applied Science, Indianapolis, Indiana] per μg of total RNA in the sample), to bring the final concentration of NaCl to 100 mM and CaCl<sub>2</sub> to 5 mM. Samples were digested for 40 min at 25°C in a Thermomixer (Eppendorf, Hamburg, Germany). Digestions were quenched by adding EGTA to a final concentration of 6.25 mM and placing the reactions on ice. 
1 U MNase is defined as previously (<xref ref-type="bibr" rid="bib60">Oh et al., 2011</xref>) as an increase of 0.005 A260 per min, measured in a Spectramax M2 plate reader (Molecular Devices, Sunnyvale, California) using 10 μg/ml salmon sperm DNA (Sigma-Aldrich) with 5 mM Ca<sup>2+</sup> and 20 mM Tris, pH 8.0 in a 0.1 ml reaction at 25°C.</p></sec><sec id="s4-4"><title>Sucrose gradients</title><p>10–50% sucrose gradients were prepared in polysome gradient buffer (250 mM NaCl, 15 mM MgCl<sub>2</sub>, 20 U/ml SuperaseIn, 20 μg/ml emetine) using a GradientMaster (Biocomp Instruments, Fredericton, New Brunswick, Canada) in polyclear centrifuge tubes (Seton Scientific, Petaluma, California). Up to 200 µl of samples was applied to the top of each gradient. Gradients were resolved by spinning for 3 hr at 35 krpm at 4°C in an SW-41 rotor (Beckman Coulter, Brea, California), and fractionated using the GradientMaster. When appropriate, monosome fractions were collected, flash-frozen in liquid nitrogen, and stored at −80°C.</p></sec><sec id="s4-5"><title>Sucrose cushions</title><p>Up to 0.5 ml of digested sample was layered atop 1.0 ml of a solution of 34% sucrose in polysome gradient buffer. Monosomes were sedimented by spinning for 4 hr at 70 krpm at 4°C in a TLA-110 rotor (Beckman Coulter). Pellets were resuspended in 600 µl 10 mM Tris, pH 7.0 and stored at −20°C.</p></sec><sec id="s4-6"><title>Ribosome profiling of <italic>D. melanogaster</italic></title><p>Lysates were prepared and footprinted as above. Unless otherwise indicated, monosomes were enriched by sedimentation through 34% sucrose cushions and resuspended in 600 µl 10 mM Tris, pH 7.0. Resuspended monosomes were extracted once with 700 µl 65°C acid phenol and 40 µl 10% SDS, followed by 650 µl acid phenol and a final extraction with chloroform. RNA was precipitated for at least 2 hr at −30°C, resuspended in 10 mM Tris, pH 7.0, and quantitated on a NanoDrop spectrophotometer (Thermo Scientific, Asheville, North Carolina). 
5–35 μg RNA was dephosphorylated for 1 hr at 37°C using T4 polynucleotide kinase (New England Biolabs, Ipswich, Massachusetts) in a 50 µl reaction and resolved on a 15% TBE-urea gel (Invitrogen by Life Technologies). A gel slab spanning 28–34 nt (as measured by oligoribonucleotide size standards in a neighboring lane; see <xref ref-type="supplementary-material" rid="SD2-data">Supplementary file 2</xref>) was excised from the gel, eluted, and precipitated. Samples were then carried through all steps of library generation (see below).</p></sec><sec id="s4-7"><title>Poly(A)+ RNA-seq of <italic>D. melanogaster</italic></title><p>For each sample, 375 µl of undigested polysome lysate was diluted into 3 vol Trizol LS (Invitrogen) and total RNA was extracted following the manufacturer’s instructions. 20–50 μg Poly(A)+ RNA was selected on oligo-dT25 DynaBeads (Invitrogen) per manufacturer’s instructions, and fragmented at 95°C in fragmentation buffer (2 mM EDTA, 100 mM Na<sub>2</sub>CO<sub>3</sub>/NaHCO<sub>3</sub>, pH 9.2) to a mean size of roughly 100 nt. Fragmented RNA was precipitated, dephosphorylated for 1 hr at 37°C with T4 polynucleotide kinase (New England Biolabs), and resolved on a 15% TBE-urea gel. A gel slab corresponding to 55–65 nt was excised from the gel, eluted, and precipitated. Samples were then carried through all steps of library generation (see below).</p></sec><sec id="s4-8"><title>Subtractive hybridization to remove rRNA-derived fragments</title><p>We performed two sequential rounds of subtractive hybridization on each sample. To 5 µl cDNA the following were added: 1 µl 20× SSC, 3 µl nuclease-free water, and 1 µl of a 60 μM mixture of the biotinylated oligonucleotides oJGD132, oJGD133, oJGD134, oJGD135, oJGD136, oJGD161, oJGD162, oJGD163, and oJGD164 (sequences in <xref ref-type="supplementary-material" rid="SD2-data">Supplementary file 2</xref>) mixed in a ratio of 25.5:1:13:17:4:6:2:11:21. 
Samples were denatured for 90 s at 95°C and annealed for 20 min at 25°C. MyOne Streptavidin C1 DynaBeads (Invitrogen) were prepared as follows: for each sample, 45 µl of beads were aliquoted into a microcentrifuge tube and washed three times in 50 µl 2 × binding buffer (10 mM Tris, pH 7.5, 1 mM EDTA, 2 M NaCl), and resuspended in 22.5 µl 2 × binding buffer. 10 µl equilibrated beads were added to 10 µl hybridized sample. The mixture was incubated for 20 min in a room temperature Thermomixer with shaking at 850 rpm. Beads were then separated on a magnetic manifold (Invitrogen) and the supernatant recovered to a microcentrifuge tube.</p><p>For the second round of subtraction, 1 µl 60 μM biotinylated oligo mix and 1 µl 20× SSC were added to the supernatant from the first subtraction, and the denaturation and annealing repeated. 10 µl of equilibrated beads were pelleted on a magnetic manifold. The buffer was removed, and the beads resuspended in the mixture from the second hybridization. Samples were then incubated for 20 min in a room temperature Thermomixer with shaking at 850 rpm. The supernatant was recovered on a magnetic manifold, transferred to a microcentrifuge tube, precipitated, and resuspended in 15 µl 10 mM Tris, pH 8.0.</p></sec><sec id="s4-9"><title>Library generation</title><p>RNA concentrations were measured using the Small RNA Series II Bioanalyzer assay (Agilent Technologies, Santa Clara, California). 10–15 picomoles of RNAs were ligated to 1 μg 3′ miRNA cloning linker 1 (Integrated DNA Technologies, Coralville, Iowa) for 2 hr 30 min at 25°C in ligase buffer (1× T4 RNA ligase 2 buffer [New England Biolabs], 40% PEG-100 [Sigma-Aldrich], 5% DMSO, T4 RNA ligase 2 K227Q, truncated [a kind gift from Calvin Jan]) in a 20 µl reaction. Ligated fragments were precipitated for at least 2 hr at −30°C, purified on a 10% TBE-urea gel, eluted, and precipitated. 
Ligation products were then reverse-transcribed using SuperScript III (Invitrogen) in a 16.7 µl reaction using the primer o225-link1 (see <xref ref-type="supplementary-material" rid="SD2-data">Supplementary file 2</xref>). RNA template was hydrolyzed by addition of 1/10 vol 1 M NaOH and incubation at 95°C for 20 min. cDNAs were purified on a 10% TBE-urea gel (Invitrogen), eluted, precipitated, and resuspended in 5 µl 10 mM Tris pH 7.0. cDNAs from footprint samples were subjected to two rounds of subtractive hybridization as described above.</p><p>Subtracted samples were circularized using CircLigase (Epicentre, Madison, Wisconsin), following manufacturer’s instructions in a 20 µl reaction. An additional microliter of CircLigase was then added, and the circularization repeated a second time. Circularized libraries were amplified by 6–12 cycles of PCR using oNTI231 and any of four indexing primers oCJ30–33 (<xref ref-type="supplementary-material" rid="SD2-data">Supplementary file 2</xref>) using Phusion polymerase (Finnzymes by ThermoScientific) in a 17 µl reaction. Amplification products were size-selected on 8% TBE gels (Invitrogen), eluted, precipitated, and resuspended in 10 µl 10 mM Tris, pH 8.0. Samples were then quantitated using the Bioanalyzer High Sensitivity DNA assay (Agilent Technologies), diluted to 2 nM, multiplexed as needed, and subjected to 50–57 cycles of single-end sequencing on an Illumina HiSeq sequencer (Illumina, San Diego, CA) using version 3 clustering and sequencing kits with a 6-cycle index read (Illumina).</p></sec><sec id="s4-10"><title>Sequence processing and alignment</title><p>For all <italic>Drosophila</italic> experiments we used revision 5.43 of the FlyBase genome annotation and the corresponding genome assembly (<xref ref-type="bibr" rid="bib50a">Marygold et al., 2013</xref>). Reads were demultiplexed and cleaned of 3′ cloning adapters using in-house scripts. Reads shorter than 25 nt were discarded. 
Remaining reads were aligned using Bowtie version 0.12.7 (<xref ref-type="bibr" rid="bib44">Langmead et al., 2009</xref>) sequentially to Bowtie indices composed of the following sequences: (a) <italic>D. melanogaster</italic> rRNAs (GenBank accession #M21017 [<xref ref-type="bibr" rid="bib71">Tautz et al., 1988</xref>] and from FlyBase), (b) <italic>D. melanogaster</italic> tRNAs, snoRNAs, and snRNAs (from FlyBase), (c) cloning oligos, (d) the S288C yeast genome version R64-1-1 (downloaded on 6 June 2011 from <ext-link ext-link-type="uri" xlink:href="http://downloads.yeastgenome.org/sequence/S288C_reference/genome_releases/">http://downloads.yeastgenome.org/sequence/S288C_reference/genome_releases/</ext-link>), (e) <italic>Wolbachia</italic> (GenBank accession #AE017196)<italic>,</italic> (f) <italic>D. melanogaster</italic> chromosome arms, and (g) splice junctions (from FlyBase and, in the case of embryos, supplemented with junctions discovered in the pooled embryo mRNA datasets using HMMSplicer 0.95 [<xref ref-type="bibr" rid="bib11">Dimon et al., 2010</xref>]). For all quantitative analyses, we counted only uniquely-mapped reads.</p><p>Alignments were assigned to genomic coordinates as follows. Randomly-fragmented poly(A)+ mRNA alignments were counted along the entire length of the alignment. Each genomic position covered by a single RNA fragment was incremented 1/<italic>l</italic>, where <italic>l</italic> corresponds to the length of the alignment. Ribosome-protected footprint alignments were mapped to their estimated P-sites as follows: 12 nt were pruned from each end of the alignment, leaving a fragment <italic>n</italic> nt long (where <italic>n</italic> = <italic>l −</italic> 2 × 12). Each genomic position covered by a nucleotide remaining in the pruned alignment was then incremented by 1/<italic>n</italic>. 
Thus, the P-site of each 25-mer was assigned to one unique position, while the P-site of each 26-mer was spread over two positions, each incremented by 0.5 reads, and so on. Alignment statistics are given in <xref ref-type="supplementary-material" rid="SD1-data">Supplementary file 1B</xref>.</p></sec><sec id="s4-11"><title>Attribution of counts to genes and transcripts</title><sec id="s4-11-1"><title>Masking of degenerate genomic positions</title><p>To determine which positions in the genome give rise to reads that fail to uniquely map, we divided the genome into all possible 29-mers centered on each nucleotide position, and aligned the resulting 29-mers back to the genome allowing zero mismatches. If the 29-mer aligned to multiple sites, the position from which it arose was flagged as degenerate. All such positions were excluded from further analysis.</p></sec><sec id="s4-11-2"><title>Attribution of nucleotide positions to loci</title><p>Because the genome annotation contains polycistronic transcripts in which each cistron is annotated as belonging to a separate gene—for example <italic>tarsal-less/polished rice</italic>, which is annotated as four separate genes (FBgn0259730–3)—we collapsed each set of genes whose transcripts share exons (370 genes total) into 179 merged loci. All nucleotide positions in any transcript deriving from a locus were attributed to that locus. Any nucleotide position attributed to multiple loci (e.g., overlapping genes on the same strand) was excluded from further analyses on the gene or transcript levels.</p></sec><sec id="s4-11-3"><title>Attribution of nucleotide positions to exons, 5′UTRs, 3′ UTRs, and coding regions</title><p>For each locus, any position included in any transcript deriving from that locus was included in the list of exonic positions for that gene. 
Any exonic position which could be labeled as two or more of CDS, 5′UTR, or 3′ UTR depending on the transcript isoform was still counted as exonic, but was excluded from analyses that required positions to be uniquely labeled (e.g., comparisons of translation in 5′ or 3′ UTRs to CDS) unless otherwise noted.</p></sec><sec id="s4-11-4"><title>Filtering of countable loci</title><p>For all analyses, we counted only loci or transcripts deriving from loci that contain at least 95% non-degenerate positions and are at least 60 nucleotides in exonic length, after exclusion of degenerate positions and positions covered by multiple loci. Genes and transcripts that are not translated but which may contaminate the data due to their abundance (<italic>e.g.,</italic> those that encode microRNAs, rRNAs, snRNAs, snoRNAs, and tRNAs) were excluded from analysis. We also excluded the loci <italic>mod(mdg-4)</italic> (which contains transcripts deriving from both strands) and <italic>Yeti</italic> (for which transcript annotations existed on chromosome arms 3R and 2RHet).</p></sec></sec><sec id="s4-12"><title>Measurements of gene expression</title><p>mRNA abundance and ribosome density for each genomic feature were measured in reads per kilobase of feature length per million reads aligning to chromosomes or splice junctions in the dataset (RPKM), a unit which corrects for both feature length and sequencing depth. Unless otherwise indicated, the RPKM values we report for mRNA abundance reflect the total number of RNA fragments aligning to all countable exonic positions for a given locus. For ribosome density, we report the total number of ribosome-protected footprint fragments aligning to all countable positions of a coding region (CDS) for a given locus. We calculate translation efficiency as the ratio of footprint RPKM in the CDS to the RNA fragment RPKM across the entire locus. 
When comparing mRNA fragment or footprint density between samples, we restricted our analyses to genes that had at least 128 summed counts between replicates as determined in <xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2</xref>. When comparing translation efficiencies between samples, we required at least 128 exonic counts of mRNA for each gene.</p></sec><sec id="s4-13"><title>Translation efficiency of 5′ UTRs, CDS, and 3′ UTRs</title><p>Translation efficiencies for these regions were calculated as the ratio of footprint counts to mRNA counts in each region, for all regions with at least 128 mRNA counts. We excluded all positions that could be labeled as two or more of 5′ UTR, CDS, or 3′ UTR depending upon transcript isoform. To remove variability or bleedthrough introduced by start and stop codon peaks, we additionally excluded the following genomic positions from consideration: 9 nucleotides preceding each start codon, 15 nucleotides following each start codon, the 15 nucleotides preceding each stop codon, and the 15 nucleotides following each stop codon.</p></sec><sec id="s4-14"><title>Identification of C-terminal protein extensions</title><sec id="s4-14-1"><title>Mapping predicted extensions to transcripts in the modern annotation</title><p>C-terminal extensions predicted by <xref ref-type="bibr" rid="bib34">Jungreis et al. (2011)</xref> were mapped onto the FlyBase annotation 5.43 as follows: First, 26 predicted extensions that overlap regions that are annotated as coding (for reasons other than readthrough) in the present annotation were excluded from further analysis. One additional extension was excluded because it overlapped the 5′ UTR of another gene. 
The remaining 256 extensions were mapped to transcripts in FlyBase r5.43 that satisfied the following criteria: (a) if the transcript contains an annotated 3′ UTR, it fully covers the extension and (b) the transcript’s annotated stop codon must immediately precede the extension in transcript coordinates.</p></sec><sec id="s4-14-2"><title>Readthrough rates</title><p>Stop codon readthrough rates were evaluated by dividing the ribosome density (in RPKM) for each C-terminal extension by the ribosome density in the corresponding CDS. In cases where multiple transcript isoforms contained the same extension, the transcript that minimized the ratio of ribosome footprint density in the extension to the density in the CDS was reported. To control for variability introduced by start and stop codon peaks (see <xref ref-type="fig" rid="fig2s1">Figure 2—figure supplement 1</xref>), we excluded the following genomic positions from our totals: 12 nucleotides following the start codon, the 15 nucleotides preceding the stop codons of the coding region and the extension, and the 9 nucleotides following the stop codon of the coding region.</p></sec><sec id="s4-14-3"><title>Scoring of predicted extensions</title><p>A predicted extension was scored positively if: (a) there existed ribosome density in the extension, (b) ribosome density vanished or unambiguously decreased after the extension’s in-frame stop codon, and (c) positions occupied by ribosomes in the readthrough region were evenly-spaced throughout the extension. When ribosome density was sparse in the extension, we relaxed criterion (c) and additionally required a peak of at least two reads at the extension’s stop codon. 
Aside from <italic>Kelch,</italic> which has been demonstrated to undergo readthrough experimentally (<xref ref-type="bibr" rid="bib65">Robinson and Cooley, 1997</xref>), we did not positively score any extension that contained a methionine in its first three codons, as these could represent downstream cistrons rather than true extensions. Furthermore, we required read density upstream of the first methionine in any extension containing a methionine codon.</p></sec><sec id="s4-14-4"><title>Identification of novel extensions</title><p>We identified all coding transcripts in FlyBase r5.43 in which: (a) the 3′ UTR was annotated, (b) a C-terminal extension was not predicted by Jungreis et al., (c) there were at least five codons between the annotated stop codon of the CDS and the next in-frame stop codon, and (d) the region between these stop codons (the putative extension) did not overlap any annotated CDS, 5′UTR, tRNA, rRNA, snRNA, snoRNA, miRNA, or pre-miRNA. We additionally excluded extensions whose translation could be explained by alternative splice isoforms whose transcripts omitted the stop codon, using splice junctions from FlyBase revision 5.43 and inferred from our own RNA-seq data, as described above.</p><p>Following the same scoring criteria we used for the extensions predicted by Jungreis et al., we scored each candidate extension that met the following criteria: (a) a minimum read density of 0.2 RPKM in the extension, (b) a minimum readthrough rate of 0.001, (c) at least 10% of the nucleotide positions in the extension covered by reads, (d) the first read occurring within the first quartile of extension length, (e) the last read occurring within the last quartile of the extension length, and (f) a 75% or greater decrease in read density in the first 114 nucleotides of distal 3′ UTR compared to the extension. 
To calculate this last statistic for transcripts whose distal 3′ UTR was less than 114 nt in length, we extended the distal 3′ UTR in uninterrupted genomic coordinates to 114 nt in length.</p></sec></sec><sec id="s4-15"><title>Metagene analyses</title><p>For each analysis (<xref ref-type="fig" rid="fig2s1">Figure 2—figure supplement 1</xref>, <xref ref-type="fig" rid="fig4">Figure 4B</xref>), we identified regions of interest (ROIs) germane to the analysis. In <xref ref-type="fig" rid="fig2s1">Figure 2—figure supplement 1</xref>, these included roughly 3000 ROIs each for the left and right panels, each of which met the following criteria: (a) all transcripts deriving from that gene had one annotated start codon (left panel) or stop codon (right panel), (b) all transcripts deriving from that locus covered identical genomic positions over the region of interest (ROI) shown, (c) all positions within the ROI were non-degenerate (see ‘Materials and methods’), and (d) at least 10 reads were present in the coding subregion of the ROI. For coding regions in <xref ref-type="fig" rid="fig4">Figure 4C</xref>, we kept the same criteria as above but required only 0.5 reads in the coding subregion of each ROI, yielding roughly 7401 ROIs for that set. For C-terminal extensions, we required only that the extension be long enough to cover the interval shown, and have 0.5 reads in the coding subregion, allowing us to include 123 of the 350 extensions.</p><p>For each ROI, we then generated a ‘coverage vector’ tallying ribosome density at each nucleotide position. We then normalized each coverage vector to the mean number of footprint reads covering the annotated coding region in the ROI, excluding a 3-codon buffer flanking the start or stop codon to avoid bleedthrough from initiation or termination peaks. 
We then plotted the median value across all normalized coverage vectors at each position.</p></sec><sec id="s4-16"><title>Search for genomic polymorphisms and A-to-I editing</title><p>To improve our sensitivity in detection, we re-aligned our footprint and mRNA datasets to a Bowtie database of spliced transcript models, allowing three mismatches (where we previously only allowed two). For the first, second, and third nucleotide position in each unique, annotated stop codon, we counted the number of matching and mismatching nucleotides in each read alignment covering that position. We ignored mismatches that occurred in the first position of the read alignment, because they frequently arise from non-templated nucleotide addition by reverse transcriptase. Considering the first, second, and third positions of each stop codon separately, we calculated a global average mismatch frequency for each. We then searched for individual stop codon positions that far exceeded the corresponding global average using a binomial test, controlling the false discovery rate at 5% following the procedure of Benjamini and Hochberg (<xref ref-type="bibr" rid="bib2">Benjamini and Hochberg, 1995</xref>). We performed this analysis separately upon each of three datasets: total mRNA, total footprints, and the subset of footprints whose P-sites had passed the nucleotide position in question, following the P-site assignment rules described above.</p></sec><sec id="s4-17"><title>Immunoprecipitation and western blotting</title><p>4–48 ml of transiently or stably transfected cells were harvested 48 or 72 hr post-transfection by centrifuging for 2 min at 1600 rpm in a tabletop centrifuge. All cell pellets were rinsed once in PBS, and flash-frozen in a bath of dry ice in ethanol. 
Cell pellets were thawed and lysed for at least 15 min on ice in 0.5–1.5 ml lysis buffer (150 mM NaCl, 50 mM Tris pH 7.5, 1% Triton X-100, 1 mM EDTA and 1× complete protease inhibitor cocktail [Roche Applied Science]), depending upon the pellet size. Lysates were clarified by spinning 10 min at 20,000 × <italic>g</italic> in a microcentrifuge and supernatants collected. GFP reporters were immunoprecipitated on 10 μl of anti-GFP beads (Chromotek, Planegg-Martinsried, Germany) equilibrated in IP wash buffer (150 mM NaCl, 50 mM Tris pH 7.5, 1 mM EDTA, 0.05% Triton X-100). The bound fraction was washed three times in IP wash buffer, and finally eluted by boiling for at least 5 min in NuPage sample loading buffer (Invitrogen). Supernatants were collected and transferred to new tubes, and stored at −20°C.</p><p>For western blotting, samples were resolved on 4–12% NuPage gels (Invitrogen) in MOPS buffer. Gel lanes were loaded such that the amounts of uncleaved GFP reporter in each lane were as equal as possible. GFP was detected using a mouse anti-GFP antibody (Roche Applied Science), and visualized using IR800 anti-mouse antibodies on a LI-COR Odyssey system (LI-COR, Lincoln, Nebraska). FLAG was similarly detected on a separate gel, instead using the M2 mouse anti-FLAG antibody (Sigma-Aldrich).</p></sec><sec id="s4-18"><title>PhyloCSF analysis</title><p>PhyloCSF analysis was performed on all C-terminal extensions at least five codons long, exclusive of the stop codons. Multiple species alignments were obtained from the <italic>Drosophila</italic> 12-way multispecies alignment as downloaded from the UCSC genome browser, and stitched together over regions of interest using the Phast utility maf_parse (<xref ref-type="bibr" rid="bib28">Hubisz et al., 2011</xref>). PhyloCSF was then used to evaluate the extension on the empirical codon model ‘12flies’. Columns in which the <italic>D. melanogaster</italic> sequence contained gaps were ignored. 
Alignments that contained no sequence besides that from <italic>D. melanogaster</italic> were not evaluated.</p></sec><sec id="s4-19"><title>Z-Curve classifier</title><p>We calculated the 189-variable Z-curve as previously described (<xref ref-type="bibr" rid="bib20">Gao and Zhang, 2004</xref>). We empirically determined that the classifier became error-prone if trained on sequences 81 nt or shorter in length. Our training set consisted of 81-nucleotide windows drawn from coding regions (the positive set, 14,507 windows) or from portions of distal 3′ UTRs that did not overlap annotated coding regions or 5′ UTRs (the negative set, 8151 windows). To assay the stability of the classifier’s behavior and control for overfitting, we trained the classifier with fourfold cross-validation training on 2200 windows from the CDS set and 2200 windows from the distal 3′ UTR set, yielding an average misclassification error of 6.9–7.3% with each iteration. We repeated this analysis (and cross-validation) several times selecting different 81-nucleotide windows from each CDS and distal 3′ UTR, obtaining similar levels of error. The classifier was then trained on the entire training set, and used to evaluate randomly chosen 81 nt windows from observed C-terminal protein extensions that were 81 nt or greater in length. These included 26 extensions predicted by Jungreis et al. 
and 83 novel extensions.</p></sec><sec id="s4-20"><title>SNP analysis</title><p>We downloaded SNP data from the <italic>Drosophila</italic> Population Genomics Project from Ensembl.org (release 67; <xref ref-type="bibr" rid="bib17a">Flicek et al., 2013</xref>) and counted the proportion of SNPs that, if translated in frame, would cause synonymous substitutions in coding regions, extensions, and distal 3′ UTRs.</p></sec><sec id="s4-21"><title>Tests for differential regulation of readthrough rates</title><p>To test C-terminal extensions for differential readthrough rates, we examined all extensions which met the following criteria: (1) all annotated isoforms covering the extension contained exactly the same CDS, (2) the CDS had at least 128 total footprint reads in each of the S2 cell and embryo samples, and (3) the C-terminal extension had been scored as positive for readthrough in the S2 and/or the embryo sample. For those extensions, we tabulated the footprint reads that aligned to the CDS and putative extension, masking out regions normally covered by start and stop codon peaks as described (see section ‘Readthrough rates’, above). For each extension, this tabulation yielded a 2 × 2 contingency table of reads aligning to the CDS and extension in the S2 cell and 0–2 hr embryo datasets. We evaluated the statistical significance of asymmetry in the contingency tables using Fisher’s exact test, and controlled the false discovery rate at 5% using the procedure of Benjamini and Hochberg (<xref ref-type="bibr" rid="bib2">Benjamini and Hochberg, 1995</xref>).</p></sec><sec id="s4-22"><title>Human and yeast ribosome profiling data</title><p>For human cells, we collected data from uninfected human foreskin fibroblasts and processed it as previously described (<xref ref-type="bibr" rid="bib70">Stern-Ginossar et al., 2012</xref>). 
Yeast samples were collected from [<italic>psi</italic><sup><italic>−</italic></sup>] W303 and processed as previously described (<xref ref-type="bibr" rid="bib30">Ingolia et al., 2009</xref>), with the exception that a 3′ linker ligation strategy was used instead of poly(A) tailing for fragment capture. For phasing of yeast footprints, we counted only 28-mers, which have previously been shown to be the best-phased footprint population in that organism (<xref ref-type="bibr" rid="bib30">Ingolia et al., 2009</xref>).</p></sec><sec id="s4-23"><title>Motif prediction</title><p>C-terminal extensions 20 amino acids or longer were scanned for transmembrane domains using TmHmm (<xref ref-type="bibr" rid="bib42">Krogh et al., 2001</xref>) using default settings. Nuclear localization signals were predicted in extensions 20 amino acids or longer using the cNLS mapper (<xref ref-type="bibr" rid="bib41">Kosugi et al., 2009</xref>), with a score cutoff of 7.0. Peroxisome targeting signals were predicted for all extensions 12 amino acids or longer using PTS1 Predictor (<xref ref-type="bibr" rid="bib59">Neuberger et al., 2003</xref>) with the signal type set to ‘metazoan’. Prenylation signals were predicted for all extensions 12 amino acids or longer using PrePS (<xref ref-type="bibr" rid="bib50">Maurer-Stroh and Eisenhaber, 2005</xref>). In addition, we searched for ER retention signals using the consensus [KH]DEL*.</p><p>We searched 3′ UTRs (including the predicted extension and entire distal 3′ UTR) for selenocysteine insertion elements using SeciSearch 2.19 (<xref ref-type="bibr" rid="bib43">Kryukov et al., 2003</xref>) with parameters set as follows: e1 = 05, e2 = −22, Y_filter = True, O_filter = True, B_filter = True, S_filter = True. 
We searched each 3′ UTR using every available SECIS Pattern (pat_c, pat_Sep20, pat_dm, pat_g, and pat_s), and considered a 3′ UTR receiving a COVE score above the recommended threshold of 15 in any of the pattern searches to contain a SECIS element. Additionally, we excluded any extensions that were annotated as selenoproteins in SelenoDB (<xref ref-type="bibr" rid="bib6">Castellano et al., 2008</xref>; for <italic>Drosophila,</italic> yeast, and human data) or FlyBase (<xref ref-type="bibr" rid="bib50a">Marygold et al., 2013</xref>; for <italic>Drosophila</italic>). For transcripts with no annotated or short 3′ UTRs, we extended the 3′ UTR in uninterrupted genome coordinates until it was 1000 nucleotides in length, as in <xref ref-type="bibr" rid="bib34">Jungreis et al. (2011)</xref>.</p></sec><sec id="s4-24"><title>Microscopy</title><p>S2 cells stably transfected with the reporter of interest were maintained at a density of 1.6–12 million cells/ml. Nuclei were visualized by staining with 1 μg/ml Hoechst 34580 (Invitrogen) for at least 5 min. Live cells were imaged in culture media on an inverted spinning disk confocal Nikon Ti microscope (Nikon Instruments, Melville, NY) in glass-bottom culture dishes (MatTek, Ashland, MA). Images were contrast-adjusted and prepared for presentation in Adobe Photoshop (Adobe Systems, San Jose, CA).</p></sec><sec id="s4-25"><title>Data files</title><p>Both the raw data (as FastQ files) and processed data (wiggle files) are available in NCBI’s Gene Expression Omnibus (<xref ref-type="bibr" rid="bib13">Edgar et al., 2002</xref>) under GEO series accession number GSE49197 (<ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE49197">http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE49197</ext-link>). 
Supplementary tables 1–4 are available at Dryad (<xref ref-type="bibr" rid="bib12">Dunn et al., 2013</xref>; <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.5061/dryad.6nr73">http://dx.doi.org/10.5061/dryad.6nr73</ext-link>):</p><p>Supplementary table 1: gene expression measurements in 0–2 hr embryos and S2 cells. Source data for <xref ref-type="fig" rid="fig1 fig2">Figures 1 and 2</xref>, as well as their supplements.</p><p>Supplementary table 2: readthrough statistics for <italic>Drosophila melanogaster</italic>. Source data for <xref ref-type="fig" rid="fig3 fig4 fig6">Figures 3, 4 and 6</xref>, as well as their supplements, and annotations of readthrough events in <italic>Drosophila melanogaster</italic>.</p><p>Supplementary table 3: readthrough statistics for <italic>Saccharomyces cerevisiae</italic>. Source data for <xref ref-type="fig" rid="fig5">Figure 5</xref> and annotations of readthrough events in [<italic>psi</italic><sup>-</sup>] W303 yeast.</p><p>Supplementary table 4: readthrough statistics for human foreskin fibroblasts. 
Source data for <xref ref-type="fig" rid="fig5">Figure 5</xref> and annotations of readthrough events in human foreskin fibroblasts.</p><p><xref ref-type="supplementary-material" rid="SD1-data">Supplementary file 1</xref> provides alignment statistics and <xref ref-type="supplementary-material" rid="SD2-data">Supplementary file 2</xref> contains the oligonucleotides used in this study.</p></sec><sec id="s4-26"><title>Other software and libraries</title><p>We wrote custom scripts in <italic>Python</italic> 2.7, using the following open-source libraries: Numpy 1.6.0 (<ext-link ext-link-type="uri" xlink:href="http://numpy.scipy.org">http://numpy.scipy.org</ext-link>), Scipy 0.11.0rc2 (<ext-link ext-link-type="uri" xlink:href="http://www.scipy.org">http://www.scipy.org</ext-link>), Biopython 1.59 (<xref ref-type="bibr" rid="bib10">Cock et al., 2009</xref>), PySam, and HTSeq 0.5.1p2 (<ext-link ext-link-type="uri" xlink:href="http://www-huber.embl.de/users/anders/HTSeq/doc/overview.html">http://www-huber.embl.de/users/anders/HTSeq/doc/overview.html</ext-link>). Plots and genome browser snapshots were generated using Matplotlib 1.0.1 (<xref ref-type="bibr" rid="bib29">Hunter, 2007</xref>).</p></sec></sec></body><back><ack id="ack"><title>Acknowledgements</title><p>We thank John Atkins for pointing out the importance of translation readthrough in <italic>Drosophila;</italic> Irwin Jungreis and Manolis Kellis for critical comments on the manuscript; Noam Ginossar for supplying the human foreskin fibroblast ribosome profiling data; Jeffrey Farrell, Tony Shermoen, and Hansong Ma for useful conversation and help with handling flies; Onn Brandman, Luke Gilbert, Noam Ginossar, Calvin Jan, Gene-wei Li, and Eugene Oh for advice on laboratory and analytical methods; and Clement Chu, Jessica Lund, and Silvi Rouskin for help with sequencing. 
We also thank Jessica Walter, the UCSF Cell Propulsion Lab, DeLaine Larsen, and the Nikon Imaging Center at UCSF for help with imaging.</p></ack><sec sec-type="additional-information"><title>Additional information</title><fn-group content-type="competing-interest"><title>Competing interests</title><fn fn-type="conflict" id="conf1"><p>The authors declare that no competing interests exist.</p></fn></fn-group><fn-group content-type="author-contribution"><title>Author contributions</title><fn fn-type="con" id="con1"><p>JGD, Performed <italic>Drosophila</italic> ribosome profiling, Developed bioinformatic methods, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con2"><p>CKF, Performed yeast ribosome profiling, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con3"><p>NGB, Drafting or revising the article, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con4"><p>ERG, Drafting or revising the article, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con5"><p>JSW, Conception and design, Analysis and interpretation of data, Drafting or revising the article</p></fn></fn-group></sec><sec sec-type="supplementary-material"><title>Additional files</title><supplementary-material id="SD1-data"><object-id pub-id-type="doi">10.7554/eLife.01179.023</object-id><label>Supplementary file 1.</label><caption><title>Alignment statistics.</title><p>Provides statistics on read alignments by sample and genomic region (e.g., CDS, 5′ UTR, 3′ UTR, intergenic, etc.; <bold>A</bold>), as well as by sample and alignment type (e.g., chromosomal, spliced, unaligned; <bold>B</bold>).</p></caption><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.023">http://dx.doi.org/10.7554/eLife.01179.023</ext-link></p><media mime-subtype="xls" 
mimetype="application" xlink:href="elife01179s001.xls"/></supplementary-material><supplementary-material id="SD2-data"><object-id pub-id-type="doi">10.7554/eLife.01179.024</object-id><label>Supplementary file 2.</label><caption><title>Oligonucleotides used in this study.</title><p>For readers who wish to implement the <italic>Drosophila</italic> ribosome profiling protocol.</p></caption><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01179.024">http://dx.doi.org/10.7554/eLife.01179.024</ext-link></p><media mime-subtype="xls" mimetype="application" xlink:href="elife01179s002.xls"/></supplementary-material><sec sec-type="datasets"><title>Major datasets</title><p>The following datasets were generated:</p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro1"><name><surname>Dunn</surname><given-names>JG</given-names></name>, <name><surname>Weissman</surname><given-names>JS</given-names></name>, <year>2013</year><x>, </x><source>Ribosome profiling reveals pervasive and regulated stop codon readthrough in <italic>Drosophila melanogaster</italic></source><x>, </x><ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE49197">http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE49197</ext-link><x>, </x><comment>Publicly available at NCBI GEO (<ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/">http://www.ncbi.nlm.nih.gov/geo/</ext-link>).</comment></related-object></p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro2"><name><surname>Dunn</surname><given-names>JG</given-names></name>, <name><surname>Foo</surname><given-names>CK</given-names></name>, <name><surname>Belletier</surname><given-names>NG</given-names></name>, <name><surname>Gavis</surname><given-names>ER</given-names></name>, 
<name><surname>Weissman</surname><given-names>JS</given-names></name>, <year>2013</year><x>, </x><source>Data from: Ribosome profiling reveals pervasive and regulated stop codon readthrough in <italic>Drosophila melanogaster</italic></source><x>, </x><ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.5061/dryad.6nr73">http://dx.doi.org/10.5061/dryad.6nr73</ext-link><x>, </x><comment>Supplementary tables 1–4 publicly available at Dryad (<ext-link ext-link-type="uri" xlink:href="http://www.datadryad.org/">http://www.datadryad.org</ext-link>).</comment></related-object></p><p>The following previously published datasets were used:</p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro3"><collab>Drosophila Population Genomics Project</collab>, <year>2012</year><x>, </x><source>Data from: 50 Genomes - release 1.0</source><x>, </x><ext-link ext-link-type="uri" xlink:href="ftp://ftp.ensembl.org/pub/release-67/variation/gvf/drosophila_melanogaster/Drosophila_melanogaster.gvf.gz">ftp://ftp.ensembl.org/pub/release-67/variation/gvf/drosophila_melanogaster/Drosophila_melanogaster.gvf.gz</ext-link><x>, </x><comment>Freely available online. Downloaded from Ensembl on 23 August.</comment></related-object></p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro4"><collab>The Flybase Consortium</collab>, <source>Data from: <italic>D. 
melanogaster</italic> genome annotation revision 5.43</source><x>, </x><ext-link ext-link-type="uri" xlink:href="ftp://ftp.flybase.net/genomes/Drosophila_melanogaster/dmel_r5.43_FB2012_01/gff/dmel-all-no-analysis-r5.43.gff.gz">ftp://ftp.flybase.net/genomes/Drosophila_melanogaster/dmel_r5.43_FB2012_01/gff/dmel-all-no-analysis-r5.43.gff.gz</ext-link><x>, </x><comment>Freely available online.</comment></related-object></p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro5"><name><surname>Jungreis</surname><given-names>I</given-names></name>, <name><surname>Lin</surname><given-names>MF</given-names></name>, <name><surname>Spokony</surname><given-names>R</given-names></name>, <name><surname>Chan</surname><given-names>CS</given-names></name>, <name><surname>Negre</surname><given-names>N</given-names></name>, <name><surname>Victorsen</surname><given-names>A</given-names></name>, <name><surname>White</surname><given-names>KP</given-names></name>, <name><surname>Kellis</surname><given-names>M</given-names></name>, <year>2011</year><x>, </x><source>Data from: Evidence of abundant stop codon readthrough in <italic>Drosophila</italic> and other metazoa (<italic>Genome Res.</italic> 2011 21: 2096-2113)</source><x>, </x><ext-link ext-link-type="uri" xlink:href="http://genome.cshlp.org/content/suppl/2011/09/28/gr.119974.110.DC1/Data1_DmelReadthroughCandidates.txt">http://genome.cshlp.org/content/suppl/2011/09/28/gr.119974.110.DC1/Data1_DmelReadthroughCandidates.txt</ext-link><x>, </x><comment>Freely available online through the <italic>Genome Research</italic> Open Access option (<ext-link ext-link-type="uri" xlink:href="http://genome.cshlp.org/content/suppl/2011/09/28/gr.119974.110.DC1/Data1_DmelReadthroughCandidates.txt">Supp Data1.txt</ext-link>).</comment></related-object></p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" 
document-id-type="dataset" document-type="data" id="dataro6"><name><surname>Qin</surname><given-names>X</given-names></name>, <name><surname>Ahn</surname><given-names>S</given-names></name>, <name><surname>Speed</surname><given-names>TP</given-names></name>, <name><surname>Rubin</surname><given-names>GM</given-names></name>, <year>2007</year><x>, </x><source>Data from: Global analyses of mRNA translational control during early <italic>Drosophila</italic> embryogenesis (<italic>Genome Biology</italic> doi:10.1186/gb-2007-8-4-r63)</source><x>, </x><ext-link ext-link-type="uri" xlink:href="http://genomebiology.com/content/supplementary/gb-2007-8-4-r63-s1.xls">http://genomebiology.com/content/supplementary/gb-2007-8-4-r63-s1.xls</ext-link><x>, </x><comment>Open data from <italic>Genome Biology</italic> (Additional data file 1).</comment></related-object></p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro7"><collab>Saccharomyces Genome Database project</collab>, <year>2013</year><x>, </x><source>Data from: Genome Release R64-1-1 and corresponding gene annotation</source><x>, </x><ext-link ext-link-type="uri" xlink:href="http://downloads.yeastgenome.org/curation/chromosomal_feature/saccharomyces_cerevisiae.gff">http://downloads.yeastgenome.org/curation/chromosomal_feature/saccharomyces_cerevisiae.gff</ext-link><x>, </x><comment>Freely available online. 
Downloaded 22 January.</comment></related-object></p></sec></sec><ref-list><title>References</title><ref id="bib1"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Beier</surname><given-names>H</given-names></name><name><surname>Grimm</surname><given-names>M</given-names></name></person-group><year>2001</year><article-title>Misreading of termination codons in eukaryotes by natural nonsense suppressor tRNAs</article-title><source>Nucleic Acids Res</source><volume>29</volume><fpage>4767</fpage><lpage>82</lpage><pub-id pub-id-type="doi">10.1093/nar/29.23.4767</pub-id></element-citation></ref><ref id="bib2"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Benjamini</surname><given-names>Y</given-names></name><name><surname>Hochberg</surname><given-names>Y</given-names></name></person-group><year>1995</year><article-title>Controlling the false discovery rate: a practical and powerful approach to multiple testing</article-title><source>J R Stat Soc Series B Stat Methodol</source><volume>57</volume><fpage>289</fpage><lpage>300</lpage></element-citation></ref><ref id="bib3"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bonetti</surname><given-names>B</given-names></name><name><surname>Fu</surname><given-names>L</given-names></name><name><surname>Moon</surname><given-names>J</given-names></name><name><surname>Bedwell</surname><given-names>DM</given-names></name></person-group><year>1995</year><article-title>The efficiency of translation termination is determined by a synergistic interplay between upstream and downstream sequences in <italic>Saccharomyces cerevisiae</italic></article-title><source>J Mol Biol</source><volume>251</volume><fpage>334</fpage><lpage>45</lpage><pub-id pub-id-type="doi">10.1006/jmbi.1995.0438</pub-id></element-citation></ref><ref id="bib4"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Brar</surname><given-names>GA</given-names></name><name><surname>Yassour</surname><given-names>M</given-names></name><name><surname>Friedman</surname><given-names>N</given-names></name><name><surname>Regev</surname><given-names>A</given-names></name><name><surname>Ingolia</surname><given-names>NT</given-names></name><name><surname>Weissman</surname><given-names>JS</given-names></name></person-group><year>2012</year><article-title>High-resolution view of the yeast meiotic program revealed by ribosome profiling</article-title><source>Science</source><volume>335</volume><fpage>552</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1126/science.1215110</pub-id></element-citation></ref><ref id="bib5"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cassan</surname><given-names>M</given-names></name><name><surname>Rousset</surname><given-names>JP</given-names></name></person-group><year>2001</year><article-title>UAG readthrough in mammalian cells: effect of upstream and downstream stop codon contexts reveal different signals</article-title><source>BMC Mol Biol</source><volume>2</volume><fpage>3</fpage><pub-id pub-id-type="doi">10.1186/1471-2199-2-3</pub-id></element-citation></ref><ref id="bib6"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Castellano</surname><given-names>S</given-names></name><name><surname>Gladyshev</surname><given-names>VN</given-names></name><name><surname>Guigó</surname><given-names>R</given-names></name><name><surname>Berry</surname><given-names>MJ</given-names></name></person-group><year>2008</year><article-title>SelenoDB 1.0: a database of selenoprotein genes, proteins and SECIS elements</article-title><source>Nucleic Acids Res</source><volume>36</volume><fpage>D332</fpage><lpage>8</lpage><pub-id 
pub-id-type="doi">10.1093/nar/gkm731</pub-id></element-citation></ref><ref id="bib7"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chan</surname><given-names>WM</given-names></name><name><surname>Shaw</surname><given-names>PC</given-names></name><name><surname>Chan</surname><given-names>HY</given-names></name></person-group><year>2007</year><article-title>A green fluorescent protein-based reporter for protein nuclear import studies in <italic>Drosophila</italic> cells</article-title><source>Fly (Austin)</source><volume>1</volume><fpage>340</fpage><lpage>2</lpage></element-citation></ref><ref id="bib8"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chao</surname><given-names>AT</given-names></name><name><surname>Dierick</surname><given-names>HA</given-names></name><name><surname>Addy</surname><given-names>TM</given-names></name><name><surname>Bejsovec</surname><given-names>A</given-names></name></person-group><year>2003</year><article-title>Mutations in eukaryotic release factors 1 and 3 act as general nonsense suppressors in <italic>Drosophila</italic></article-title><source>Genetics</source><volume>165</volume><fpage>601</fpage><lpage>12</lpage></element-citation></ref><ref id="bib9"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cimino</surname><given-names>PA</given-names></name><name><surname>Nicholson</surname><given-names>BL</given-names></name><name><surname>Wu</surname><given-names>B</given-names></name><name><surname>Xu</surname><given-names>W</given-names></name><name><surname>White</surname><given-names>KA</given-names></name></person-group><year>2011</year><article-title>Multifaceted regulation of translational readthrough by RNA replication elements in a tombusvirus</article-title><source>PLOS Pathog</source><volume>7</volume><fpage>e1002423</fpage><pub-id 
pub-id-type="doi">10.1371/journal.ppat.1002423</pub-id></element-citation></ref><ref id="bib10"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cock</surname><given-names>PJ</given-names></name><name><surname>Antao</surname><given-names>T</given-names></name><name><surname>Chang</surname><given-names>JT</given-names></name><name><surname>Chapman</surname><given-names>BA</given-names></name><name><surname>Cox</surname><given-names>CJ</given-names></name><name><surname>Dalke</surname><given-names>A</given-names></name><etal/></person-group><year>2009</year><article-title>Biopython: freely available Python tools for computational molecular biology and bioinformatics</article-title><source>Bioinformatics</source><volume>25</volume><fpage>1422</fpage><lpage>3</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btp163</pub-id></element-citation></ref><ref id="bib11"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dimon</surname><given-names>MT</given-names></name><name><surname>Sorber</surname><given-names>K</given-names></name><name><surname>DeRisi</surname><given-names>JL</given-names></name></person-group><year>2010</year><article-title>HMMSplicer: a tool for efficient and sensitive discovery of known and novel splice junctions in RNA-Seq data</article-title><source>PLOS ONE</source><volume>5</volume><fpage>e13875</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0013875</pub-id></element-citation></ref><ref id="bib12"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Dunn</surname><given-names>JG</given-names></name><name><surname>Foo</surname><given-names>CK</given-names></name><name><surname>Belletier</surname><given-names>NG</given-names></name><name><surname>Gavis</surname><given-names>ER</given-names></name><name><surname>Weissman</surname><given-names>JS</given-names></name></person-group><year>2013</year><article-title>Data from: ribosome profiling reveals pervasive and regulated stop codon readthrough in <italic>Drosophila melanogaster</italic></article-title><source>Dryad Digital Repository</source><pub-id pub-id-type="doi">10.5061/dryad.6nr73</pub-id></element-citation></ref><ref id="bib13"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Edgar</surname><given-names>R</given-names></name><name><surname>Domrachev</surname><given-names>M</given-names></name><name><surname>Lash</surname><given-names>AE</given-names></name></person-group><year>2002</year><article-title>Gene Expression Omnibus: NCBI gene expression and hybridization array data repository</article-title><source>Nucleic Acids Res</source><volume>30</volume><fpage>207</fpage><lpage>10</lpage><pub-id pub-id-type="doi">10.1093/nar/30.1.207</pub-id></element-citation></ref><ref id="bib14"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fearon</surname><given-names>K</given-names></name><name><surname>McClendon</surname><given-names>V</given-names></name><name><surname>Bonetti</surname><given-names>B</given-names></name><name><surname>Bedwell</surname><given-names>DM</given-names></name></person-group><year>1994</year><article-title>Premature translation termination mutations are efficiently suppressed in a highly conserved region of yeast Ste6p, a member of the ATP-binding cassette (ABC) transporter family</article-title><source>J Biol 
Chem</source><volume>269</volume><fpage>17802</fpage><lpage>8</lpage></element-citation></ref><ref id="bib15"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Feng</surname><given-names>YX</given-names></name><name><surname>Yuan</surname><given-names>H</given-names></name><name><surname>Rein</surname><given-names>A</given-names></name><name><surname>Levin</surname><given-names>JG</given-names></name></person-group><year>1992</year><article-title>Bipartite signal for read-through suppression in murine leukemia virus mRNA: an eight-nucleotide purine-rich sequence immediately downstream of the gag termination codon followed by an RNA pseudoknot</article-title><source>J Virol</source><volume>66</volume><fpage>5127</fpage><lpage>32</lpage></element-citation></ref><ref id="bib16"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Firth</surname><given-names>AE</given-names></name><name><surname>Brierley</surname><given-names>I</given-names></name></person-group><year>2012</year><article-title>Non-canonical translation in RNA viruses</article-title><source>J Gen Virol</source><volume>93</volume><fpage>1385</fpage><lpage>409</lpage><pub-id pub-id-type="doi">10.1099/vir.0.042499-0</pub-id></element-citation></ref><ref id="bib17"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Firth</surname><given-names>AE</given-names></name><name><surname>Wills</surname><given-names>NM</given-names></name><name><surname>Gesteland</surname><given-names>RF</given-names></name><name><surname>Atkins</surname><given-names>JF</given-names></name></person-group><year>2011</year><article-title>Stimulation of stop codon readthrough: frequent presence of an extended 3’ RNA structural element</article-title><source>Nucleic Acids Res</source><volume>39</volume><fpage>6679</fpage><lpage>91</lpage><pub-id 
pub-id-type="doi">10.1093/nar/gkr224</pub-id></element-citation></ref><ref id="bib17a"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Flicek</surname><given-names>P</given-names></name><name><surname>Ahmed</surname><given-names>I</given-names></name><name><surname>Amode</surname><given-names>MR</given-names></name><name><surname>Barrell</surname><given-names>D</given-names></name><name><surname>Beal</surname><given-names>K</given-names></name><name><surname>Brent</surname><given-names>S</given-names></name><etal/></person-group><year>2013</year><article-title>Ensembl 2013</article-title><source>Nucleic Acids Res</source><volume>41</volume><fpage>D48</fpage><lpage>55</lpage><pub-id pub-id-type="doi">10.1093/nar/gks1236</pub-id></element-citation></ref><ref id="bib18"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Freitag</surname><given-names>J</given-names></name><name><surname>Ast</surname><given-names>J</given-names></name><name><surname>Bölker</surname><given-names>M</given-names></name></person-group><year>2012</year><article-title>Cryptic peroxisomal targeting via alternative splicing and stop codon read-through in fungi</article-title><source>Nature</source><volume>485</volume><fpage>522</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1038/nature11051</pub-id></element-citation></ref><ref id="bib19"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fritsch</surname><given-names>C</given-names></name><name><surname>Herrmann</surname><given-names>A</given-names></name><name><surname>Nothnagel</surname><given-names>M</given-names></name><name><surname>Szafranski</surname><given-names>K</given-names></name><name><surname>Huse</surname><given-names>K</given-names></name><name><surname>Schumann</surname><given-names>F</given-names></name><etal/></person-group><year>2012</year><article-title>Genome-wide search for 
novel human uORFs and N-terminal protein extensions using ribosomal footprinting</article-title><source>Genome Res</source><volume>22</volume><fpage>2208</fpage><lpage>18</lpage><pub-id pub-id-type="doi">10.1101/gr.139568.112</pub-id></element-citation></ref><ref id="bib20"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gao</surname><given-names>F</given-names></name><name><surname>Zhang</surname><given-names>C</given-names></name></person-group><year>2004</year><article-title>Comparison of various algorithms for recognizing short coding sequences of human genes</article-title><source>Bioinformatics</source><volume>20</volume><fpage>673</fpage><lpage>81</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btg467</pub-id></element-citation></ref><ref id="bib21"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Geller</surname><given-names>AI</given-names></name><name><surname>Rich</surname><given-names>A</given-names></name></person-group><year>1980</year><article-title>A UGA termination suppression tRNA<sup>Trp</sup> active in rabbit reticulocytes</article-title><source>Nature</source><volume>283</volume><fpage>41</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1038/283041a0</pub-id></element-citation></ref><ref id="bib22"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Green</surname><given-names>L</given-names></name><name><surname>Houck-Loomis</surname><given-names>B</given-names></name><name><surname>Yueh</surname><given-names>A</given-names></name><name><surname>Goff</surname><given-names>SP</given-names></name></person-group><year>2012</year><article-title>Large ribosomal protein 4 increases efficiency of viral recoding sequences</article-title><source>J Virol</source><volume>86</volume><fpage>8949</fpage><lpage>58</lpage><pub-id pub-id-type="doi">10.1128/jvi.01053-12</pub-id></element-citation></ref><ref 
id="bib23"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Guttman</surname><given-names>M</given-names></name><name><surname>Russell</surname><given-names>P</given-names></name><name><surname>Ingolia</surname><given-names>NT</given-names></name><name><surname>Weissman</surname><given-names>JS</given-names></name><name><surname>Lander</surname><given-names>ES</given-names></name></person-group><year>2013</year><article-title>Ribosome profiling provides evidence that large noncoding RNAs do not encode proteins</article-title><source>Cell</source><volume>154</volume><fpage>240</fpage><lpage>51</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2013.06.009</pub-id></element-citation></ref><ref id="bib24"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Halfmann</surname><given-names>R</given-names></name><name><surname>Jarosz</surname><given-names>DF</given-names></name><name><surname>Jones</surname><given-names>SK</given-names></name><name><surname>Chang</surname><given-names>A</given-names></name><name><surname>Lancaster</surname><given-names>AK</given-names></name><name><surname>Lindquist</surname><given-names>S</given-names></name></person-group><year>2012</year><article-title>Prions are a common mechanism for phenotypic inheritance in wild yeasts</article-title><source>Nature</source><volume>482</volume><fpage>363</fpage><lpage>8</lpage><pub-id pub-id-type="doi">10.1038/nature10875</pub-id></element-citation></ref><ref id="bib25"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hancock</surname><given-names>JM</given-names></name><name><surname>Tautz</surname><given-names>D</given-names></name><name><surname>Dover</surname><given-names>GA</given-names></name></person-group><year>1988</year><article-title>Evolution of the secondary structures and compensatory mutations of the ribosomal RNAs of <italic>Drosophila 
melanogaster</italic></article-title><source>Mol Biol Evol</source><volume>5</volume><fpage>393</fpage><lpage>414</lpage></element-citation></ref><ref id="bib26"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Harrell</surname><given-names>L</given-names></name><name><surname>Melcher</surname><given-names>U</given-names></name><name><surname>Atkins</surname><given-names>JF</given-names></name></person-group><year>2002</year><article-title>Predominance of six different hexanucleotide recoding signals 3’ of read-through stop codons</article-title><source>Nucleic Acids Res</source><volume>30</volume><fpage>2011</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1093/nar/30.9.2011</pub-id></element-citation></ref><ref id="bib27"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hatin</surname><given-names>I</given-names></name><name><surname>Fabret</surname><given-names>C</given-names></name><name><surname>Namy</surname><given-names>O</given-names></name><name><surname>Decatur</surname><given-names>WA</given-names></name><name><surname>Rousset</surname><given-names>JP</given-names></name></person-group><year>2007</year><article-title>Fine-tuning of translation termination efficiency in <italic>Saccharomyces cerevisiae</italic> involves two factors in close proximity to the exit tunnel of the ribosome</article-title><source>Genetics</source><volume>177</volume><fpage>1527</fpage><lpage>37</lpage><pub-id pub-id-type="doi">10.1534/genetics.107.070771</pub-id></element-citation></ref><ref id="bib28"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hubisz</surname><given-names>MJ</given-names></name><name><surname>Pollard</surname><given-names>KS</given-names></name><name><surname>Siepel</surname><given-names>A</given-names></name></person-group><year>2011</year><article-title>PHAST and RPHAST: phylogenetic analysis with 
space/time models</article-title><source>Brief Bioinform</source><volume>12</volume><fpage>41</fpage><lpage>51</lpage><pub-id pub-id-type="doi">10.1093/bib/bbq072</pub-id></element-citation></ref><ref id="bib29"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hunter</surname><given-names>JD</given-names></name></person-group><year>2007</year><article-title>Matplotlib: a 2D graphics environment</article-title><source>Comput Sci Eng</source><volume>9</volume><fpage>90</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1109/MCSE.2007.55</pub-id></element-citation></ref><ref id="bib30"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ingolia</surname><given-names>NT</given-names></name><name><surname>Ghaemmaghami</surname><given-names>S</given-names></name><name><surname>Newman</surname><given-names>JR</given-names></name><name><surname>Weissman</surname><given-names>JS</given-names></name></person-group><year>2009</year><article-title>Genome-wide analysis in vivo of translation with nucleotide resolution using ribosome profiling</article-title><source>Science</source><volume>324</volume><fpage>218</fpage><lpage>23</lpage><pub-id pub-id-type="doi">10.1126/science.1168978</pub-id></element-citation></ref><ref id="bib31"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ingolia</surname><given-names>NT</given-names></name><name><surname>Lareau</surname><given-names>LF</given-names></name><name><surname>Weissman</surname><given-names>JS</given-names></name></person-group><year>2011</year><article-title>Ribosome profiling of mouse embryonic stem cells reveals the complexity and dynamics of mammalian proteomes</article-title><source>Cell</source><volume>147</volume><fpage>789</fpage><lpage>802</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2011.10.002</pub-id></element-citation></ref><ref id="bib32"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Jordan</surname><given-names>BR</given-names></name></person-group><year>1975</year><article-title>Demonstration of intact 26 S ribosomal RNA molecules in <italic>Drosophila</italic> cells</article-title><source>J Mol Biol</source><volume>98</volume><fpage>277</fpage><lpage>80</lpage><pub-id pub-id-type="doi">10.1016/s0022-2836(75)80117-3</pub-id></element-citation></ref><ref id="bib33"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jordan</surname><given-names>BR</given-names></name><name><surname>Jourdan</surname><given-names>R</given-names></name><name><surname>Jacq</surname><given-names>B</given-names></name></person-group><year>1976</year><article-title>Late steps in the maturation of <italic>Drosophila</italic> 26 S ribosomal RNA: generation of 5.8 S and 2 S RNAs by cleavages occurring in the cytoplasm</article-title><source>J Mol Biol</source><volume>101</volume><fpage>85</fpage><lpage>105</lpage><pub-id pub-id-type="doi">10.1016/0022-2836(76)90067-x</pub-id></element-citation></ref><ref id="bib34"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jungreis</surname><given-names>I</given-names></name><name><surname>Lin</surname><given-names>MF</given-names></name><name><surname>Spokony</surname><given-names>R</given-names></name><name><surname>Chan</surname><given-names>CS</given-names></name><name><surname>Negre</surname><given-names>N</given-names></name><name><surname>Victorsen</surname><given-names>A</given-names></name><etal/></person-group><year>2011</year><article-title>Evidence of abundant stop codon readthrough in <italic>Drosophila</italic> and other metazoa</article-title><source>Genome Res</source><volume>21</volume><fpage>2096</fpage><lpage>113</lpage><pub-id pub-id-type="doi">10.1101/gr.119974.110</pub-id></element-citation></ref><ref 
id="bib35"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kaiser</surname><given-names>CA</given-names></name><name><surname>Botstein</surname><given-names>D</given-names></name></person-group><year>1990</year><article-title>Efficiency and diversity of protein localization by random signal sequences</article-title><source>Mol Cell Biol</source><volume>10</volume><fpage>3163</fpage><lpage>73</lpage></element-citation></ref><ref id="bib36"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kaiser</surname><given-names>CA</given-names></name><name><surname>Preuss</surname><given-names>D</given-names></name><name><surname>Grisafi</surname><given-names>P</given-names></name><name><surname>Botstein</surname><given-names>D</given-names></name></person-group><year>1987</year><article-title>Many random sequences functionally replace the secretion signal sequence of yeast invertase</article-title><source>Science</source><volume>235</volume><fpage>312</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1126/science.3541205</pub-id></element-citation></ref><ref id="bib37"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Karijolich</surname><given-names>J</given-names></name><name><surname>Yu</surname><given-names>Y</given-names></name></person-group><year>2011</year><article-title>Converting nonsense codons into sense codons by targeted pseudouridylation</article-title><source>Nature</source><volume>474</volume><fpage>395</fpage><lpage>8</lpage><pub-id pub-id-type="doi">10.1038/nature10165</pub-id></element-citation></ref><ref id="bib38"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Keeling</surname><given-names>KM</given-names></name><name><surname>Lanier</surname><given-names>J</given-names></name><name><surname>Du</surname><given-names>M</given-names></name><name><surname>Salas-Marco</surname><given-names>J</given-names></name><name><surname>Gao</surname><given-names>L</given-names></name><name><surname>Kaenjak-Angeletti</surname><given-names>A</given-names></name><etal/></person-group><year>2004</year><article-title>Leaky termination at premature stop codons antagonizes nonsense-mediated mRNA decay in <italic>S. cerevisiae</italic></article-title><source>RNA</source><volume>10</volume><fpage>691</fpage><lpage>703</lpage><pub-id pub-id-type="doi">10.1261/rna.5147804</pub-id></element-citation></ref><ref id="bib39"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Klagges</surname><given-names>BR</given-names></name><name><surname>Heimbeck</surname><given-names>G</given-names></name><name><surname>Godenschwege</surname><given-names>TA</given-names></name><name><surname>Hofbauer</surname><given-names>A</given-names></name><name><surname>Pflugfelder</surname><given-names>GO</given-names></name><name><surname>Reifegerste</surname><given-names>R</given-names></name><etal/></person-group><year>1996</year><article-title>Invertebrate synapsins: a single gene codes for several isoforms in <italic>Drosophila</italic></article-title><source>J Neurosci</source><volume>16</volume><fpage>3154</fpage><lpage>65</lpage></element-citation></ref><ref id="bib40"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kopczynski</surname><given-names>JB</given-names></name><name><surname>Raff</surname><given-names>AC</given-names></name><name><surname>Bonner</surname><given-names>JJ</given-names></name></person-group><year>1992</year><article-title>Translational readthrough at nonsense mutations in the HSF1 gene of 
<italic>Saccharomyces cerevisiae</italic></article-title><source>Mol Gen Genet</source><volume>234</volume><fpage>369</fpage><lpage>78</lpage><pub-id pub-id-type="doi">10.1007/bf00538696</pub-id></element-citation></ref><ref id="bib41"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kosugi</surname><given-names>S</given-names></name><name><surname>Hasebe</surname><given-names>M</given-names></name><name><surname>Tomita</surname><given-names>M</given-names></name><name><surname>Yanagawa</surname><given-names>H</given-names></name></person-group><year>2009</year><article-title>Systematic identification of cell cycle-dependent yeast nucleocytoplasmic shuttling proteins by prediction of composite motifs</article-title><source>Proc Natl Acad Sci USA</source><volume>106</volume><fpage>10171</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1073/pnas.0900604106</pub-id></element-citation></ref><ref id="bib42"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Krogh</surname><given-names>A</given-names></name><name><surname>Larsson</surname><given-names>B</given-names></name><name><surname>von Heijne</surname><given-names>G</given-names></name><name><surname>Sonnhammer</surname><given-names>EL</given-names></name></person-group><year>2001</year><article-title>Predicting transmembrane protein topology with a hidden Markov model: application to complete genomes</article-title><source>J Mol Biol</source><volume>305</volume><fpage>567</fpage><lpage>80</lpage><pub-id pub-id-type="doi">10.1006/jmbi.2000.4315</pub-id></element-citation></ref><ref id="bib43"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Kryukov</surname><given-names>GV</given-names></name><name><surname>Castellano</surname><given-names>S</given-names></name><name><surname>Novoselov</surname><given-names>SV</given-names></name><name><surname>Lobanov</surname><given-names>AV</given-names></name><name><surname>Zehtab</surname><given-names>O</given-names></name><name><surname>Guigó</surname><given-names>R</given-names></name><etal/></person-group><year>2003</year><article-title>Characterization of mammalian selenoproteomes</article-title><source>Science</source><volume>300</volume><fpage>1439</fpage><lpage>43</lpage><pub-id pub-id-type="doi">10.1126/science.1083516</pub-id></element-citation></ref><ref id="bib44"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Langmead</surname><given-names>B</given-names></name><name><surname>Trapnell</surname><given-names>C</given-names></name><name><surname>Pop</surname><given-names>M</given-names></name><name><surname>Salzberg</surname><given-names>SL</given-names></name></person-group><year>2009</year><article-title>Ultrafast and memory-efficient alignment of short DNA sequences to the human genome</article-title><source>Genome Biol</source><volume>10</volume><fpage>R25</fpage><pub-id pub-id-type="doi">10.1186/gb-2009-10-3-r25</pub-id></element-citation></ref><ref id="bib45"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lasko</surname><given-names>P</given-names></name></person-group><year>2011</year><article-title>Posttranscriptional regulation in <italic>Drosophila</italic> oocytes and early embryos</article-title><source>Wiley Interdiscip Rev RNA</source><volume>2</volume><fpage>408</fpage><lpage>16</lpage><pub-id pub-id-type="doi">10.1002/wrna.70</pub-id></element-citation></ref><ref id="bib46"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Lazarowitz</surname><given-names>SG</given-names></name><name><surname>Robertson</surname><given-names>HD</given-names></name></person-group><year>1977</year><article-title>Initiator regions from the small size class of reovirus messenger RNA protected by rabbit reticulocyte ribosomes</article-title><source>J Biol Chem</source><volume>252</volume><fpage>7842</fpage><lpage>9</lpage></element-citation></ref><ref id="bib47"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>GP</given-names></name><name><surname>Rice</surname><given-names>CM</given-names></name></person-group><year>1989</year><article-title>Mutagenesis of the in-frame opal termination codon preceding nsP4 of Sindbis virus: studies of translational readthrough and its effect on virus replication</article-title><source>J Virol</source><volume>63</volume><fpage>1326</fpage><lpage>37</lpage></element-citation></ref><ref id="bib48"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname><given-names>MF</given-names></name><name><surname>Carlson</surname><given-names>JW</given-names></name><name><surname>Crosby</surname><given-names>MA</given-names></name><name><surname>Matthews</surname><given-names>BB</given-names></name><name><surname>Yu</surname><given-names>C</given-names></name><name><surname>Park</surname><given-names>S</given-names></name><etal/></person-group><year>2007</year><article-title>Revisiting the protein-coding gene catalog of <italic>Drosophila melanogaster</italic> using 12 fly genomes</article-title><source>Genome Res</source><volume>17</volume><fpage>1823</fpage><lpage>36</lpage><pub-id pub-id-type="doi">10.1101/gr.6679507</pub-id></element-citation></ref><ref id="bib49"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Lin</surname><given-names>MF</given-names></name><name><surname>Jungreis</surname><given-names>I</given-names></name><name><surname>Kellis</surname><given-names>M</given-names></name></person-group><year>2011</year><article-title>PhyloCSF: a comparative genomics method to distinguish protein coding and non-coding regions</article-title><source>Bioinformatics</source><volume>27</volume><fpage>i275</fpage><lpage>82</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btr209</pub-id></element-citation></ref><ref id="bib50"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Maurer-Stroh</surname><given-names>S</given-names></name><name><surname>Eisenhaber</surname><given-names>F</given-names></name></person-group><year>2005</year><article-title>Refinement and prediction of protein prenylation motifs</article-title><source>Genome Biol</source><volume>6</volume><fpage>R55</fpage><pub-id pub-id-type="doi">10.1186/gb-2005-6-6-r55</pub-id></element-citation></ref><ref id="bib50a"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Marygold</surname><given-names>SJ</given-names></name><name><surname>Leyland</surname><given-names>PC</given-names></name><name><surname>Seal</surname><given-names>RL</given-names></name><name><surname>Goodman</surname><given-names>JL</given-names></name><name><surname>Thurmond</surname><given-names>JR</given-names></name><name><surname>Strelets</surname><given-names>VB</given-names></name><name><surname>Wilson</surname><given-names>RJ</given-names></name><collab>FlyBase Consortium</collab></person-group><year>2013</year><article-title>FlyBase: improvements to the bibliography</article-title><source>Nucleic Acids Res</source><volume>41</volume><issue>D1</issue><fpage>D751</fpage><lpage>57</lpage><pub-id pub-id-type="doi">10.1093/nar/gks1024</pub-id></element-citation></ref><ref id="bib51"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>McCaughan</surname><given-names>KK</given-names></name><name><surname>Brown</surname><given-names>CM</given-names></name><name><surname>Dalphin</surname><given-names>ME</given-names></name><name><surname>Berry</surname><given-names>MJ</given-names></name><name><surname>Tate</surname><given-names>WP</given-names></name></person-group><year>1995</year><article-title>Translational termination efficiency in mammals is influenced by the base following the stop codon</article-title><source>Proc Natl Acad Sci USA</source><volume>92</volume><fpage>5431</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1073/pnas.92.12.5431</pub-id></element-citation></ref><ref id="bib53"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Meijer</surname><given-names>HA</given-names></name><name><surname>Thomas</surname><given-names>AA</given-names></name></person-group><year>2002</year><article-title>Control of eukaryotic protein synthesis by upstream open reading frames in the 5’-untranslated region of an mRNA</article-title><source>Biochem J</source><volume>367</volume><fpage>1</fpage><lpage>11</lpage><pub-id pub-id-type="doi">10.1042/BJ20011706</pub-id></element-citation></ref><ref id="bib54"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Michel</surname><given-names>AM</given-names></name><name><surname>Choudhury</surname><given-names>KR</given-names></name><name><surname>Firth</surname><given-names>AE</given-names></name><name><surname>Ingolia</surname><given-names>NT</given-names></name><name><surname>Atkins</surname><given-names>JF</given-names></name><name><surname>Baranov</surname><given-names>PV</given-names></name></person-group><year>2012</year><article-title>Observation of dually decoded regions of the human genome using ribosome profiling data</article-title><source>Genome 
Res</source><volume>22</volume><fpage>2219</fpage><lpage>29</lpage><pub-id pub-id-type="doi">10.1101/gr.133249.111</pub-id></element-citation></ref><ref id="bib55"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mottagui-Tabar</surname><given-names>S</given-names></name><name><surname>Tuite</surname><given-names>MF</given-names></name><name><surname>Isaksson</surname><given-names>LA</given-names></name></person-group><year>1998</year><article-title>The influence of 5’ codon context on translation termination in <italic>Saccharomyces cerevisiae</italic></article-title><source>Eur J Biochem</source><volume>257</volume><fpage>49</fpage><lpage>54</lpage><pub-id pub-id-type="doi">10.1046/j.1432-1327.1998.2570249.x</pub-id></element-citation></ref><ref id="bib56"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Namy</surname><given-names>O</given-names></name><name><surname>Duchateau-Nguyen</surname><given-names>G</given-names></name><name><surname>Hatin</surname><given-names>I</given-names></name><name><surname>Hermann-le Denmat</surname><given-names>S</given-names></name><name><surname>Termier</surname><given-names>M</given-names></name><name><surname>Rousset</surname><given-names>JP</given-names></name></person-group><year>2003</year><article-title>Identification of stop codon readthrough genes in <italic>Saccharomyces cerevisiae</italic></article-title><source>Nucleic Acids Res</source><volume>31</volume><fpage>2289</fpage><lpage>96</lpage><pub-id pub-id-type="doi">10.1093/nar/gkg330</pub-id></element-citation></ref><ref id="bib57"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Namy</surname><given-names>O</given-names></name><name><surname>Duchateau-Nguyen</surname><given-names>G</given-names></name><name><surname>Rousset</surname><given-names>JP</given-names></name></person-group><year>2002</year><article-title>Translational readthrough of the PDE2 stop codon modulates cAMP levels in <italic>Saccharomyces cerevisiae</italic></article-title><source>Mol Microbiol</source><volume>43</volume><fpage>641</fpage><lpage>52</lpage><pub-id pub-id-type="doi">10.1046/j.1365-2958.2002.02770.x</pub-id></element-citation></ref><ref id="bib58"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Napthine</surname><given-names>S</given-names></name><name><surname>Yek</surname><given-names>C</given-names></name><name><surname>Powell</surname><given-names>ML</given-names></name><name><surname>Brown</surname><given-names>TD</given-names></name><name><surname>Brierley</surname><given-names>I</given-names></name></person-group><year>2012</year><article-title>Characterization of the stop codon readthrough signal of Colorado tick fever virus segment 9 RNA</article-title><source>RNA</source><volume>18</volume><fpage>241</fpage><lpage>52</lpage><pub-id pub-id-type="doi">10.1261/rna.030338.111</pub-id></element-citation></ref><ref id="bib59"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Neuberger</surname><given-names>G</given-names></name><name><surname>Maurer-Stroh</surname><given-names>S</given-names></name><name><surname>Eisenhaber</surname><given-names>B</given-names></name><name><surname>Hartig</surname><given-names>A</given-names></name><name><surname>Eisenhaber</surname><given-names>F</given-names></name></person-group><year>2003</year><article-title>Prediction of peroxisomal targeting signal 1 containing proteins from amino acid sequence</article-title><source>J Mol 
Biol</source><volume>328</volume><fpage>581</fpage><lpage>92</lpage><pub-id pub-id-type="doi">10.1016/s0022-2836(03)00319-x</pub-id></element-citation></ref><ref id="bib60"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Oh</surname><given-names>E</given-names></name><name><surname>Becker</surname><given-names>AH</given-names></name><name><surname>Sandikci</surname><given-names>A</given-names></name><name><surname>Huber</surname><given-names>D</given-names></name><name><surname>Chaba</surname><given-names>R</given-names></name><name><surname>Gloge</surname><given-names>F</given-names></name><etal/></person-group><year>2011</year><article-title>Selective ribosome profiling reveals the cotranslational chaperone action of trigger factor in vivo</article-title><source>Cell</source><volume>147</volume><fpage>1295</fpage><lpage>308</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2011.10.044</pub-id></element-citation></ref><ref id="bib61"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pavlakis</surname><given-names>GN</given-names></name><name><surname>Jordan</surname><given-names>BR</given-names></name><name><surname>Wurst</surname><given-names>RM</given-names></name><name><surname>Vournakis</surname><given-names>JN</given-names></name></person-group><year>1979</year><article-title>Sequence and secondary structure of <italic>Drosophila melanogaster</italic> 5.8S and 2S rRNAs and of the processing site between them</article-title><source>Nucleic Acids Res</source><volume>7</volume><fpage>2213</fpage><lpage>38</lpage><pub-id pub-id-type="doi">10.1093/nar/7.8.2213</pub-id></element-citation></ref><ref id="bib62"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Pisarev</surname><given-names>AV</given-names></name><name><surname>Kolupaeva</surname><given-names>VG</given-names></name><name><surname>Yusupov</surname><given-names>MM</given-names></name><name><surname>Hellen</surname><given-names>CU</given-names></name><name><surname>Pestova</surname><given-names>TV</given-names></name></person-group><year>2008</year><article-title>Ribosomal position and contacts of mRNA in eukaryotic translation initiation complexes</article-title><source>EMBO J</source><volume>27</volume><fpage>1609</fpage><lpage>21</lpage><pub-id pub-id-type="doi">10.1038/emboj.2008.90</pub-id></element-citation></ref><ref id="bib63"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qin</surname><given-names>X</given-names></name><name><surname>Ahn</surname><given-names>S</given-names></name><name><surname>Speed</surname><given-names>TP</given-names></name><name><surname>Rubin</surname><given-names>GM</given-names></name></person-group><year>2007</year><article-title>Global analyses of mRNA translational control during early <italic>Drosophila</italic> embryogenesis</article-title><source>Genome Biol</source><volume>8</volume><fpage>R63</fpage><pub-id pub-id-type="doi">10.1186/gb-2007-8-4-r63</pub-id></element-citation></ref><ref id="bib64"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ramaswami</surname><given-names>G</given-names></name><name><surname>Zhang</surname><given-names>R</given-names></name><name><surname>Piskol</surname><given-names>R</given-names></name><name><surname>Keegan</surname><given-names>LP</given-names></name><name><surname>Deng</surname><given-names>P</given-names></name><name><surname>O’Connell</surname><given-names>M</given-names></name><etal/></person-group><year>2013</year><article-title>Identifying RNA editing sites using RNA sequencing data alone</article-title><source>Nat 
Methods</source><volume>10</volume><fpage>128</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1038/nmeth.2330</pub-id></element-citation></ref><ref id="bib65"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Robinson</surname><given-names>DN</given-names></name><name><surname>Cooley</surname><given-names>L</given-names></name></person-group><year>1997</year><article-title>Examination of the function of two kelch proteins generated by stop codon suppression</article-title><source>Development</source><volume>124</volume><fpage>1405</fpage><lpage>17</lpage></element-citation></ref><ref id="bib66"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Skabkin</surname><given-names>MA</given-names></name><name><surname>Skabkina</surname><given-names>OV</given-names></name><name><surname>Hellen</surname><given-names>CU</given-names></name><name><surname>Pestova</surname><given-names>TV</given-names></name></person-group><year>2013</year><article-title>Reinitiation and other unconventional posttermination events during eukaryotic translation</article-title><source>Mol Cell</source><volume>51</volume><fpage>249</fpage><lpage>64</lpage><pub-id pub-id-type="doi">10.1016/j.molcel.2013.05.026</pub-id></element-citation></ref><ref id="bib67"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Skuzeski</surname><given-names>JM</given-names></name><name><surname>Nichols</surname><given-names>LM</given-names></name><name><surname>Gesteland</surname><given-names>RF</given-names></name><name><surname>Atkins</surname><given-names>JF</given-names></name></person-group><year>1991</year><article-title>The signal for a leaky UAG stop codon in several plant viruses includes the two downstream codons</article-title><source>J Mol Biol</source><volume>218</volume><fpage>365</fpage><lpage>73</lpage><pub-id 
pub-id-type="doi">10.1016/0022-2836(91)90718-l</pub-id></element-citation></ref><ref id="bib68"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Steneberg</surname><given-names>P</given-names></name><name><surname>Englund</surname><given-names>C</given-names></name><name><surname>Kronhamn</surname><given-names>J</given-names></name><name><surname>Weaver</surname><given-names>TA</given-names></name><name><surname>Samakovlis</surname><given-names>C</given-names></name></person-group><year>1998</year><article-title>Translational readthrough in the hdc mRNA generates a novel branching inhibitor in the drosophila trachea</article-title><source>Genes Dev</source><volume>12</volume><fpage>956</fpage><lpage>67</lpage><pub-id pub-id-type="doi">10.1101/gad.12.7.956</pub-id></element-citation></ref><ref id="bib69"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Steneberg</surname><given-names>P</given-names></name><name><surname>Samakovlis</surname><given-names>C</given-names></name></person-group><year>2001</year><article-title>A novel stop codon readthrough mechanism produces functional Headcase protein in <italic>Drosophila</italic> trachea</article-title><source>EMBO Rep</source><volume>2</volume><fpage>593</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1093/embo-reports/kve128</pub-id></element-citation></ref><ref id="bib70"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Stern-Ginossar</surname><given-names>N</given-names></name><name><surname>Weisburd</surname><given-names>B</given-names></name><name><surname>Michalski</surname><given-names>A</given-names></name><name><surname>Le</surname><given-names>VT</given-names></name><name><surname>Hein</surname><given-names>MY</given-names></name><name><surname>Huang</surname><given-names>SX</given-names></name><etal/></person-group><year>2012</year><article-title>Decoding human cytomegalovirus</article-title><source>Science</source><volume>338</volume><fpage>1088</fpage><lpage>93</lpage><pub-id pub-id-type="doi">10.1126/science.1227919</pub-id></element-citation></ref><ref id="bib71"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tautz</surname><given-names>D</given-names></name><name><surname>Hancock</surname><given-names>JM</given-names></name><name><surname>Webb</surname><given-names>DA</given-names></name><name><surname>Tautz</surname><given-names>C</given-names></name><name><surname>Dover</surname><given-names>GA</given-names></name></person-group><year>1988</year><article-title>Complete sequences of the rRNA genes of <italic>Drosophila melanogaster</italic></article-title><source>Mol Biol Evol</source><volume>5</volume><fpage>366</fpage><lpage>76</lpage></element-citation></ref><ref id="bib72"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Torabi</surname><given-names>N</given-names></name><name><surname>Kruglyak</surname><given-names>L</given-names></name></person-group><year>2011</year><article-title>Variants in SUP45 and TRM10 underlie natural variation in translation termination efficiency in <italic>Saccharomyces cerevisiae</italic></article-title><source>PLOS Genet</source><volume>7</volume><fpage>e1002211</fpage><pub-id pub-id-type="doi">10.1371/journal.pgen.1002211</pub-id></element-citation></ref><ref 
id="bib73"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Torabi</surname><given-names>N</given-names></name><name><surname>Kruglyak</surname><given-names>L</given-names></name></person-group><year>2012</year><article-title>Genetic basis of hidden phenotypic variation revealed by increased translational readthrough in yeast</article-title><source>PLOS Genet</source><volume>8</volume><fpage>e1002546</fpage><pub-id pub-id-type="doi">10.1371/journal.pgen.1002546</pub-id></element-citation></ref><ref id="bib74"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>True</surname><given-names>HL</given-names></name><name><surname>Lindquist</surname><given-names>SL</given-names></name></person-group><year>2000</year><article-title>A yeast prion provides a mechanism for genetic variation and phenotypic diversity</article-title><source>Nature</source><volume>407</volume><fpage>477</fpage><lpage>83</lpage><pub-id pub-id-type="doi">10.1038/35035005</pub-id></element-citation></ref><ref id="bib75"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tuite</surname><given-names>MF</given-names></name><name><surname>Cox</surname><given-names>BS</given-names></name></person-group><year>2007</year><article-title>The genetic control of the formation and propagation of the [<italic>PSI</italic><sup>+</sup>] prion of yeast</article-title><source>Prion</source><volume>1</volume><fpage>101</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.4161/pri.1.2.4665</pub-id></element-citation></ref><ref id="bib76"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Wills</surname><given-names>NM</given-names></name><name><surname>Gesteland</surname><given-names>RF</given-names></name><name><surname>Atkins</surname><given-names>JF</given-names></name></person-group><year>1991</year><article-title>Evidence that a downstream pseudoknot is required for translational read-through of the Moloney murine leukemia virus gag stop codon</article-title><source>Proc Natl Acad Sci USA</source><volume>88</volume><fpage>6991</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1073/pnas.88.16.6991</pub-id></element-citation></ref><ref id="bib77"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xue</surname><given-names>F</given-names></name><name><surname>Cooley</surname><given-names>L</given-names></name></person-group><year>1993</year><article-title>kelch encodes a component of intercellular bridges in <italic>Drosophila</italic> egg chambers</article-title><source>Cell</source><volume>72</volume><fpage>681</fpage><lpage>93</lpage><pub-id pub-id-type="doi">10.1016/0092-8674(93)90397-9</pub-id></element-citation></ref><ref id="bib78"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yamaguchi</surname><given-names>Y</given-names></name><name><surname>Hayashi</surname><given-names>A</given-names></name><name><surname>Campagnoni</surname><given-names>CW</given-names></name><name><surname>Kimura</surname><given-names>A</given-names></name><name><surname>Inuzuka</surname><given-names>T</given-names></name><name><surname>Baba</surname><given-names>H</given-names></name></person-group><year>2012</year><article-title>L-MPZ, a novel isoform of myelin P0, is produced by stop codon readthrough</article-title><source>J Biol Chem</source><volume>287</volume><fpage>17765</fpage><lpage>76</lpage><pub-id pub-id-type="doi">10.1074/jbc.m111.314468</pub-id></element-citation></ref><ref id="bib79"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Yoshinaka</surname><given-names>Y</given-names></name><name><surname>Katoh</surname><given-names>I</given-names></name><name><surname>Copeland</surname><given-names>TD</given-names></name><name><surname>Oroszlan</surname><given-names>S</given-names></name></person-group><year>1985</year><article-title>Translational readthrough of an amber termination codon during synthesis of feline leukemia virus protease</article-title><source>J Virol</source><volume>55</volume><fpage>870</fpage><lpage>3</lpage></element-citation></ref></ref-list></back><sub-article article-type="article-commentary" id="SA1"><front-stub><article-id pub-id-type="doi">10.7554/eLife.01179.025</article-id><title-group><article-title>Decision letter</article-title></title-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Sonenberg</surname><given-names>Nahum</given-names></name><role>Reviewing editor</role><aff><institution>McGill University</institution>, <country>Canada</country></aff></contrib></contrib-group></front-stub><body><boxed-text><p>eLife posts the editorial decision letter and author response on a selection of the published articles (subject to the approval of the authors). An edited version of the letter sent to the authors after peer review is shown, indicating the substantive concerns or comments; minor concerns are not usually shown. Reviewers have the opportunity to discuss the decision before the letter is sent (see <ext-link ext-link-type="uri" xlink:href="http://elife.elifesciences.org/review-process">review process</ext-link>). Similarly, the author response typically shows only responses to the major concerns raised by the reviewers.</p></boxed-text><p>Thank you for sending your work entitled “Ribosome profiling reveals pervasive and regulated stop codon readthrough in <italic>Drosophila melanogaster</italic>” for consideration at <italic>eLife</italic>. 
Your article has been favorably evaluated by a Senior editor and 3 reviewers, one of whom, Nahum Sonenberg, is a member of our Board of Reviewing Editors.</p><p>The consensus opinion of the reviewers is that this study is a thorough, compelling analysis that describes the development of a ribosome profiling assay for <italic>Drosophila melanogaster</italic> and provides the first genome-wide experimental analysis of stop codon readthrough. The data support most of your conclusions.</p><p>The important conclusions are that:</p><p>A) Readthrough is more pervasive than expected, and the majority of readthrough events observed were not predicted phylogenetically.</p><p>B) The C-terminal protein extensions show evidence of selection, contain functional subcellular localization signals, and their readthrough is regulated, arguing for their importance.</p><p>C) The readthrough might regulate gene expression and protein function, and add plasticity to the proteome during evolution.</p><p>However, the reviewers raised several concerns and questions as described below.</p><p>1) You note that the locations of ribosome-protected footprint fragments from yeast and human ribosome profiling datasets exhibit 3-nucleotide periodicity from which reading frames can be deduced. Does the fly data provide enough resolution to also show such periodicity? If not, why?</p><p>2) The reviewers agree that you need to indicate more clearly the mean level of readthrough you observe with the predicted and novel extensions. You could have discussed this in a couple of places, but you didn't seize upon those opportunities. For example, <xref ref-type="fig" rid="fig5">Figure 5F</xref> appears to show that the mean readthrough rates observed range from 1-3%. However, only a couple of rather cryptic references that addressed this point are in the text. In the Results section you state that the human, yeast and fly samples cover similar ranges of efficiency. 
In the Discussion, you state that the readthrough rates range beyond 10%, while the baseline readthrough is much lower (0.0-1.4%). You need to address the mean level of readthrough more directly in the text since the level of readthrough you observe directly relates to your functional significance arguments in the Discussion. Have you tried to show that one of the newly discovered endogenous proteins has a longer isoform than the predicted size encoded by its corresponding coding region? If not, this needs to be noted.</p><p>3) Recent studies have shown that dedicated recycling factors (Rli1 in yeast and ABCE1 in mammals) are required for efficient ribosome release following translation termination. A key concern related to your predicted and novel extensions is whether the ribosomes distal to the stop codon represent translating ribosomes, or simply ribosomes that may not have properly released from the mRNA following termination at the stop codon. In a recent Cell paper (<xref ref-type="bibr" rid="bib23">Guttman et al, 2013</xref>), a parameter called the Ribosome Release Score (RRS) was used to discriminate between translated protein-coding regions and non-coding transcripts with similar ribosome densities. Could you apply that parameter to the stop codons of the predicted and novel extensions to provide further confidence that they truly represent translated extensions, rather than 3´-UTRs that simply don't release ribosomes? The Guttman paper should be cited.</p><p>4) Discussion: You state that readthrough is pervasive, biologically regulated, and functionally consequential, and thus provides an important mechanism to regulate gene expression and function. In light of my concern about the level of readthrough you are generally observing (1-3%), the reviewers think this is somewhat overstating your results. 
Until you have eliminated one or more of these C-terminal extensions and shown that it results in an adverse phenotype, you cannot say that these extensions are “functionally consequential”.</p><p>5) The evolutionary analysis presented near the end of the paper is problematic.</p><p>You partitioned the observed examples of readthrough into those that had been predicted by Lin et al. to have signatures of coding conservation (predicted readthrough) and those that didn't (novel readthrough). Then you use three pieces of data to argue that the novel readthroughs are under purifying selection to maintain their coding capacity, and that they are of recent evolutionary origin.</p><p>First, you used PhyloCSF to score the novel readthrough, finding that few score positively and that they have the same distribution of scores as non read-through 3'UTRs. You posit that there are only two possible explanations for this - that the novel readthrough are selectively neutral, or that they are of too recent origin to leave a detectable phylogenetic signature. But it is also possible that they have a signature, but it is simply too weak to detect with the model used by PhyloCSF. We come back to this point below.</p><p>Next, you compared predicted and novel readthrough, UTRs from non read-through genes, and coding sequence using an algorithm that separates coding and non-coding sequence using nucleotide frequencies, finding that novel readthrough were somewhere in between coding and predicted readthrough on the one hand and non-coding sequence on the other. You note that this is consistent with an “evolutionary trajectory” from non-coding to coding. However, it is also consistent with sequences that simply have weak coding propensity.</p><p>Finally, you look at <italic>D. 
melanogaster</italic> SNPs to evaluate whether there is a preference for synonymous relative to non-synonymous SNPs in novel vs predicted read-through, finding that there is a weak preference for synonymous SNPs, less than found in predicted SNPs, and say this is consistent with “mild or recently-imposed selection”. This might not be correct. If you have two read-through events – one which evolved at the base of the genus <italic>Drosophila</italic>, and one along the lineage that separates <italic>D. melanogaster</italic> from <italic>D. simulans</italic>/<italic>D. sechellia</italic> – and posit that these read-through events are under identical selective pressure, then you would expect both to have identical preference for synonymous substitutions, regardless of when they evolved. There is no good reason to assume that recently evolved sequences would be under any different strength of selection. If this novel hypothesis were correct, you would probably expect some, if not most, of the novel events to be polymorphic within the population, with some of these in the middle of selective sweeps. But, this would produce a different synonymous vs non-synonymous pattern (with an excess of non-synonymous SNPs perhaps).</p><p>It is odd to argue that the majority of read-through events are novel and under selection – something that you'd only expect to find if readthrough events had very short evolutionary half-lives or if there were some reason to have specifically evolved functional read-through events in <italic>D. melanogaster</italic>.</p><p>The data are equally, if not more, consistent with the novel readthrough being subject to weak selection, with their origins unknown. One simple way to resolve this might be to run the Z-score program on orthologous UTRs of read-through and non-read-through genes across the genus. If these are novel to <italic>D. melanogaster</italic>, their scores should be significantly higher in <italic>D. 
melanogaster</italic>.</p></body></sub-article><sub-article article-type="reply" id="SA2"><front-stub><article-id pub-id-type="doi">10.7554/eLife.01179.026</article-id><title-group><article-title>Author response</article-title></title-group></front-stub><body><p><italic>1) You note that the locations of ribosome-protected footprint fragments from yeast and human ribosome profiling datasets exhibit 3-nucleotide periodicity from which reading frames can be deduced. Does the fly data provide enough resolution to also show such periodicity? If not, why</italic>?</p><p>The fly data do not provide sufficient resolution to show periodicity. Because our standard nuclease, RNase I, destroys <italic>Drosophila</italic> ribosomes, we prepared the fly libraries with micrococcal nuclease (MNase), which <italic>Drosophila</italic> ribosomes tolerate well over a wide range of concentrations (see <xref ref-type="fig" rid="fig1s1">Figure 1–figure supplement 1</xref>). While RNase I is an unbiased enzyme, MNase has a strong 3' A/T bias. As a result, MNase-digested footprints are longer than RNase-digested footprints, and not always fully resolved to the edges of ribosomes. This fact gives rise to a small amount of positional uncertainty with P-site mapping in MNase datasets. We handle this uncertainty in our P-site assignment strategy by assigning a fraction of the P-site over a neighborhood of adjacent nucleotides determined by the length and endpoints of each read alignment as detailed in the methods section of our manuscript and as previously performed in <xref ref-type="bibr" rid="bib60">Oh et al., 2011</xref>. 
We had already included a discussion of mapping in the Materials and methods (section “Sequence processing and alignment”), but have now included an explicit discussion of why periodicity is not visible on fly data in two places in our main text.</p><p><italic>2) The reviewers agree that you need to indicate more clearly the mean level of readthrough you observe with the predicted and novel extensions. You could have discussed this in a couple of places, but you didn't seize upon those opportunities. For example,</italic> <xref ref-type="fig" rid="fig5"><italic>Figure 5F</italic></xref> <italic>appears to show that the mean readthrough rates observed range from 1-3%. However, only a couple of rather cryptic references that addressed this point are in the text. In the Results section you state that the human, yeast and fly samples cover similar ranges of efficiency. In the Discussion, you state that the readthrough rates range beyond 10%, while the baseline readthrough is much lower (0.0-1.4%). You need to address the mean level of readthrough more directly in the text since the level of readthrough you observe directly relates to your functional significance arguments in the Discussion</italic>.</p><p>We have added specific references to median levels of readthrough observed in the text. To set a threshold for biological significance, we now discuss readthrough rates in comparison to the distribution we observe for the phylogenetically conserved extensions in the <italic>Drosophila</italic> embryos. The rationale is as follows: because the phylogenetically conserved readthrough events are likely to be conserved because they are functional, and because only a specific fraction of these extensions have observable amounts of readthrough in our samples, we infer that this specific group, when translated, is translated at a rate that is biologically functional. 
We therefore re-framed our text to explicitly compare readthrough rates of various extensions against this phylogenetically conserved group of extensions in <italic>Drosophila melanogaster.</italic> We note this first when discussing readthrough in yeast and humans:</p><p>“To estimate how many of the novel extensions we detected might be translated at a biologically significant level…”</p><p>And we explicitly state the median readthrough rates we observe in our Discussion:</p><p>“Finally, our model predicts that conserved extensions should on average exhibit higher readthrough rates than novel extensions…”</p><p><italic>Have you tried to show that one of the newly discovered endogenous proteins has a longer isoform than the predicted size encoded by its corresponding coding region? If not, this needs to be noted</italic>.</p><p>We have amended the text to indicate that we did not seek to detect endogenous proteins. The readthrough reporters we designed for <xref ref-type="fig" rid="fig4">Figure 4D</xref> contain 120 codons upstream of the annotated stop codon, and the entire endogenous 3' UTR, the latter modified only to include a double FLAG epitope upstream of the extension's termination codon. 
Because we included so large a region of the endogenous mRNA in our constructs, we believe that they report readthrough at least as faithfully as those traditionally used in the literature to screen readthrough contexts, which include much less endogenous sequence (as few as 2–8 codons upstream and 3-15 codons downstream of the stop; Fearon et al<italic>.</italic>, 1994; Feng et al<italic>.</italic>, 1992; Harrell et al<italic>.</italic>, 2002; Namy et al<italic>.</italic>, 2002; Namy et al<italic>.</italic>, 2003), with the exception of a number of excellent papers that deduce the minimal requirements for readthrough in specific virus or host transcripts (Cimino et al<italic>.</italic>, 2011; Firth et al<italic>.</italic>, 2011; Napthine et al<italic>.</italic>, 2012; Skuzeski et al<italic>.</italic>, 1991; <xref ref-type="bibr" rid="bib69">Steneberg &amp; Samakovlis, 2001</xref>).</p><p><italic>3) Recent studies have shown that dedicated recycling factors (Rli1 in yeast and ABCE1 in mammals) are required for efficient ribosome release following translation termination. A key concern related to your predicted and novel extensions is whether the ribosomes distal to the stop codon represent translating ribosomes, or simply ribosomes that may not have properly released from the mRNA following termination at the stop codon. In a recent Cell paper (</italic><xref ref-type="bibr" rid="bib23"><italic>Guttman et al, 2013</italic></xref><italic>), a parameter called the Ribosome Release Score (RRS) was used to discriminate between translated protein-coding regions and non-coding transcripts with similar ribosome densities. Could you apply that parameter to the stop codons of the predicted and novel extensions to provide further confidence that they truly represent translated extensions, rather than 3´-UTRs that simply don't release ribosomes? The Guttman paper should be cited</italic>.</p><p>We agree that this is an important point. 
However, it is important to note that RRS score as developed and validated in <xref ref-type="bibr" rid="bib23">Guttman et al., 2013</xref> is not applicable for this specific purpose. The authors state explicitly that RRS is best suited to classify a transcript as containing or lacking a single, predominant long open reading frame, not to choose which stop codon in that transcript is utilized:</p><p>“RRS is not designed to identify specific translated regions within a transcript containing multiple overlapping or nearby translated regions” (Guttman et al<italic>.</italic>, 2013).</p><p>Therefore, in our manuscript we sought to control for this possibility in two other ways.</p><p>First, we required a 75% or greater decrease in ribosome density following the first in-frame stop codon as a preliminary filtering criterion before a given C-terminal extension was even examined for readthrough (see Materials and methods section “Identification of C-terminal protein extensions” subsection “Identification of novel extensions”).</p><p>Second, we demonstrated in a metagene analysis that ribosome footprint density covering the stop codons that terminate C-terminal extensions is qualitatively similar to the density covering stop codons of annotated coding regions (<xref ref-type="fig" rid="fig4">Figure 4C</xref>). In this analysis, clear termination peaks are visible over stop codons in both metagene averages, and, in each average, the normalized ribosome footprint density drops to negligible levels after the stop codon in question. 
Thus, the fact that ribosomes occupying the extensions show characteristic behaviors of termination (spikes at the stop codon, followed by a drop in density) at the C-termini of the extensions strongly argues that those ribosomes are engaged in active translation of extensions, rather than just sliding.</p><p>Nonetheless, we have included in this revised manuscript an additional analysis of ribosome release similar to the RRS score (<xref ref-type="fig" rid="fig4s1">Figure 4–figure supplement 1</xref>). Briefly, we tabulate the ratio of reads in a 5-codon window downstream of a given stop codon to the number of reads in a 5-codon window upstream of that stop codon. This criterion differs from the various ways RRS was calculated in Guttman et al. principally in that RRS is additionally normalized for mRNA fragment density to control for transcript mis-annotation (Guttman et al<italic>.</italic>, 2013), something we did not consider in our study as we manually verified the structures of all transcripts for which we report readthrough.</p><p>We perform this RRS-like calculation on the following classes of codons: 1) stop codons that terminate annotated coding regions, 2) stop codons that terminate C-terminal extensions, and 3) randomly-selected codons internal to annotated coding regions. We find that the release scores for C-terminal extensions fall well within the distribution for those of annotated coding regions, which again supports the notion that the ribosome footprint density covering extensions represents bona fide translation events, followed by termination at the expected stop codon. 
In addition, we have cited both <xref ref-type="bibr" rid="bib23">Guttman et al., 2013</xref> and <xref ref-type="bibr" rid="bib66">Skabkin et al., 2013</xref> and expanded our discussion of ribosome release to improve clarity of this issue:</p><p><italic>4) Discussion: You state that readthrough is pervasive, biologically regulated, and functionally consequential, and thus provides an important mechanism to regulate gene expression and function. In light of my concern about the level of readthrough you are generally observing (1-3%), the reviewers think this is somewhat overstating your results. Until you have eliminated one or more of these C-terminal extensions and shown that it results in an adverse phenotype, you cannot say that these extensions are “functionally consequential”</italic>.</p><p>We appreciate this objection and have changed our language accordingly. The sentence now reads:</p><p>“Our studies indicate that readthrough is far more pervasive than previously appreciated, is biologically regulated, and may append functional peptide signals to host proteins.”</p><p>Nonetheless, it is worth noting that finding a phenotype for a given protein is difficult. The <italic>Drosophila</italic> gene <italic>kelch,</italic> the first gene discovered to undergo readthrough in <italic>Drosophila,</italic> provides a good example. <italic>kelch</italic> is essential for female fertility, and has an extension notable for both its conservation (PhyloCSF score 7784, conserved throughout the sequenced <italic>Drosophila</italic> phylogeny) and length (787 amino acids). However, Robinson and colleagues found that expression of specifically the short form, but not of the long form, complemented the fertility defect observed in the null mutant (<xref ref-type="bibr" rid="bib65">Robinson &amp; Cooley, 1997</xref>). Nonetheless, given its conservation and length, it is hard to imagine that this extension is not functional. 
Robinson and colleagues therefore proposed that the long form of <italic>kelch</italic> may be more important in other tissues (e.g., the imaginal discs, where the long form is specifically up-regulated relative to the short form), but they did not create the conditional mutants necessary to test this hypothesis (<xref ref-type="bibr" rid="bib65">Robinson &amp; Cooley, 1997</xref>).</p><p>That said, we are very interested in investigating the functions of specific extensions, and will probably start this work in yeast, which offers sophisticated genetic tools, a simpler life history, and several C-terminal extensions in essential genes that might yield interesting phenotypes. We hope this will provide fertile ground for future studies.</p><p><italic>5) The evolutionary analysis presented near the end of the paper is problematic</italic>.</p><p>We recognize these concerns and have adjusted our language in the text to highlight alternate interpretations of the data per the reviewers' concerns (discussed further below). In this revised manuscript we try to make clear the evolutionary model we present is a more speculative part of the discussion. As with any evolutionary question, multiple explanations may account for our observations. We present in the revised text what we believe to be a reasonable and plausible explanation. Importantly, we do not wish to overstate our results or to give false impressions of certainty. Rather, we hope our work will provide a point of entry into further investigations on the origins and functions of C-terminal extensions.</p><p><italic>You partitioned the observed examples of readthrough into those that had been predicted by Lin et al. to have signatures of coding conservation (predicted readthrough) and those that didn't (novel readthrough). 
Then you use three pieces of data to argue that the novel readthroughs are under purifying selection to maintain their coding capacity, and that they are of recent evolutionary origin</italic>.</p><p><italic>First, you used PhyloCSF to score the novel readthrough, finding that few score positively and that they have the same distribution of scores as non read-through 3'UTRs. You posit that there are only two possible explanations for this – that the novel readthrough are selectively neutral, or that they are of too recent origin to leave a detectable phylogenetic signature. But it is also possible that they have a signature, but it is simply too weak to detect with the model used by PhyloCSF. We come back to this point below</italic>.</p><p>We regret that our manuscript was unclear on an important point: our goal was to evaluate — regardless of evolutionary age — whether any of the novel readthrough events we identified occur because they are biologically important or, alternatively, simply because they can occur without incurring a significant fitness disadvantage (i.e., are selectively neutral or nearly neutral). The first way we approached this question was to look for evidence of selection, the presence or absence of which would favor one hypothesis over the other.</p><p>We did interpret the novel extensions we identified to be, on average, evolutionarily recent in origin because of their negative PhyloCSF scores. In so doing, we made two assumptions. First, we assumed that PhyloCSF's model for the <italic>Drosophila</italic> phylogeny should be sensitive to detect conservation among most, even if not all, conserved coding regions. 
Secondly, we assumed that protein-coding selection, if present, should yield similar signatures in the amino acid sequences of known coding regions and of putative C-terminal extensions (i.e., conservation, if present, should favor synonymous amino acid substitutions over non-synonymous changes, and the primary amino acid sequence should be important).</p><p>This first assumption is consistent with performance benchmarks of PhyloCSF, which demonstrate that it detects signatures of protein coding conservation in 93% of known <italic>Drosophila</italic> coding regions (Lin et al<italic>.</italic>, 2011). Even when restricted to short (10–60 codon) regions, which are notably difficult to evaluate (Lin et al., 2008), PhyloCSF still achieves greater than 90% sensitivity, with roughly 98% specificity (Lin et al<italic>.</italic>, 2011). Given that the novel extensions we report largely fall within this size range (median: 16 codons; 76% 10 codons or longer), we think that a signal of conservation, if present, should be detectable by PhyloCSF with comparable sensitivity (provided our second assumption, that extensions should behave similarly in their conservation properties to known coding regions, is also reasonable; further discussed below). Therefore, while some conserved signals may score negatively purely by chance, the majority of conserved signals should score positively.</p><p>In the specific case that selection is present but sufficiently weak to be undetectable, we would expect PhyloCSF scores to approach zero. This expectation arises from the fact that the PhyloCSF score is actually a likelihood ratio, calculated as the log ratio of probabilities of observing a given set of triplet substitutions under a coding model of evolution versus a non-coding model of evolution. 
Therefore, positive scores indicate a higher probability of observing a given set of substitutions if the ancestral sequence were coding, while a negative score actually indicates a greater likelihood of observing those substitutions under a non-coding model. In other words, a negative score actually provides evidence in favor of a non-coding model rather than indicating the absence of evidence for a protein-coding model. Simultaneous absence of evidence of both models would yield a ratio of probabilities close to 1.0 and a log-ratio of 0. Instead, we find a median PhyloCSF score for the novel extensions to be -159.8 decibans, indicating that, on average, a non-coding model fits these extensions far better (10<sup>16</sup>-fold) than a coding model. Despite this fact, in our manuscript, we deliberately use more conservative language and merely note a “lack of phylogenetic evidence for amino acid conservation,” rather than “evidence against conservation of protein coding.”</p><p>Our second assumption is that phylogenetic conservation should exhibit the same signals for C-terminal protein extensions as for classical protein-coding regions. We think this assumption to be reasonable because a large group of extensions — 283 proposed by Jungreis et al., 43 confirmed by us — conform to this behavior, and because, to our knowledge, the circumstances in which conservation does not yield such phylogenetic signatures are few.</p><p>However, such circumstances do exist. The prion-forming domains of fungi, in which prion forming ability can be maintained even if primary amino acid sequence is scrambled, provide an example (Ross et al<italic>.</italic>, 2005). 
It is possible that some of the extensions we have identified fall into a similar category, but we believe this number not to be large, as there are few known protein domains that behave similarly to the fungal prion-forming domains in this particular regard.</p><p>Another circumstance in which an ancient extension might score negatively by PhyloCSF is one in which the act of reading through a stop codon is ancient and phylogenetically conserved, but the amino acid sequence of the resulting extension unimportant, relatively unconstrained, and evolving. This scenario is similar to, but less constrained than, the case of the prion-forming domains in fungi, but supposes that either the act of readthrough or signals that incidentally promote readthrough, rather than readthrough products, are what has been selected. This circumstance provides a specific explanation for why an extension might be unselected (and appear to be selectively neutral or nearly-neutral, a model we already account for), rather than another model to explain the data.</p><p>Notably, in this scenario, if an evolutionarily unconstrained extension acquires a biological function that is subsequently selected and fixed, the extension and its function may be reasonably interpreted to be evolutionarily novel even if the act of readthrough at the upstream stop codon is ancient, because the extension's primary amino acid sequence and the function it yields are in fact evolutionarily novel. 
This circumstance is in fact a special case of the evolutionary model we propose in our Discussion, in which there is simply a long time lag between selection upon the extension and the appearance of a readthrough event.</p><p><italic>Next, you compared predicted and novel readthrough, UTRs from non read-through genes, and coding sequence using an algorithm that separates coding and non-coding sequence using nucleotide frequencies, finding that novel readthrough were somewhere in between coding and predicted readthrough on the one hand and non-coding sequence on the other. You note that this is consistent with an “evolutionary trajectory” from non-coding to coding. However, it is also consistent with sequences that simply have weak coding propensity</italic>.</p><p>These interpretations are mutually consistent. Under the Z-curve model, a weak coding propensity corresponds to a Z-curve score that is intermediate between the CDS-like and 3'UTR-like distributions. Our goal is to evaluate explanations for why these sequences, as a group, would show such a distribution of weak coding propensities. One explanation is the evolutionary trajectory we describe in our manuscript. Another plausible explanation would be the inverse trajectory, from a CDS-like nucleotide character to a 3'UTR-like nucleotide character. This situation would be expected to occur if the novel extensions we identified by ribosome profiling were caused by recent acquisition of a stop codon somewhere in the <italic>melanogaster</italic> lineage, followed by degradation of the formerly-coding sequence downstream of that stop. However, in this case, these extensions would be expected to: a) on average score positively by PhyloCSF (because they would be conserved in other, more ancient lineages), and b) lack upstream stop codons in species other than <italic>melanogaster.</italic> They do not. 
We therefore do not favor this model.</p><p>A third model, that the novel extensions as a group show a weak coding propensity by chance, is rejected by the Mann-Whitney U test, which demonstrates that the distribution of Z-curve scores is sufficiently different from that of distal 3' UTRs not to occur by chance (<italic>p</italic> = 1.02 × 10<sup>-23</sup>, Mann-Whitney U test, distal 3' UTR vs novel extensions).</p><p><italic>Finally, you look at</italic> D. melanogaster <italic>SNPs to evaluate whether there is a preference for synonymous relative to non-synonymous SNPs in novel vs predicted read-through, finding that there is a weak preference for synonymous SNPs, less than found in predicted SNPs, and say this is consistent with “mild or recently-imposed selection”. This might not be correct. If you have two read-through events – one which evolved at the base of the genus</italic> Drosophila<italic>, and one along the lineage that separates</italic> D. melanogaster <italic>from</italic> D. simulans<italic>/</italic>D. sechellia <italic>– and posit that these read-through events are under identical selective pressure, then you would expect both to have identical preference for synonymous substitutions, regardless of when they evolved. There is no good reason to assume that recently evolved sequences would be under any different strength of selection. If this novel hypothesis were correct, you would probably expect some, if not most, of the novel events to be polymorphic within the population, with some of these in the middle of selective sweeps. 
But, this would produce a different synonymous vs non-synonymous pattern (with an excess of non-synonymous SNPs perhaps)</italic>.</p><p><italic>It is odd to argue that the majority of read-through events are novel and under selection – something that you'd only expect to find if readthrough events had very short evolutionary half-lives or if there were some reason to have specifically evolved functional read-through events in</italic> D. melanogaster.</p><p>We regret that the language in our manuscript appears to have been unclear on another critical point: we did not intend to imply that the majority of extensions are both evolutionarily novel and under purifying selection, as this would yield the surprising and unlikely conclusions noted by the reviewers. Rather, we intended to state: 1) that the majority of extensions are not phylogenetically conserved as measured by PhyloCSF, and 2) that a subset of this group includes evolutionarily recent extensions that are under selection for protein coding.</p><p>The non-conserved set of extensions also includes other subsets, for example: 1) selectively neutral extensions subject to genetic drift, 2) novel extensions undergoing diversifying selection, and 3) deleterious extensions presumably not undergoing fixation and occurring only in a small subset of the population. The weak preference for synonymous SNPs among the novel extensions can be explained by the fact that this group is a heterogeneous mix of these various subsets, which themselves are subject to different magnitudes and directions of selective pressure. However, there need only be a subset of extensions undergoing purifying selection in order to make the preference deviate from the background level observed in distal 3' UTRs as we observed in our data. Granted, in order to be consistent with our observations, the effect yielded by this subset must exceed the contribution from the set of extensions undergoing diversifying selection. 
We have adjusted the language in our manuscript accordingly.</p><p>In addition, we have adjusted our Discussion section, to highlight our interpretation that only a subset of novel extensions are under protein-coding selection.</p><p><italic>The data are equally, if not more, consistent with the novel readthrough being subject to weak selection, with their origins unknown. One simple way to resolve this might be to run the Z-score program on orthologous UTRs of read-through and non-read-through genes across the genus. If these are novel to</italic> D. melanogaster<italic>, their scores should be significantly higher in</italic> D. melanogaster.</p><p>We are indeed interested in more precisely determining the phylogenetic ages of the extensions we report. We believe this will be possible as more insect species are sequenced and incorporated into the insect phylogeny, and as more individuals of <italic>Drosophila melanogaster</italic> are sequenced and their phylogenetic relationships modeled.</p><p>At present, it is possible to set loose bounds on the evolutionary ages of the phylogenetically conserved extensions by running PhyloCSF on phylogenetic subtrees on the sequenced <italic>Drosophila</italic> phylogeny, with the expectation that PhyloCSF scores for a given extension should increase as those lineages that lack the extension are pruned from the tree. 
However, it is not possible to perform such an analysis on novel extensions that came under selection within <italic>melanogaster,</italic> as these will not be conserved between species and therefore will not be measurable by cross-species tools such as PhyloCSF.</p><p>Unfortunately, a cross-species Z-curve analysis is also unlikely to answer this question, because: 1) Z-curve scores, evaluated for different species on the classifier trained specifically for <italic>melanogaster,</italic> would be differentially affected species-wise by species-specific nucleotide composition biases and 2) Z-curve scores for different species evaluated on different classifiers trained individually on each species will not be directly comparable, as Z-curve scores — like neural network scores or SVM scores — lack well-defined theoretical interpretations outside the score distribution of elements scored by the same classifier.</p><p>The best way to approach this would be to develop a PhyloCSF-like tool that would work on single-species population genetic data. Unfortunately, such a tool has not yet been published. While we are interested in this analysis for future work, we believe that estimating the precise evolutionary origin of each extension is beyond the purview of this specific manuscript.</p><p>Nonetheless, we do believe that, for the sake of the evolutionary model we have presented here, it is sufficient to demonstrate that a subset of the C-terminal extensions we have identified by ribosome profiling are evolutionarily novel and/or unique to <italic>melanogaster,</italic> even if their precise dates of origin are not defined. For the reasons outlined above (and in our manuscript), we continue to believe this interpretation to be the most plausible and consistent with our observations, and we hope the aforementioned changes to the manuscript are in the reviewers' opinions sufficient.</p></body></sub-article></article> |