Permalink
Cannot retrieve contributors at this time
Fetching contributors…
| <?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.1d1 20130915//EN" "JATS-archivearticle1.dtd"><article article-type="research-article" dtd-version="1.1d1" xmlns:xlink="http://www.w3.org/1999/xlink"><front><journal-meta><journal-id journal-id-type="nlm-ta">elife</journal-id><journal-id journal-id-type="hwp">eLife</journal-id><journal-id journal-id-type="publisher-id">eLife</journal-id><journal-title-group><journal-title>eLife</journal-title></journal-title-group><issn publication-format="electronic">2050-084X</issn><publisher><publisher-name>eLife Sciences Publications, Ltd</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">01102</article-id><article-id pub-id-type="doi">10.7554/eLife.01102</article-id><article-categories><subj-group subj-group-type="display-channel"><subject>Research article</subject></subj-group><subj-group subj-group-type="heading"><subject>Genomics and evolutionary biology</subject></subj-group><subj-group subj-group-type="heading"><subject>Microbiology and infectious disease</subject></subj-group></article-categories><title-group><article-title>The human gut and groundwater harbor non-photosynthetic bacteria belonging to a new candidate phylum sibling to Cyanobacteria</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes" id="author-4376"><name><surname>Di Rienzi</surname><given-names>Sara C</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="fn" rid="con1"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" equal-contrib="yes" id="author-4505"><name><surname>Sharon</surname><given-names>Itai</given-names></name><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="other" rid="par-6"/><xref ref-type="fn" 
rid="con2"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" id="author-4506"><name><surname>Wrighton</surname><given-names>Kelly C</given-names></name><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="con3"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" id="author-4507"><name><surname>Koren</surname><given-names>Omry</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="con4"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" id="author-4508"><name><surname>Hug</surname><given-names>Laura A</given-names></name><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="con5"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" id="author-4509"><name><surname>Thomas</surname><given-names>Brian C</given-names></name><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="con7"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" id="author-4510"><name><surname>Goodrich</surname><given-names>Julia K</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="con6"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" id="author-4511"><name><surname>Bell</surname><given-names>Jordana T</given-names></name><xref ref-type="aff" rid="aff3"/><xref ref-type="other" rid="par-7"/><xref ref-type="other" rid="par-8"/><xref ref-type="fn" rid="con8"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" id="author-4512"><name><surname>Spector</surname><given-names>Timothy D</given-names></name><xref ref-type="aff" rid="aff3"/><xref ref-type="other" rid="par-7"/><xref ref-type="other" rid="par-8"/><xref 
ref-type="fn" rid="con9"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" corresp="yes" id="author-2642"><name><surname>Banfield</surname><given-names>Jillian F</given-names></name><xref ref-type="aff" rid="aff2"/><xref ref-type="aff" rid="aff4"/><xref ref-type="corresp" rid="cor1">*</xref><xref ref-type="other" rid="par-5"/><xref ref-type="other" rid="par-9"/><xref ref-type="fn" rid="con10"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" corresp="yes" id="author-3608"><name><surname>Ley</surname><given-names>Ruth E</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="corresp" rid="cor2">*</xref><xref ref-type="other" rid="par-1"/><xref ref-type="other" rid="par-2"/><xref ref-type="other" rid="par-3"/><xref ref-type="other" rid="par-4"/><xref ref-type="fn" rid="con11"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><aff id="aff1"><institution content-type="dept">Department of Microbiology</institution>, <institution>Cornell University</institution>, <addr-line><named-content content-type="city">Ithaca</named-content></addr-line>, <country>United States</country></aff><aff id="aff2"><institution content-type="dept">Department of Earth and Planetary Science</institution>, <institution>University of California, Berkeley</institution>, <addr-line><named-content content-type="city">Berkeley</named-content></addr-line>, <country>United States</country></aff><aff id="aff3"><institution content-type="dept">Department of Twin Research and Genetic Epidemiology</institution>, <institution>King’s College London</institution>, <addr-line><named-content content-type="city">London</named-content></addr-line>, <country>United Kingdom</country></aff><aff id="aff4"><institution content-type="dept">Department of Environmental Science, Policy, and Management</institution>, <institution>University of 
California, Berkeley</institution>, <addr-line><named-content content-type="city">Berkeley</named-content></addr-line>, <country>United States</country></aff></contrib-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Kolter</surname><given-names>Roberto</given-names></name><role>Reviewing editor</role><aff><institution>Harvard Medical School</institution>, <country>United States</country></aff></contrib></contrib-group><author-notes><corresp id="cor1"><label>*</label>For correspondence: <email>jbanfield@berkeley.edu</email> (JFB);</corresp><corresp id="cor2"><label>*</label>For correspondence: <email>rel222@cornell.edu</email> (REL)</corresp><fn fn-type="con" id="equal-contrib"><label>†</label><p>These authors contributed equally to this work</p></fn></author-notes><pub-date date-type="pub" publication-format="electronic"><day>01</day><month>10</month><year>2013</year></pub-date><pub-date pub-type="collection"><year>2013</year></pub-date><volume>2</volume><elocation-id>e01102</elocation-id><history><date date-type="received"><day>19</day><month>06</month><year>2013</year></date><date date-type="accepted"><day>22</day><month>08</month><year>2013</year></date></history><permissions><copyright-statement>© 2013, Di Rienzi et al</copyright-statement><copyright-year>2013</copyright-year><copyright-holder>Di Rienzi et al</copyright-holder><license xlink:href="http://creativecommons.org/licenses/by/3.0/"><license-p>This article is distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/3.0/">Creative Commons Attribution License</ext-link>, which permits unrestricted use and redistribution provided that the original author and source are credited.</license-p></license></permissions><self-uri content-type="pdf" xlink:href="elife01102.pdf"/><abstract><object-id pub-id-type="doi">10.7554/eLife.01102.001</object-id><p>Cyanobacteria were responsible for the oxygenation of the 
ancient atmosphere; however, the evolution of this phylum is enigmatic, as relatives have not been characterized. Here we use whole genome reconstruction of human fecal and subsurface aquifer metagenomic samples to obtain complete genomes for members of a new candidate phylum sibling to Cyanobacteria, for which we propose the designation ‘Melainabacteria’. Metabolic analysis suggests that the ancestors to both lineages were non-photosynthetic, anaerobic, motile, and obligately fermentative. Cyanobacterial light sensing may have been facilitated by regulators present in the ancestor of these lineages. The subsurface organism has the capacity for nitrogen fixation using a nitrogenase distinct from that in Cyanobacteria, suggesting nitrogen fixation evolved separately in the two lineages. We hypothesize that Cyanobacteria split from Melainabacteria prior to or due to the acquisition of oxygenic photosynthesis. Melainabacteria remained in anoxic zones and differentiated by niche adaptation, including for symbiosis in the mammalian gut.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.001">http://dx.doi.org/10.7554/eLife.01102.001</ext-link></p></abstract><abstract abstract-type="executive-summary"><object-id pub-id-type="doi">10.7554/eLife.01102.002</object-id><title>eLife digest</title><p>Microbes are ubiquitous in the world and exist in complex communities called microbiomes that have colonized many environments, including the human gut. Until modern techniques for sequencing nucleic acids became available, many of the organisms found in these microbiomes could not be studied because they could not be cultured in the laboratory. 
However, advances in sequencing technology have made it possible to study the evolution and properties of these microbes, including their impact on human health.</p><p>Bacteria belonging to the phylum Cyanobacteria had a significant effect on the prehistoric Earth because they were the first organisms to produce gaseous oxygen as a byproduct of photosynthesis, and thus shaped the Earth’s oxygen-rich atmosphere. Early plants took up these bacteria in a symbiotic relationship, and plastids—the organelles in plant cells that perform photosynthesis and produce oxygen—are the descendants of Cyanobacteria.</p><p>Organisms evolutionarily related to Cyanobacteria have been found in the human gut and in various aquatic sources, but these bacteria have not been studied because it has not been possible to isolate or culture them. Now, Di Rienzi, Sharon et al. have used modern sequencing techniques to obtain complete genomes for some of these bacteria, which they assign to a new phylum called Melainabacteria.</p><p>By analyzing these genomes, Di Rienzi, Sharon et al. were able to make predictions about the cell structure and metabolic abilities of Melainabacteria. Like Cyanobacteria, they have two membranes surrounding the bacterial cell; unlike Cyanobacteria, however, they have flagella that propel them through liquid or across surfaces. Most interestingly, Melainabacteria are not able to perform photosynthesis, but instead produce energy through fermentation and release hydrogen gas that can be consumed by other microorganisms.</p><p>The genome of the bacteria isolated from water reveals that it has the capacity to fix nitrogen. Cyanobacteria can also fix atmospheric nitrogen, but the protein complexes used by the two phyla are not related, which suggests that nitrogen fixation evolved after the evolutionary divergence of Cyanobacteria and Melainabacteria.</p><p>By exploring previously published datasets of bacterial communities, Di Rienzi, Sharon et al. 
found that Melainabacteria are common in aquatic habitats. They are also prevalent in the guts of herbivorous mammals and humans with a predominantly vegetarian diet. Melainabacteria from the human gut also synthesize several B and K vitamins, which suggests that these bacteria are beneficial to their host because in addition to aiding with the digestion of plant fibers, they are also a source of vitamins.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.002">http://dx.doi.org/10.7554/eLife.01102.002</ext-link></p></abstract><kwd-group kwd-group-type="author-keywords"><title>Author keywords</title><kwd>Cyanobacteria</kwd><kwd>Melainabacteria</kwd><kwd>photosynthesis</kwd><kwd>nitrogen fixation</kwd><kwd>human gut</kwd><kwd>subsurface</kwd></kwd-group><kwd-group kwd-group-type="research-organism"><title>Research organism</title><kwd>Human</kwd><kwd>Other</kwd></kwd-group><funding-group><award-group id="par-1"><funding-source><institution-wrap><institution>National Institutes of Health</institution></institution-wrap></funding-source><award-id>R01 DK093595</award-id><principal-award-recipient><name><surname>Ley</surname><given-names>Ruth E</given-names></name></principal-award-recipient></award-group><award-group id="par-2"><funding-source><institution-wrap><institution>David and Lucile Packard Foundation</institution></institution-wrap></funding-source><award-id>2010-35960</award-id><principal-award-recipient><name><surname>Ley</surname><given-names>Ruth E</given-names></name></principal-award-recipient></award-group><award-group id="par-3"><funding-source><institution-wrap><institution>The Hartwell Foundation</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Ley</surname><given-names>Ruth E</given-names></name></principal-award-recipient></award-group><award-group id="par-4"><funding-source><institution-wrap><institution>Arnold and Mabel Beckman 
Foundation</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Ley</surname><given-names>Ruth E</given-names></name></principal-award-recipient></award-group><award-group id="par-5"><funding-source><institution-wrap><institution>DOE IFRC, Subsurface Biogeochemical Research Program, Office of Science, Biological and Environmental Research</institution></institution-wrap></funding-source><award-id>DE-AC02-05CH11231</award-id><principal-award-recipient><name><surname>Banfield</surname><given-names>Jillian F</given-names></name></principal-award-recipient></award-group><award-group id="par-6"><funding-source><institution-wrap><institution>EMBO</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Sharon</surname><given-names>Itai</given-names></name></principal-award-recipient></award-group><award-group id="par-7"><funding-source><institution-wrap><institution>Wellcome Trust</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Bell</surname><given-names>Jordana T</given-names></name><name><surname>Spector</surname><given-names>Timothy D</given-names></name></principal-award-recipient></award-group><award-group id="par-8"><funding-source><institution-wrap><institution>National Institute for Health Research</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Bell</surname><given-names>Jordana T</given-names></name><name><surname>Spector</surname><given-names>Timothy D</given-names></name></principal-award-recipient></award-group><award-group id="par-9"><funding-source><institution-wrap><institution>DOE Knowledgebase Program</institution></institution-wrap></funding-source><award-id>DE-SC0004918</award-id><principal-award-recipient><name><surname>Banfield</surname><given-names>Jillian F</given-names></name></principal-award-recipient></award-group><funding-statement>The funders had no role in study design, data collection 
and interpretation, or the decision to submit the work for publication.</funding-statement></funding-group><custom-meta-group><custom-meta><meta-name>elife-xml-version</meta-name><meta-value>2</meta-value></custom-meta><custom-meta specific-use="meta-only"><meta-name>Author impact statement</meta-name><meta-value>Melainabacteria, a candidate phylum related to Cyanobacteria, has been identified in gut and sediment samples by genomic analysis.</meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Among the geochemical changes that have occurred over the past few billion years, perhaps the most dramatic was the transformation of the Earth’s atmosphere and upper oceans into oxygen-rich environments (<xref ref-type="bibr" rid="bib6">Bekker et al., 2004</xref>). Cyanobacteria are presumed responsible for this geochemical revolution, as they comprise the sole lineage known to have innovated the production of oxygen as a byproduct of photosynthesis (<xref ref-type="bibr" rid="bib51">Mulkidjanian et al., 2006</xref>). Further, nitrogen fixation by Cyanobacteria is central to the Earth’s nitrogen cycle (<xref ref-type="bibr" rid="bib73">Vitousek et al., 2002</xref>). Cyanobacteria are inferred to be one of the earliest branching bacterial lineages (<xref ref-type="bibr" rid="bib1">Altermann and Kazmierczak, 2003</xref>; <xref ref-type="bibr" rid="bib6">Bekker et al., 2004</xref>) and have diversified across environments—land, fresh, and salt water, and all levels of the photic zone (<xref ref-type="bibr" rid="bib22">Dworkin, 2006</xref>). Via endosymbiosis, Cyanobacteria became the chloroplasts of plants (<xref ref-type="bibr" rid="bib58">Sagan, 1967</xref>), a role that underlines their broad evolutionary importance.</p><p>The evolutionary history of the Cyanobacteria phylum is only partially resolved: no related taxa, from which a common ancestor could be inferred, have been described. 
However, recent culture-independent 16S rRNA gene surveys of microbial communities have revealed a novel clade sibling and basal to Cyanobacteria (<xref ref-type="bibr" rid="bib40">Ley et al., 2005</xref>). The presence of organisms related to Cyanobacteria in the gut is notable because they are widely shared across individuals (<xref ref-type="bibr" rid="bib11">Consortium HMP, 2012</xref>), where members of this group can comprise up to 20% of the total sequences recovered from stool (<xref ref-type="bibr" rid="bib18">Dethlefsen and Relman, 2011</xref>), as well as shared across various mammalian species (<xref ref-type="bibr" rid="bib41">Ley et al., 2008</xref>). As Cyanobacteria are photosynthetic organisms, it has been assumed that these sequences represent genomic material derived from ingestion of chloroplasts or Cyanobacterial cells (<xref ref-type="bibr" rid="bib70">Turnbaugh et al., 2009</xref>; <xref ref-type="bibr" rid="bib35">Koenig et al., 2011</xref>; <xref ref-type="bibr" rid="bib11">Consortium HMP, 2012</xref>). However, given their large evolutionary separation from Cyanobacteria and the lack of cultured representatives, no conclusion as to the roles of these predominant organisms of the human gut has been possible.</p><p>Related bacteria deep-branching from the Cyanobacteria have also been detected in water and other anoxic environments including sediments (<xref ref-type="bibr" rid="bib40">Ley et al., 2005</xref>). An earlier phylogenetic reconstruction based on full-length 16S rRNA gene sequences indicated that this water-soil-sediment-derived clade is distinct from that made up entirely of gut-derived sequences (<xref ref-type="bibr" rid="bib40">Ley et al., 2005</xref>). 
This mapping of habitats onto the two clades suggested that niche adaptation had shaped the groups’ evolution and hence their phylogeny, but beyond this observation the lineage remained enigmatic.</p><p>New sequencing methods and bioinformatics advances provide a route for genomic analysis of uncultured organisms from complex microbial communities (<xref ref-type="bibr" rid="bib19">Dick et al., 2009</xref>; <xref ref-type="bibr" rid="bib31">Iverson et al., 2012</xref>; <xref ref-type="bibr" rid="bib74">Wrighton et al., 2012</xref>). We show that these methods can yield complete and near-complete genomes from relatively low abundance organisms, without the need for single cell genomic approaches. Here, we analyze eight curated genomes from bacteria from intestinal samples and an aquifer sediment to evaluate their metabolisms and roles in their respective habitats. The analysis provides clues as to the ancestral state of the lineage that gave rise to these organisms and to the Cyanobacteria.</p></sec><sec id="s2" sec-type="results"><title>Results</title><sec id="s2-1"><title>Reconstruction of genomes from metagenomic samples</title><p>We generated metagenomes from three fecal samples obtained from three healthy adult humans (termed A, B, and C; <xref ref-type="table" rid="tbl1">Table 1</xref>). In addition, we identified genome fragments that derived from an organism deeply branching with respect to Cyanobacteria in a microbial community metagenomic dataset from a subsurface aquifer (<xref ref-type="bibr" rid="bib74">Wrighton et al., 2012</xref>). The microbial communities in the human fecal samples and the subsurface differ substantially. 
The bacterial communities of human fecal samples A, B, and C are typical of human fecal microbiota, as they are predominantly composed of members of the Firmicutes and Bacteroidetes, with a few other phyla represented (<xref ref-type="fig" rid="fig1">Figure 1A–C</xref>) (<xref ref-type="bibr" rid="bib11">Consortium HMP, 2012</xref>). The subsurface sample, on the other hand, has a greater phylum-level phylogenetic diversity with the most abundant members belonging to Proteobacteria and the candidate phyla OD1 and OP11 (<xref ref-type="fig" rid="fig1">Figure 1D</xref>). For both sample types, the abundances of genomes from the Cyanobacteria sibling clade were less than 5% of the total community.<table-wrap id="tbl1" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.003</object-id><label>Table 1.</label><caption><p>Samples from which Melainabacteria genomes were recovered</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.003">http://dx.doi.org/10.7554/eLife.01102.003</ext-link></p></caption><table frame="hsides" rules="groups"><thead><tr><th>Sample</th><th>Environment</th><th># of reads</th><th>Abundance%</th><th># genomes recovered</th></tr></thead><tbody><tr><td>A</td><td>Gut</td><td>109,557,616</td><td align="char" char=".">4</td><td>1 Complete, 1 Partial</td></tr><tr><td>B</td><td>Gut</td><td>124,163,248</td><td align="char" char=".">3</td><td>2 Complete</td></tr><tr><td>C</td><td>Gut</td><td>112,578,264</td><td align="char" char=".">2</td><td>1 Complete, 1 Near Complete, 1 Partial</td></tr><tr><td>ACD</td><td>Aquifer</td><td>232,878,979</td><td align="char" char=".">0.7</td><td>1 Near Complete</td></tr></tbody></table><table-wrap-foot><fn><p>Sample, number of reads sequenced, and estimates of the abundance of Melainabacteria in the communities based on 16S rRNA gene survey and coverage information. 
ACD20 was assembled from three samples (see <xref ref-type="bibr" rid="bib74">Wrighton et al., 2012</xref>).</p></fn></table-wrap-foot></table-wrap><fig id="fig1" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.004</object-id><label>Figure 1.</label><caption><title>Community composition of samples containing Melainabacteria.</title><p>(<bold>A</bold>–<bold>C</bold>) The relative composition of the human fecal samples A, B, and C, and (<bold>D</bold>) the aquifer community members. In <bold>A</bold> and <bold>D</bold> estimated percent relative abundance of the community is plotted, and in <bold>B</bold> and <bold>C</bold>, coverage is plotted, but estimated percent relative abundance is noted on the figure for select members. Organisms are classified at the phylum level. The human fecal sample A community is dominated by <italic>Prevotella copri</italic> DSM 18205, which accounts for more than 40% of the sequencing reads and is represented by several strains. Sequencing depth was not sufficient for human fecal sample C to accurately estimate roughly 25% of the community abundance, which includes MEL.C3. Aspects of the community composition of the aquifer sample are discussed in <xref ref-type="bibr" rid="bib74">Wrighton et al. 
(2012)</xref>.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.004">http://dx.doi.org/10.7554/eLife.01102.004</ext-link></p></caption><graphic xlink:href="elife01102f001"/></fig></p><p>Despite the relatively low abundance of these genomes in the samples (<xref ref-type="table" rid="tbl1">Table 1</xref>), recently developed algorithms that improve the assembly and manual curation of metagenomic data (<xref ref-type="bibr" rid="bib62">Sharon et al., 2013</xref>) allowed us to recover two genomes from sample A (MEL.A1, MEL.A2), two from sample B (MEL.B1, MEL.B2), and three genomes from sample C (MEL.C1, MEL.C2, MEL.C3) for a total of seven distinct genomes reconstructed from human fecal samples (<xref ref-type="table" rid="tbl1 tbl2">Tables 1 and 2</xref>).</p><p>Through genome curation, we were able to establish linkage among all scaffolds for four of these genomes (complete genomes; <xref ref-type="table" rid="tbl2">Table 2</xref>). Completeness was confirmed by validating assembly graph connectivity, and also by considering expected genome features such as single copy genes. Correctness was confirmed by re-assembly of potentially mis-assembled regions such as scaffold ends, and by considering the ‘phylogenetic profile’ of genes in each scaffold. Our curation method verified unique paired read placement throughout the reconstructed genomes, a requirement consistent with standard methods of isolate genomics. All scaffolds identified as deriving from an organism with some similarity to Cyanobacteria, based on the phylogenetic profile of the encoded genes, were incorporated into the closed, complete genomes. Additional small scaffolds were identified and incorporated using paired read placement. 
The phylogenetic signal for novelty was robust, because essentially all other genomic fragments (excluding phage and plasmids) shared high similarity with genomes of previously sequenced organisms.<table-wrap id="tbl2" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.005</object-id><label>Table 2.</label><caption><p>Melainabacteria genomes recovered in this study</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.005">http://dx.doi.org/10.7554/eLife.01102.005</ext-link></p></caption><table frame="hsides" rules="groups"><thead><tr><th>Sample ID</th><th>Coverage</th><th>Genome status</th><th>Size (bp)</th><th>%GC</th><th>Scaffolds</th><th>N50</th><th>Coding features</th><th>16S rRNA genes</th></tr></thead><tbody><tr><td>ACD20</td><td>30x</td><td>Near Complete</td><td align="char" char=".">2,979,548</td><td align="char" char=".">33.5</td><td align="char" char=".">191</td><td align="char" char=".">33,361</td><td align="char" char=".">2,819</td><td>ND</td></tr><tr><td>MEL.A1</td><td>73x</td><td>Complete</td><td align="char" char=".">1,867,336</td><td align="char" char=".">32.9</td><td align="char" char=".">1</td><td align="char" char=".">1,867,336</td><td align="char" char=".">1,832</td><td align="char" char=".">2</td></tr><tr><td>MEL.A2</td><td>5.5x</td><td>Partial</td><td align="char" char=".">1,192,455</td><td align="char" char=".">30.6</td><td align="char" char=".">88</td><td align="char" char=".">16,613</td><td align="char" char=".">1,386</td><td>ND</td></tr><tr><td>MEL.B1</td><td>62x</td><td>Complete</td><td align="char" char=".">2,302,307</td><td align="char" char=".">35.3</td><td align="char" char=".">21</td><td align="char" char=".">542,117</td><td align="char" char=".">2,219</td><td align="char" char=".">2</td></tr><tr><td>MEL.B2</td><td>44x</td><td>Complete</td><td align="char" char=".">2,308,205</td><td align="char" char=".">36.3</td><td align="char" char=".">26</td><td align="char" char=".">375,376</td><td 
align="char" char=".">2,222</td><td align="char" char=".">2</td></tr><tr><td>MEL.C1</td><td>26.5x</td><td>Complete</td><td align="char" char=".">2,053,642</td><td align="char" char=".">34.1</td><td align="char" char=".">4</td><td align="char" char=".">1,742,055</td><td align="char" char=".">2,120</td><td align="char" char=".">2</td></tr><tr><td>MEL.C2</td><td>27.5x</td><td>Near Complete</td><td align="char" char=".">2,159,327</td><td align="char" char=".">35.3</td><td align="char" char=".">34</td><td align="char" char=".">146,232</td><td align="char" char=".">2,104</td><td align="char" char=".">2</td></tr><tr><td>MEL.C3</td><td>6x</td><td>Partial</td><td align="char" char=".">1,323,478</td><td align="char" char=".">29.9</td><td align="char" char=".">93</td><td align="char" char=".">15,878</td><td align="char" char=".">1,472</td><td>ND</td></tr></tbody></table><table-wrap-foot><fn><p>ND = not determined. See the section <italic>Genome assembly</italic> in ‘Materials and methods’ for an explanation of Genome Status.</p></fn></table-wrap-foot></table-wrap></p><p>The assembled genomes range from 1.9 to 2.3 Mbp and encode 1,800 to 2,230 genes. Additionally, we analyzed the binned genome, hereafter, ACD20, (<xref ref-type="table" rid="tbl1 tbl2">Tables 1 and 2</xref>) from the aquifer dataset (<xref ref-type="bibr" rid="bib74">Wrighton et al., 2012</xref>). The ACD20 genome is larger than the genomes recovered from fecal samples—3.0 Mbp encoding 2,819 genes. Additional genome details are provided in <xref ref-type="table" rid="tbl1 tbl2">Tables 1 and 2</xref>. 
We used all eight genomes for phylogenetic analyses and four representative genomes (three from the gut plus the sediment genome) for the metabolic analyses that follow.</p></sec><sec id="s2-2"><title>A new candidate phylum sibling to Cyanobacteria</title><p>Corroborating earlier findings (<xref ref-type="bibr" rid="bib40">Ley et al., 2005</xref>), a 16S rRNA gene sequence-based phylogeny built with publicly available sequences places the unknown lineages, represented in part by the gut and aquifer bacteria, basal to photosynthetic Cyanobacteria (<xref ref-type="fig" rid="fig2">Figure 2A</xref>). Three subgroups are revealed, one of which comprises sequences obtained from animal guts (<xref ref-type="fig" rid="fig2">Figure 2B</xref>). The 16S rRNA gene sequences of the gut and aquifer bacteria share no more than 84% identity to Cyanobacterial sequences, consistent with placement of these organisms in a new candidate phylum (&lt;85% identity, as suggested previously [<xref ref-type="bibr" rid="bib30">Hugenholtz et al., 1998</xref>]). The bacterial tree has been described as a polytomy due to the inability of 16S rRNA gene phylogenies to capture any specific branching order for the phyla (<xref ref-type="bibr" rid="bib53">Pace, 1997</xref>), so this phylum is unusual in its robustly supported relationship to Cyanobacteria. To further substantiate this evolutionary relationship, we constructed a phylogeny of concatenated ribosomal protein sequences (<xref ref-type="fig" rid="fig3">Figure 3A,B</xref>). The result shows that the eight new genomes form a monophyletic lineage that branches deeply from the Cyanobacterial lineage, with ACD20 basal to the group (<xref ref-type="fig" rid="fig3">Figure 3B</xref>). Importantly, a common ancestor for these organisms and photosynthetic Cyanobacteria is well supported (100/100 bootstrap bipartitions) in both trees. 
With the sum of this evidence, we designate these bacteria as the new candidate phylum Melainabacteria, where ‘melaina’ refers to the Greek nymph of dark waters.<fig id="fig2" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.006</object-id><label>Figure 2.</label><caption><title>16S rRNA gene phylogeny of Melainabacteria and Cyanobacteria.</title><p>Trees were built using 16S rRNA gene sequences from MEL.A1, MEL.B1, and MEL.B2 (the 16S rRNA sequence of ACD20 was not recovered). (<bold>A</bold>) 16S rRNA gene phylogeny tree with five representative sequences from each phylum obtained from the Greengenes May 2011 database (<xref ref-type="bibr" rid="bib17">DeSantis et al., 2006</xref>). Bootstrap values greater than 50% are indicated. (<bold>B</bold>) 16S rRNA gene phylogeny built using one representative sequence from each order within Cyanobacteria from the Greengenes database (May 2011) (<xref ref-type="bibr" rid="bib17">DeSantis et al., 2006</xref>) besides orders YS2, SM1D11, and mle1-12 from which all sequences were used. For Melainabacteria, the habitats from which the sequences were predominantly derived are indicated and colored according to isolation source (blue = environmental (non-gut); brown = gut). Cyanobacteria are displayed in green. Bootstrap values greater than 70% are indicated by a black square.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.006">http://dx.doi.org/10.7554/eLife.01102.006</ext-link></p></caption><graphic xlink:href="elife01102f002"/></fig><fig-group><fig id="fig3" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.007</object-id><label>Figure 3.</label><caption><title>Concatenated ribosomal protein phylogeny of the Melainabacteria and Cyanobacteria.</title><p>Maximum likelihood phylogeny and trait-based comparison of the eight novel organisms and 80 Cyanobacteria based on a concatenated protein alignment of 16 core ribosomal proteins from 733 taxa. 
In (<bold>A</bold>) the complete tree is shown at the phylum level and in (<bold>B</bold>) only the cyanobacterial-melainabacterial portion of the tree is shown. Bootstrap values >50% are indicated. Cyanobacteria branches are colored blue and Melainabacteria branches, red. The complete tree with all taxa shown is provided in <xref ref-type="fig" rid="fig3s1">Figure 3—figure supplement 1</xref>. The protein alignment on which this tree is based is provided in <xref ref-type="supplementary-material" rid="SD1-data">Figure 3—source data 1</xref>.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.007">http://dx.doi.org/10.7554/eLife.01102.007</ext-link></p><p><supplementary-material id="SD1-data"><object-id pub-id-type="doi">10.7554/eLife.01102.008</object-id><label>Figure 3—source data 1.</label><caption><title>Concatenated protein alignment of 16 core ribosomal proteins from 733 taxa and the eight Melainabacteria described here.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.008">http://dx.doi.org/10.7554/eLife.01102.008</ext-link></p></caption><media mime-subtype="fasta" mimetype="application" xlink:href="elife01102s001.fasta"/></supplementary-material></p></caption><graphic xlink:href="elife01102f003"/></fig><fig id="fig3s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01102.009</object-id><label>Figure 3—figure supplement 1.</label><caption><title>Complete phylogeny of 733 taxa and the eight Melainabacteria based on a concatenated protein alignment of 16 core ribosomal proteins.</title><p>Melainabacteria branches are shown in red and Cyanobacteria branches in blue. 
Bootstrap values >50% are indicated.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.009">http://dx.doi.org/10.7554/eLife.01102.009</ext-link></p></caption><graphic xlink:href="elife01102fs001"/></fig></fig-group></p><p>The newly described melainabacterial genomes contain other genes that reinforce an ancestry shared with Cyanobacteria. For instance, the genomes encode the 30S ribosomal protein S1 <italic>rpsA</italic> gene, rather than the homolog <italic>ypfD</italic>, which is exclusive to the Firmicutes (<xref ref-type="bibr" rid="bib10">Danchin, 2009</xref>). Moreover, two of the gut genomes (MEL.B1 and MEL.B2) encode the A type of RnpB (E-values 5.676e-59, 2.235e-60 respectively), which is found in all Bacteria except the Firmicutes and Tenericutes (<xref ref-type="bibr" rid="bib27">Haas et al., 1996</xref>; <xref ref-type="bibr" rid="bib83">Zwieb et al., 2011</xref>). Three of the four complete genomes also have homologs to the S-layer like COG, CyOG00138 (e.g., <italic>Anabaena variabilis</italic>: Q3MBT3), a protein found only in Cyanobacteria (<xref ref-type="bibr" rid="bib51">Mulkidjanian et al., 2006</xref>). Although these genes are not exclusive to Cyanobacteria, they are important phylogenetic markers, support a shared ancestry with Cyanobacteria, and refute an ancestry with the Firmicutes, a possibility that arises when considering the metabolic genes of the Melainabacteria genomes (see below).</p></sec><sec id="s2-3"><title>Melainabacteria are non-photosynthetic and non-respiratory but contain homologs to genes encoding light response regulators in photosynthetic Cyanobacteria</title><p>Oxygenic photosynthesis is perhaps the most exceptional characteristic of Cyanobacteria, as all other bacterial photosyntheses are anoxygenic. 
The Melainabacteria genomes appear to entirely lack genes for photosynthesis (photosystem I and photosystem II, thylakoid membranes, succinate dehydrogenase, and the cytochrome b<sub>6</sub>f complex), indicating that none have the capacity for oxygenic or anoxygenic photosynthesis. Also absent are genes for soluble (e.g., plastocyanin or photosynthetic ferredoxin) and membrane-affiliated electron carrier proteins (e.g., cytochromes, quinones, Fe-S, or flavin). Additionally, we found no genes for aerobic respiration including terminal quinol and cytochrome <italic>c</italic> oxidases, terminal reductases involved in anaerobic respiration (e.g., fumarate, nitrate), or carbon fixation pathways. Together these findings suggest that these members of the Melainabacteria are not capable of phototrophy or respiratory metabolism.</p><p>Despite being non-photosynthetic, the melainabacterial genomes encode homologs of the circadian rhythm regulators RpaA and RpaB and the high intensity light sensor NblS. The histidine kinase NblS in Cyanobacteria preserves the photosynthetic machinery by regulating its expression and degradation under high and blue/UV-A conditions (<xref ref-type="bibr" rid="bib71">van Waasbergen et al., 2002</xref>). RpaA and RpaB are found in all Cyanobacteria (<xref ref-type="bibr" rid="bib51">Mulkidjanian et al., 2006</xref>), where they regulate the circadian clock KaiABC (<xref ref-type="bibr" rid="bib28">Hanaoka et al., 2012</xref>) and link energy transfer between the antennae and the photosystem (<xref ref-type="bibr" rid="bib3">Ashby and Mullineaux, 1999</xref>). The four complete genomes of Melainabacteria lack SasA, which typically functions as the sensor to the response regulators RpaA and RpaB (<xref ref-type="bibr" rid="bib28">Hanaoka et al., 2012</xref>), as well as any photosynthetic machinery, which suggests that these proteins may have another function. 
Similarly, the NblS homolog in the gut and aquifer Melainabacteria most likely has a different function. The current KEGG database (<xref ref-type="bibr" rid="bib32">Kanehisa et al., 2012</xref>) indicates that RpaA and NblS are exclusive to Cyanobacteria. RpaB is likely also exclusive to Cyanobacteria; however, given this gene’s similarity to other response regulators it could be considered present in two other bacterial genomes, <italic>Variovorax paradoxus</italic> and <italic>Desulfomicrobium baculatum</italic>. In all cases, the three Melainabacteria genes have highest homology to those present in Cyanobacteria (<xref ref-type="supplementary-material" rid="SD6-data">Supplementary file 1</xref>).</p></sec><sec id="s2-4"><title>Melainabacteria are encapsulated by a Cyanobacteria-like cell envelope</title><p>Like Cyanobacteria, Melainabacteria are inferred to have a Gram-negative cell envelope. This conclusion is based on the large number of genes for lipopolysaccharide (LPS), Lipid A biosynthesis, and O-antigen polymerases and transporters found in the curated genomes described here (<xref ref-type="supplementary-material" rid="SD6-data">Supplementary file 1</xref>). LPS, Lipid A, and O-antigen are components of the Gram-negative outer membrane; in contrast, Gram-positive bacteria lack an outer membrane and therefore lack these structures (<xref ref-type="bibr" rid="bib56">Purves et al., 2003</xref>). This finding is significant, because many genes in the newly reconstructed genomes share closest sequence similarity to Gram-positive members of the Firmicutes (see below). As previously mentioned, the genomes encode a homolog to an S-layer-like protein, suggesting that the cell envelope has an S-layer. S-layers and O-antigens have both been previously observed on photosynthetic Cyanobacteria (<xref ref-type="bibr" rid="bib29">Hoiczyk and Hansel, 2000</xref>). 
Taken together, the cell envelope is likely similar to that of Cyanobacteria, consistent with a shared vertical ancestry.</p></sec><sec id="s2-5"><title>Melainabacteria are obligate anaerobic fermenters</title><p>Based on the lack of a linked electron transport chain, aerobic or anaerobic respiratory complexes (see above), and a complete TCA cycle (see below), and the presence of fermentative and degradative enzymes, we infer that Melainabacteria are obligate anaerobic fermenters. We predict that Melainabacteria can use a wide variety of carbon compounds, including hemicellulosic compounds (only ACD20), polysaccharides, oligosaccharides, and simple sugars, as well as organic acids, amino acids, and fatty acids to yield hydrogen, lactate, acetate (ACD20), formate (Gut), possibly butyrate (ACD20) and ethanol (Gut) (<xref ref-type="fig" rid="fig4">Figure 4</xref>). Specific sugars predicted to be fermented are glucose, fructose, sorbitol, mannose, trehalose, starch, glycogen, hemicellulose, and amylose, and the relevant enzymes are α-galactosidase, β-galactosidase, α-glucosidase, β-glucosidase, β-glucuronidase, β-fructofuranosidase (sucrase), α-mannosidase, pullulanase, α-amylase, and endo-1,4-beta-xylanase. These enzymes facilitate the utilization of a variety of sugar compounds by degrading the compounds into simpler sugars that can enter the main Embden-Meyerhof-Parnas (EMP) glycolytic pathway. This pathway contains not the classical ATP-dependent enzyme, but a pyrophosphate-dependent phosphofructokinase, a gene found in diverse organisms capable of anaerobic glycolysis (<xref ref-type="bibr" rid="bib49">Mertens, 1991</xref>). This difference in phosphoryl donor specificity may confer an energetic advantage to the cell when glycolysis is the primary source of ATP (<xref ref-type="fig" rid="fig4">Figure 4</xref>, gene 3). 
The genomes have the genes necessary for hexose interconversion in the EMP pathway via the pentose phosphate pathway, such that ribose, arabinose, xylulose, and other five-carbon sugar or sugar-alcohols may be utilized. Unlike most Cyanobacteria, which use internal carbon pools for fermentation (<xref ref-type="bibr" rid="bib66">Stal and Moezelaar, 1997</xref>), Melainabacteria likely acquire sugars and sugar-alcohols from the environment via a variety of cytoplasmic membrane permeases (<xref ref-type="fig" rid="fig4">Figure 4</xref>). Overall, the cells are inferred to have a lifestyle analogous to anaerobic obligate fermentative bacteria known to play an important role in carbon transformation in gut (<xref ref-type="bibr" rid="bib43">Mackie, 2002</xref>) and subsurface systems.<fig id="fig4" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.010</object-id><label>Figure 4.</label><caption><title>The physiological and metabolic landscape of Melainabacteria.</title><p>Metabolic predictions for Melainabacteria based on genes identified in <xref ref-type="supplementary-material" rid="SD2-data">Figure 4—source data 1</xref>. Genes in pathways detected in the genomes of the subsurface and at least one gut genome (white box), only in the subsurface genome (grey box), only in at least one gut genome (orange box), and genes missing from pathways in all genomes (red box). Glycolysis proceeds via the canonical Embden-Meyerhof-Parnas (EMP) pathway with the exception of fructose-6-phosphate 1-phosphotransferase (EC:2.7.1.90, gene 3). Names of pathways and fermentation end-products are bolded and ATP generated by substrate-level phosphorylation are noted. All Melainabacteria genomes sampled lack electron transport chain components (including cytochromes (Cyto), succinate dehydrogenase (sdh), flavins, quinones), terminal respiratory oxidases or reductases, and photosystem I or II (PS1, PS2). 
The genomes also lack a complete TCA cycle (absent enzymes noted by red boxes), with the TCA enzymes instead linked to the fermentation of amino acids and organic acids denoted (pathways, blue arrows). Ferredoxin (Fd, green text) is important for hydrogen (H<sub>2</sub>) production via hydrogenases (yellow background box). Proton translocation mechanisms (green background box) may be achieved by the activity of trimeric oxaloacetate (OAA) decarboxylase and sodium-hydrogen antiporter, pyrophosphate (PPi) hydrolysis with pyrophosphatases, 11 subunit NADH dehydrogenase, and an annotated NiFe hydrogenase (green enzyme). Annotations for the gene numbers are in <xref ref-type="supplementary-material" rid="SD3-data">Figure 4—source data 2</xref>. The complete metabolic comparison of the Melainabacteria can be accessed at <ext-link ext-link-type="uri" xlink:href="http://ggkbase.berkeley.edu/genome_summaries/81-MEL-Metabolic-Overview-June2013">http://ggkbase.berkeley.edu/genome_summaries/81-MEL-Metabolic-Overview-June2013</ext-link>.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.010">http://dx.doi.org/10.7554/eLife.01102.010</ext-link></p><p><supplementary-material id="SD2-data"><object-id pub-id-type="doi">10.7554/eLife.01102.011</object-id><label>Figure 4—source data 1.</label><caption><title>Examination of enzymes (steps) in near-complete KEGG based modules shared among or unique to subsurface ACD20 and gut Melainabacteria genomes MEL.A1, MEL.B1, and MEL.B2.</title><p>Analysis is based on the KEGG Module database (<xref ref-type="bibr" rid="bib33">Kanehisa and Goto, 2000</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.011">http://dx.doi.org/10.7554/eLife.01102.011</ext-link></p></caption><media mime-subtype="docx" mimetype="application" xlink:href="elife01102s002.docx"/></supplementary-material></p><p><supplementary-material id="SD3-data"><object-id 
pub-id-type="doi">10.7554/eLife.01102.012</object-id><label>Figure 4—source data 2.</label><caption><title>Gene annotations corresponding to the numbers in <xref ref-type="fig" rid="fig4">Figure 4</xref>.</title><p>If the gene occurs in both the ACD20 and gut genomes, the reported annotation is based on ACD20.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.012">http://dx.doi.org/10.7554/eLife.01102.012</ext-link></p></caption><media mime-subtype="docx" mimetype="application" xlink:href="elife01102s003.docx"/></supplementary-material></p></caption><graphic xlink:href="elife01102f004"/></fig></p></sec><sec id="s2-6"><title>Metabolic differences between Melainabacteria and Cyanobacteria are represented by genes scattered throughout the Melainabacteria genome</title><p>Nearly 30% of the 920 core conserved orthologous genes (COGs) match most closely to genes belonging to members of the phylum Firmicutes that have a fermentative-based metabolism, compared to 15% with closest matches to cyanobacterial genes (<xref ref-type="fig" rid="fig5">Figure 5A</xref>). These non-cyanobacterial genes are spread throughout the genomes (<xref ref-type="fig" rid="fig5s1">Figure 5—figure supplement 1</xref>), arguing against acquisition via a recent lateral transfer event or a chimeric assembly artifact. The COGs whose best match are to COGs of Firmicutes are enriched in functions related to metabolism (<xref ref-type="fig" rid="fig5">Figure 5B</xref>), including metabolism of carbohydrates, amino sugars, nucleotides, amino acids, and vitamins. It should be noted that while the best match to these genes are within the Firmicutes phylum, the abundance of Firmicutes genomes in databases may have inherently biased this result. 
These results corroborate extensive divergence in the metabolic lifestyles of Cyanobacteria and Melainabacteria.<fig-group><fig id="fig5" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.013</object-id><label>Figure 5.</label><caption><title>MEL-COG phylum and functional assignments.</title><p>(<bold>A</bold>) Assignment of the 920 MEL-COGs (<xref ref-type="supplementary-material" rid="SD4-data">Figure 5—source data 1</xref>) to their best matching phyla. (<bold>B</bold>) Functional assignment of COGs by phylum assignment. Only COGs with functional assignments were considered. Number of MEL-COGs with no/multiple functional assignments are 532/42, 136/12, and 87/10 for All, Firmicutes, and Cyanobacteria, respectively.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.013">http://dx.doi.org/10.7554/eLife.01102.013</ext-link></p><p><supplementary-material id="SD4-data"><object-id pub-id-type="doi">10.7554/eLife.01102.014</object-id><label>Figure 5—source data 1.</label><caption><title>List of 920 MEL-COGs, including their assigned phylum and KEGG Orthology (KO) identifier.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.014">http://dx.doi.org/10.7554/eLife.01102.014</ext-link></p></caption><media mime-subtype="xlsx" mimetype="application" xlink:href="elife01102s004.xlsx"/></supplementary-material></p></caption><graphic xlink:href="elife01102f005"/></fig><fig id="fig5s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01102.015</object-id><label>Figure 5—figure supplement 1.</label><caption><title>Distribution of MEL-COGs with best hits from different phyla across the MEL-A1 genome.</title><p>Gene distribution across the genome does not support large-scale recombination, lateral transfer events, or a chimeric genome assembly accounting for the presence of genes with greater similarity to genes from phyla other than 
Cyanobacteria.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.015">http://dx.doi.org/10.7554/eLife.01102.015</ext-link></p></caption><graphic xlink:href="elife01102fs002"/></fig></fig-group></p></sec><sec id="s2-7"><title>The unusual hydrogenases of the Melainabacteria suggest a syntrophic H<sub>2</sub>−producing niche</title><p>Both gut and subsurface Melainabacteria studied here have trimeric confurcating FeFe hydrogenases with motifs required for catalytic activity (<xref ref-type="fig" rid="fig4">Figure 4</xref>, <xref ref-type="supplementary-material" rid="SD6-data">Supplementary file 1</xref>). One of the three confurcating hydrogenases in ACD20, ACD20_18461, has all the necessary residues for functionality (L1, L2, and L3 motifs), while the other two, ACD20_9246_G0007 and ACD20_9246_G0010, contain all three motifs but have a replacement of a serine by cysteine in motif 1 (TSCSPGW rather than TSCCPAW) and have replaced the cysteine in motif 3 with isoleucine. The gut Melainabacteria FeFe hydrogenase has a complete L1 motif but lacks L2 and L3 motifs. These hydrogenases may indicate an ecological role in H<sub>2</sub> production in both gut and subsurface systems: syntenic homologs for the three subunits have been identified in genomes of obligate syntrophs and fermenters known to produce high molar ratios of H<sub>2</sub> (<xref ref-type="bibr" rid="bib64">Sieber et al., 2012</xref>). The production of H<sub>2</sub> typically requires a syntrophic association with an H<sub>2</sub>-consuming partner to maintain low partial hydrogen pressures. 
Therefore, in addition to being anaerobic fermenters, the Melainabacteria may be H<sub>2</sub> producers living in syntrophy with archaeal methanogens or bacterial acetogens in the human gut and with respiring organisms in the subsurface.</p></sec><sec id="s2-8"><title>The Melainabacteria genomes encode an incomplete TCA cycle</title><p>With <italic>Prochlorococcus</italic>, marine <italic>Synechococcus</italic>, and UCYN-A as exceptions, the vast majority of Cyanobacteria have a complete TCA cycle (<xref ref-type="bibr" rid="bib81">Zhang and Bryant, 2011</xref>). In contrast, the Melainabacteria genomes reported here encode no more than four unlinked genes from the TCA and reverse TCA cycles (<xref ref-type="fig" rid="fig4">Figure 4</xref>). We also confirmed that the genomes lack alternative enzymes first identified in <italic>Synechococcus</italic> species PCC 7002 that functionally complete the TCA cycle (<xref ref-type="bibr" rid="bib81">Zhang and Bryant, 2011</xref>). Moreover, the genomes have an NAD<sup>+</sup> not the NADP<sup>+</sup> dependent isocitrate dehydrogenase found in most Cyanobacteria. The absence of a complete TCA cycle necessitates an external requirement for dicarboxylic acids, which could be imported by the two dicarboxylic acid transporters found in each genome. This observation, along with the need for an H<sub>2</sub>-consuming partner, could guide development of growth media for future isolation efforts.</p><p>We infer that the TCA cycle enzymes in the genomes reported here function to link to nitrogen metabolism and energy generating pathways. Both the ACD20 and MEL.B2 genomes encode isocitrate dehydrogenase and fumarase, whose end-products are important intermediates in nitrogen assimilation and amino acid pathways. Unlike the genomes of the gut bacteria, ACD20 can augment glycolytic ATP substrate-level phosphorylation with additional ATP generation using TCA cycle intermediates. 
For example, unidirectional aspartate ammonia-lyase, fumarase, and malic enzyme can convert amino acids (alanine and aspartate) and organic acids to pyruvate and ultimately acetate with ATP generation. Also, like citrate fermentation in <italic>Klebsiella pneumoniae</italic> (<xref ref-type="bibr" rid="bib21">Dimroth, 1980</xref>; <xref ref-type="bibr" rid="bib20">Dimroth and Schink, 1998</xref>), ACD20 may use citrate lyase in conjunction with the combined action of membrane-bound oxaloacetate decarboxylase (EC 4.1.1.3) to pump sodium ions. Combined with a sodium-hydrogen antiporter, a hydrogen gradient can be generated and used to drive cellular processes (<xref ref-type="fig" rid="fig4">Figure 4</xref>, blue arrows). Hence, the TCA cycle does not appear to function as it does in most Cyanobacteria, where it generates reducing equivalents for oxidative phosphorylation, but rather links to nitrogen metabolism and organic acid fermentation for energy generation.</p></sec><sec id="s2-9"><title>Production of proton-motive force and H<sub>2</sub></title><p>In the absence of a complete electron transport chain (above), the gut and aquifer bacteria studied here appear to augment the ATP produced by substrate-level phosphorylation by membrane energization. This membrane potential can be capitalized on for ATP synthesis by the F-type ATP synthase, used for flagellar motion (see below), or used for solute transport. 
Given that the human gut associated Melainabacteria only produce ATP by substrate-level phosphorylation in glycolysis, these alternative modes of generating an energized membrane may be important to their overall energy balance.</p><p>The Melainabacteria can generate a membrane potential by four mechanisms: (i) a sodium gradient generated by decarboxylation of oxaloacetate (using an oxaloacetate decarboxylase and sodium-hydrogen antiporter, see TCA cycle); (ii) membrane bound H<sup>+</sup>-translocating pyrophosphatases, which use some of the energy liberated during inorganic pyrophosphate (PPi) hydrolysis to drive proton translocation across the cytoplasmic membrane (<xref ref-type="bibr" rid="bib61">Schocke and Schink, 1998</xref>); (iii) an 11-subunit complex I dehydrogenase (<xref ref-type="bibr" rid="bib5">Battchikova et al., 2011</xref>); and (iv) a putatively annotated six subunit NiFe membrane-bound hydrogenase that lacks required hydrogen binding motifs (<xref ref-type="bibr" rid="bib72">Vignais and Billoud, 2007</xref>; <xref ref-type="bibr" rid="bib46">Marreiros et al., 2013</xref>). Notably, these genomes all lack membrane-associated <italic>Rhodobacter</italic> nitrogen fixation (Rnf) and formate dehydrogenase complexes found in the genomes of obligate fermentative organisms (<xref ref-type="bibr" rid="bib48">McInerney et al., 2007</xref>; <xref ref-type="bibr" rid="bib7">Biegel et al., 2011</xref>). Given the high demand for reduced ferredoxin in the cell, we have considered that both the complex I and the annotated NiFe hydrogenase may use the proton-motive force to produce reduced ferredoxin, which is a required electron donor for the FeFe hydrogenase and nitrogenase systems.</p></sec><sec id="s2-10"><title>The gut Melainabacteria may provide their host with B and K vitamins</title><p>The Melainabacteria genomes encode complete pathways for biosynthesis of vitamins B2 (riboflavin), B3 (nicotinamide), B7 (biotin), and B9 (dihydrofolate). 
The gut types additionally make vitamin B5 (pantoate). We are unsure if the subsurface bacterium ACD20 can make vitamin B5 as it appears to lack the final enzyme required in the synthesis of vitamin B5, 2-dehydropantoate 2-reductase. ACD20 and one of the human gut types (MEL.A1) may also be able to synthesize vitamins K1 and K2. Cyanobacteria are capable of synthesizing the B and K vitamins as well (<xref ref-type="bibr" rid="bib32">Kanehisa et al., 2012</xref>). Germ-free animals raised aseptically, and which lack gut microbiota, have an increased nutrient requirement for B and K vitamins, suggesting that under normal conditions the mammalian gut microbiota are a source of these vitamins for the host (<xref ref-type="bibr" rid="bib4">Backhed et al., 2005</xref>). Hence, Melainabacteria may represent one of the microbial sources of the B and K vitamins for their hosts.</p></sec><sec id="s2-11"><title>ACD20 is capable of nitrogen fixation by a nitrogenase complex distinct from that in Cyanobacteria</title><p>Nitrogen fixation is a capacity common among Cyanobacteria and is accomplished via a nitrogenase complex (<xref ref-type="bibr" rid="bib79">Zehr et al., 2003</xref>). While the human gut-derived melainabacterial genomes lack the genes required for a functional nitrogenase complex (<italic>nifD, nifK, and nifH</italic>), the ACD20 genome encodes these genes and the <italic>nifE</italic>, <italic>nifV</italic>, <italic>nifS</italic>, <italic>nifU, nifB, and nifB/X</italic> genes involved in nitrogen fixation. We confirmed that the ACD20 NifH protein sequence contains the required [4Fe/4S] cluster, all motifs for functionality, and a conserved lysine in position 15 responsible for ATP interaction. Therefore, it seems likely that ACD20 has the capacity to fix nitrogen. 
This ability has been proposed to account for the increased dominance of <italic>Geobacter</italic> species under ammonium limiting conditions created during acetate stimulated U(VI) bioremediation of the same subsurface aquifer (<xref ref-type="bibr" rid="bib50">Mouser et al., 2009</xref>). The intestinal relatives do not have nitrogenase capabilities (<xref ref-type="fig" rid="fig4">Figure 4</xref>, <xref ref-type="supplementary-material" rid="SD6-data">Supplementary file 1</xref>).</p><p>When placed in phylogenetic context with 865 <italic>nifH</italic> gene sequences (<xref ref-type="bibr" rid="bib79">Zehr et al., 2003</xref>), the <italic>nifH</italic> gene from ACD20 (<xref ref-type="fig" rid="fig6">Figure 6</xref>, red) does not cluster with the primary cyanobacterial <italic>nifH</italic> genes (<xref ref-type="fig" rid="fig6">Figure 6</xref>, green) in <italic>nifH</italic> group I, but is affiliated with <italic>nifH</italic> group III. Group III is composed of sequences from phylogenetically distant organisms, many of which are obligate anaerobes (e.g., <italic>Clostridium</italic> species, sulfate-reducers, and methanogens) (<xref ref-type="bibr" rid="bib79">Zehr et al., 2003</xref>) (<xref ref-type="fig" rid="fig6">Figure 6</xref>) as well as <italic>nifH</italic> from some Cyanobacteria (e.g., <italic>Microcoleus chthonoplastes</italic> PCC7420 and <italic>Anabaena variabilis</italic> ATCC 29413, which have secondary copies of <italic>nifH</italic>; <xref ref-type="fig" rid="fig6">Figure 6</xref>, shown in green in group III). The phylogenetic placement within the <italic>nifH</italic> cluster III was robust to alignment curation method (manual or automatic with GBLOCKS), and the ACD20 <italic>nifH</italic> sequence was never monophyletic with cyanobacterial sequences. The best hit to the ACD20 <italic>nifH</italic> sequence is a group III nitrogenase annotated from <italic>Methanoregula boonei</italic> 6A8 (ABS56522). 
These results indicate that the melainabacterial nitrogenase is not related to the primary nitrogenase in Cyanobacteria.<fig id="fig6" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.016</object-id><label>Figure 6.</label><caption><title>The phylogeny of the Melainabacteria nitrogenase.</title><p>A maximum likelihood phylogenetic tree constructed with 865 nitrogenase <italic>nifH</italic> genes from sequenced genomes (<xref ref-type="bibr" rid="bib79">Zehr et al., 2003</xref>) is shown. <italic>nifH</italic> groups I and III are shown. The ACD20 <italic>nifH</italic> (in group III) is denoted in red, while photosynthetic cyanobacterial <italic>nifH</italic> sequences (in groups I and III) are denoted in green. Relative to group I, group III is characterized by deep bifurcations and long branch lengths (<xref ref-type="bibr" rid="bib79">Zehr et al., 2003</xref>), which are represented in the constructed tree by low bootstrap values (&lt;50) for internal branch positions in group III. ACD20 sequences are monophyletic (but with low bootstrap support) with <italic>nifH</italic> sequences from anaerobic <italic>Clostridium</italic> and <italic>Fusobacterium</italic> species.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.016">http://dx.doi.org/10.7554/eLife.01102.016</ext-link></p></caption><graphic xlink:href="elife01102f006"/></fig></p><p>Further distinctions between the ACD20 and cyanobacterial nitrogen metabolism include how nitrogen is assimilated. The large and small subunits of the glutamine oxoglutarate aminotransferase (GOGAT) system, responsible for nitrogen assimilation in Melainabacteria, are NADP<sup>+</sup>-based. This finding distinguishes Melainabacteria from known Cyanobacteria, which use either a 3Fe-4S ferredoxin-dependent monomeric enzyme or a two subunit NADH-dependent GOGAT (<xref ref-type="bibr" rid="bib52">Muro-Pastor et al., 2005</xref>). 
While a common nitrogenase could have existed in the ancestor of Melainabacteria and Cyanobacteria, the extant capacity to fix and assimilate nitrogen appears to have been acquired independently in Cyanobacteria and this sibling lineage.</p></sec><sec id="s2-12"><title>Melainabacteria are flagellated</title><p>Unlike Cyanobacteria, the organisms studied here are flagellated. All four of the analyzed genomes contain genes for flagella production, and all but one of the analyzed gut genomes (MEL.A1) contains the requisite genes to produce a fully functional flagellum (<xref ref-type="supplementary-material" rid="SD6-data">Supplementary file 1</xref>). The flagella are composed of the M, S, P, and L rings, as expected given a Gram-negative cell envelope (<xref ref-type="bibr" rid="bib16">DePamphilis and Adler, 1971</xref>). At least one copy of the flagellin protein in each Melainabacteria genome contains the eight amino acid sequence recognized by Toll-like receptor 5 (<xref ref-type="bibr" rid="bib2">Andersen-Nissen et al., 2005</xref>), indicating an ability to interact with the host immune system (<xref ref-type="fig" rid="fig7">Figure 7</xref>). Trees built using flagellum-related gene sequences show that the genes branch deeply with the Firmicutes and Spirochaetes (<xref ref-type="fig" rid="fig8">Figure 8</xref>), arguably the two most basal bacterial lineages (<xref ref-type="bibr" rid="bib15">Daubin et al., 2002</xref>; <xref ref-type="bibr" rid="bib9">Ciccarelli et al., 2006</xref>). This result suggests that the common ancestor of Cyanobacteria and Melainabacteria may have been flagellated. Given that no flagellated Cyanobacteria are known and cyanobacterial motility is accomplished by gliding or twitching (<xref ref-type="bibr" rid="bib60">Schaechter, 2010</xref>), flagella may have been non-essential to the cyanobacterial lifestyle and hence lost. 
However, it is not possible to rule out the alternative of flagella being acquired by Melainabacteria after the divergence from Cyanobacteria.<fig id="fig7" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.017</object-id><label>Figure 7.</label><caption><title>Putative TLR5 activation region in Melainabacteria flagellin genes.</title><p>Protein sequence alignment of residues 88–103 (<italic>Escherichia coli</italic> coordinates) for the flagellin genes. The range of residues required for TLR5 activation (<xref ref-type="bibr" rid="bib2">Andersen-Nissen et al., 2005</xref>) are indicated by the top bracket. Sequences are organized by similarity within these residues. Species whose flagellin are reported (<xref ref-type="bibr" rid="bib2">Andersen-Nissen et al., 2005</xref>) to be recognized (R) or unrecognized (UR) by TLR5 are noted. Based on the visualization of the alignment, flagellin genes predicted to be recognized or unrecognized by TLR5 are indicated; genes of ambiguous TLR5 recognition status are unmarked.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.017">http://dx.doi.org/10.7554/eLife.01102.017</ext-link></p></caption><graphic xlink:href="elife01102f007"/></fig><fig id="fig8" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.018</object-id><label>Figure 8.</label><caption><title>Phylogeny of flagella-related genes.</title><p>Supertree (cladogram) of 13 bootstrap ML trees of the flagellar genes shared among the four analyzed genomes. The phylum (or more specific taxonomic identifier) of each species is listed: (F) Firmicutes, (S) Spirochaetes, (E-P) Epsilonproteobacteria, (MEL) Melainabacteria, (A) Aquificae, (T) Thermotogae, (D-P) Deltaproteobacteria, (Pl) Planctomycetes, (B) Bacteroidetes, (A-P) Alphaproteobacteria, (B-P) Betaproteobacteria, (G-P) Gammaproteobacteria. 
In all 13 individual trees, Melainabacteria branched with Firmicutes and Spirochaetes.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.018">http://dx.doi.org/10.7554/eLife.01102.018</ext-link></p></caption><graphic xlink:href="elife01102f008"/></fig></p></sec><sec id="s2-13"><title>The gut Melainabacteria have reduced genomes</title><p>The gut–associated clade of the Melainabacteria, which diverged from the sediment-associated lineage, appears to have undergone genome reduction relative to ACD20 (<xref ref-type="fig" rid="fig3">Figure 3</xref>, <xref ref-type="table" rid="tbl2">Table 2</xref>). The gut types lack genes for chemotaxis, production of some amino acids (aspartic acid, asparagine, phenylalanine, arginine, histidine, tyrosine, and, in MEL.A1 and MEL.B1, alanine), a type I secretion system, nitrogen fixation, and genes for additional energy generation by substrate-level phosphorylation and for the production of acetate and perhaps butyrate as fermentation end-products. Note that in ACD20, genes for four of the five steps in butyrate synthesis were identified. The fifth step, which could be carried out by butyrate kinase and phosphotransbutyrylase or butyryl-coenzyme A (CoA): acetate CoA-transferase (as occurs in anaerobic bacteria) was not detected. An alternative enzyme, analogous to the process used by some Archaea (e.g., <italic>Pyrococcus</italic> species), acetate CoA ligase (<xref ref-type="bibr" rid="bib44">Mai and Adams, 1996</xref>), found in ACD20, may substitute to convert butyryl CoA to butyrate.</p><p>Genome reduction has been observed in symbiotic bacteria and may have occurred in Melainabacteria during adaptation to life in the animal gut. 
The lack of the filament, filament cap, hook-filament junction, and L and P rings required for flagellum biosynthesis in the genome assembled into a single scaffold (MEL.A1) indicates that these genes were likely not missed in the genome sequencing and assembly but rather that further genome reduction may be ongoing in the gut-associated clade.</p></sec><sec id="s2-14"><title>Distribution of Melainabacteria in soil, water, and animal habitats</title><p>To update our view of the ecological niches for Melainabacteria, we searched for 16S rRNA gene sequences in recent datasets (<xref ref-type="supplementary-material" rid="SD5-data">Figure 9—source data 1</xref>). We detected the water-soil-sediment (non-animal associated) clade in a wide variety of environments (<xref ref-type="fig" rid="fig9">Figure 9</xref>), with the highest abundances in municipal water. An analysis of the Human Microbiome Project (<xref ref-type="bibr" rid="bib11">Consortium HMP, 2012</xref>) and other datasets (<xref ref-type="supplementary-material" rid="SD5-data">Figure 9—source data 1</xref>) showed that across the human body, they occur as rare members of skin, airway, and mouth communities, but are most abundant in fecal samples (<xref ref-type="fig" rid="fig9">Figure 9B</xref>). The gut types were also detected in fecal samples from a range of other mammalian species, with the highest levels in herbivores, consistent with a role in fermentation of dietary substrates (<xref ref-type="fig" rid="fig9">Figure 9C</xref>). Within the herbivores, Melainabacteria are more abundant in feces obtained from foregut than hindgut fermenters (two tailed <italic>t</italic>-test, p=0.026). Within the Human Microbiome Project dataset (<xref ref-type="bibr" rid="bib11">Consortium HMP, 2012</xref>), approximately 10% of the samples contained melainabacterial 16S rRNA gene sequences, providing a rough estimate as to what fraction of the American population carries Melainabacteria. 
When comparing three global human populations (<xref ref-type="bibr" rid="bib76">Yatsunenko et al., 2012</xref>), we observed the highest abundances in fecal samples obtained from predominantly vegetarian Malawian and Venezuelan individuals (<xref ref-type="fig" rid="fig9">Figure 9C</xref>). These observations suggest that Melainabacteria also play a role in fermentation of dietary plant polysaccharides in humans.<fig id="fig9" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.019</object-id><label>Figure 9.</label><caption><title>The prevalence of members of Melainabacteria in different environments, including distinct human body habitats.</title><p>In all three panels, the relative abundances of Melainabacteria in different sample types are plotted as box plots (log<sub>10</sub> transformed; i.e., 10<sup>−1</sup> = 0.1%). Data were obtained from the QIIME database, derive from a variety of studies, and are publicly available (<xref ref-type="supplementary-material" rid="SD5-data">Figure 9—source data 1</xref>): (<bold>A</bold>) soil, sediment, and water sites, (<bold>B</bold>) different human body sites (GI = gastrointestinal), (<bold>C</bold>, left) mammal stool classified by host diet, (<bold>C</bold>, right) country of origin for human stool. 
UD = undetermined.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.019">http://dx.doi.org/10.7554/eLife.01102.019</ext-link></p><p><supplementary-material id="SD5-data"><object-id pub-id-type="doi">10.7554/eLife.01102.020</object-id><label>Figure 9—source data 1.</label><caption><title>16S rRNA gene sequence datasets used to analyze the sources of Melainabacteria.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.020">http://dx.doi.org/10.7554/eLife.01102.020</ext-link></p></caption><media mime-subtype="docx" mimetype="application" xlink:href="elife01102s005.docx"/></supplementary-material></p></caption><graphic xlink:href="elife01102f009"/></fig></p></sec></sec><sec id="s3" sec-type="discussion"><title>Discussion</title><p>High throughput sequencing and bioinformatic methods enabled recovery of four complete and four partial genomes for relatively rare members of complex microbial communities from the adult human gut and groundwater. Our approach used standard metagenomic sequencing and assembly approaches, augmented by manual curation. Recovery of complete genomes was facilitated by the uniqueness of the Melainabacteria genomes, which were distinct not only from the genomes of all other organisms in the community but also were differentiable among coexisting Melainabacteria. This approach has the potential to replace culture-based, single cell, and flow cytometry-based genomics methods, which are costlier and more laborious due to the requirement for cell manipulation. 
Moreover, the amplification step in single cell genomics often results in partial genomes (<xref ref-type="bibr" rid="bib45">Marcy et al., 2007</xref>; <xref ref-type="bibr" rid="bib55">Podar et al., 2007</xref>; <xref ref-type="bibr" rid="bib77">Youssef et al., 2011</xref>) and chimeras (<xref ref-type="bibr" rid="bib82">Zhang et al., 2006</xref>).</p><p>Traditionally, novel bacterial 16S rRNA sequences have been assigned to new candidate phyla based on low levels of sequence identity to existing phyla, and use of this criterion supports the assignment of these bacteria to a new phylum distinct from Cyanobacteria (<xref ref-type="bibr" rid="bib30">Hugenholtz et al., 1998</xref>); here we suggest reclassification of sequences previously referred to as deep branching Cyanobacteria, YS2, SM1D11, or mle1-12 to the candidate phylum Melainabacteria. Similarly, ACD20 should be assigned to the Melainabacteria candidate phylum. Both 16S rRNA and concatenated ribosomal protein phylogenetic trees support common ancestry for the Melainabacteria and the Cyanobacteria lineages. Melainabacteria are, therefore, not only important members of the human gut but their unique evolutionary placement also provides novel insight into the evolution of Cyanobacteria.</p><p>Our metabolic reconstruction indicates that the Melainabacteria are anaerobic, obligate fermenters capable of utilizing diverse carbon sources, which are imported as well as derived from internal carbon sources. Melainabacteria lack a linked electron transport chain but have multiple methods for generating a membrane potential, which can then produce ATP via an ATP synthase. 
Distinct from Cyanobacteria (which have NiFe hydrogenases [<xref ref-type="bibr" rid="bib68">Tamagnini et al., 2007</xref>]), the Melainabacteria cells use FeFe hydrogenases for H<sub>2</sub> production and may require an H<sub>2</sub>-sink (e.g., a coexisting acetogen, methanogen, or other respiring partner) to maintain low partial H<sub>2</sub> pressures.</p><p>The metabolic profile and nutritional requirements of Melainabacteria uncovered here provide guidance into how these bacteria may be cultured. Efforts to culture Melainabacteria should focus on providing a diverse array of sugars and carbohydrates, dicarboxylic acids, and a complete set of amino acids in an anaerobic, dark environment. Moreover, given their production of H<sub>2</sub>, these strategies should include removal of hydrogen through use of an H<sub>2</sub>-consuming syntrophic partner.</p><p>We infer that the protein sequences of the flagellin genes of Melainabacteria are recognized by Toll-like receptor 5 (as are flagellins of some commensal Firmicutes in the mammalian gut) that mediates interaction with the host. The human gut Melainabacteria may provide their host with B and K vitamins and fill a niche based on carbon fermentation in the large intestine, analogous to that of the Firmicutes. 
Based on their distribution in animals and humans living on different continents and consuming different diets, we hypothesize that the gut Melainabacteria flourish in the presence of plant polysaccharides and may aid in the digestion of plant fibers.</p><p>Oxygenic photosynthesis, a unique form of photosynthesis that originated in the cyanobacterial lineage, was one of the most profound innovations of life on Earth as it induced planetary-scale geochemical changes and facilitated the evolution of plants via endosymbiosis of cyanobacterial cells (<xref ref-type="bibr" rid="bib58">Sagan, 1967</xref>; <xref ref-type="bibr" rid="bib6">Bekker et al., 2004</xref>; <xref ref-type="bibr" rid="bib51">Mulkidjanian et al., 2006</xref>). We infer that this capacity did not arise prior to the divergence of Melainabacteria from Cyanobacteria, based on the absence of remnants of photosynthetic machinery in the Melainabacteria genomes. This observation is in contrast to the symbiotic photoheterotroph UCYN-A, which is the sole representative of Cyanobacteria unable to perform oxygenic photosynthesis due to loss of photosystem II (<xref ref-type="bibr" rid="bib69">Tripp et al., 2010</xref>). A similar loss of photosynthesis appears to have occurred in the nitrogen-fixing endosymbiont of the algae <italic>Rhopalodia gibba</italic> (<xref ref-type="bibr" rid="bib34">Kneip et al., 2008</xref>). 
The presence of a light stress response regulator (NblS) and circadian rhythm regulator (RpaA and RpaB) homologs in this sibling phylum suggests that the cyanobacterial ability to sense and use light was preceded and perhaps facilitated by response regulator systems present in the proto-Cyanobacteria.</p><p>The timing of the evolution of nitrogen fixation in Cyanobacteria has been debated (<xref ref-type="bibr" rid="bib59">Sanchez-Baracaldo et al., 2005</xref>; <xref ref-type="bibr" rid="bib51">Mulkidjanian et al., 2006</xref>; <xref ref-type="bibr" rid="bib63">Shi and Falkowski, 2008</xref>; <xref ref-type="bibr" rid="bib36">Larsson et al., 2011</xref>; <xref ref-type="bibr" rid="bib37">Latysheva et al., 2012</xref>). Like Cyanobacteria, the subsurface Melainabacteria (ACD20) is capable of nitrogen fixation but its nitrogenase genes appear to be unrelated. Though we cannot rule out the existence of an ancient nitrogenase common to both lineages, this finding suggests that acquisition of nitrogen fixation occurred after Cyanobacteria and Melainabacteria diverged. This sequence of events is in agreement with other theories, which place the development of photosynthesis before nitrogen fixation (<xref ref-type="bibr" rid="bib59">Sanchez-Baracaldo et al., 2005</xref>; <xref ref-type="bibr" rid="bib63">Shi and Falkowski, 2008</xref>). Thus, we speculate that Cyanobacteria gained the capacities for oxygenic photosynthesis and nitrogen fixation and lost or never gained flagella after diverging from Melainabacteria. Melainabacteria, on the other hand, gained or maintained flagella, a fermentation system, and an array of carbon transporters. 
One environmental (non-gut) lineage of Melainabacteria acquired nitrogen fixation, whereas the gut lineage capitalized on its ability to ferment diverse carbon sources, including those recalcitrant to host digestion.</p><p>We conclude that the common ancestors of Cyanobacteria and Melainabacteria may have been Gram-negative flagellated bacteria active in the anaerobic carbon cycle, producing H<sub>2</sub>, but not fixing nitrogen. Although this characterization does not clarify the origins of photosynthesis, it is consistent with the idea that the ancestors of Cyanobacteria were anoxygenic photosynthetic organisms with a photosystem I-like reaction center (<xref ref-type="bibr" rid="bib51">Mulkidjanian et al., 2006</xref>). The inferred lifestyle is consistent with the widely accepted hypothesis that the common ancestor of extant bacteria relied on anaerobic decomposition of organic material by substrate-level phosphorylation (<xref ref-type="bibr" rid="bib24">Egami, 1977</xref>) and was likely metabolically similar to the Firmicutes (<xref ref-type="bibr" rid="bib15">Daubin et al., 2002</xref>; <xref ref-type="bibr" rid="bib9">Ciccarelli et al., 2006</xref>).</p><sec id="s3-1"><title>Prospectus</title><p>The reconstruction of genomes from uncultivated bacteria from human gut and subsurface environmental metagenomes has enabled us to describe Melainabacteria, a novel candidate phylum sibling to Cyanobacteria, and to further elucidate the evolutionary history of one of the Earth’s most important bacterial phyla. Our results suggest that while photosynthesis probably developed in Cyanobacteria after the separation from Melainabacteria, the evolution of light related capabilities may have been enabled by regulators present in their common ancestor. The ability to fix nitrogen appears to have developed separately in Cyanobacteria and the water-soil-sediment clade of Melainabacteria. 
The role of the Melainabacteria in the human gut is one of an obligate fermenter, and its enrichment in human subjects and animals with a plant-rich diet likely relates to a prominent role of the Melainabacteria in the processing of plant fibers. As plant fibers have been minimized in Western diets, the Melainabacteria may be regarded as part of the microbiota that is disappearing from modernized populations (<xref ref-type="bibr" rid="bib8">Blaser and Falkow, 2009</xref>). The metabolic reconstructions made from these genomes should guide efforts into obtaining Melainabacteria in culture, which would allow a better understanding of this important human symbiont.</p></sec></sec><sec id="s4" sec-type="materials|methods"><title>Materials and methods</title><sec id="s4-1"><title>Human gut sample collection</title><p>The three fecal samples from healthy adults (A, B, and C) were collected under Cornell University IRB (Protocol ID 1108002388) from the United Kingdom Adult Twin Registry (TwinsUK). Samples were collected in 15 ml conical tubes, refrigerated for 1–2 days, and then stored at −80°C at King’s College London until being shipped on dry ice to Cornell University, where they were subsequently stored at −80°C. Approximately 100 mg of sample was processed with PowerSoil DNA isolation kit (MoBio Laboratories Ltd, Carlsbad, CA) to isolate genomic DNA.</p></sec><sec id="s4-2"><title>Fecal sample DNA sequencing</title><p>Construction of three shotgun genomic libraries and sequencing were carried out at the WM Keck Center for Comparative and Functional Genomics, Roy J Carver Biotechnology Center, University of Illinois at Urbana-Champaign. The barcoded DNAseq libraries were prepared with Illumina’s ‘TruSeq DNAseq Sample Prep kit’ (Illumina, San Diego, CA). 
The final libraries were quantitated with Qubit (Life Technologies, Grand Island, NY), and the average size was determined on an Agilent bioanalyzer High-Sensitivity DNA chip (Agilent Technologies, Wilmington, DE), diluted to 10 nM and pooled. The 10 nM dilution was further quantitated by qPCR on an ABI 7900 (Life Technologies).</p><p>The pooled libraries were sequenced on one lane of a flowcell for 101 cycles from each end of the fragments on a HiSeq2000 using TruSeq SBS sequencing kit version 3, and the fastq files were generated with Casava1.8.2. Overall, sequencing yielded a total of 346 million reads (34.6 Gbp, almost 12 Gbp per sample on average).</p></sec><sec id="s4-3"><title>Aquifer sample collection and sequencing</title><p>Details relating to the collection, sequencing, and data analysis for the ACD20 genome are provided in (<xref ref-type="bibr" rid="bib74">Wrighton et al., 2012</xref>). This publication focused on members of the OP11, OD1, PER, ACD80, and BD1-5 candidate bacterial phyla and included no discussion of the ACD20 genome.</p></sec><sec id="s4-4"><title>Genome assembly</title><p>We reconstructed four complete (MEL.A1, MEL.B1, MEL.B2, and MEL.C1), one near-complete (MEL.C2), and two partial (MEL.A2 and MEL.C3) genomes from the three human fecal samples. An eighth near-complete genome (ACD20) was recovered from a different dataset (see <xref ref-type="bibr" rid="bib74">Wrighton et al. (2012)</xref> for genome assembly details). Potential metagenomic sequences belonging to Melainabacteria were determined by similarity to Cyanobacteria or by being completely novel. These scaffold fragments were linked to other fragments by coverage, %GC, and paired-end read information using the assembly curation steps and scripts previously described (<xref ref-type="bibr" rid="bib62">Sharon et al., 2013</xref>). 
Genome MEL.A1 was assembled from human fecal sample A using Velvet (<xref ref-type="bibr" rid="bib80">Zerbino and Birney, 2008</xref>) with parameters optimized based on the expected genome coverage. Genome MEL.A2 was recovered from an IDBA-UD (<xref ref-type="bibr" rid="bib54">Peng et al., 2012</xref>) assembly for human fecal sample A based on a phylogenetic profile of hits for the scaffolds and genes. Genomes MEL.B1 and MEL.B2 were reconstructed from human fecal sample B and genomes MEL.C1, MEL.C2, and MEL.C3 from human fecal sample C. These genomes were assembled using the IDBA-UD assembler. Identification of scaffolds belonging to the target Melainabacteria genome was aided by utilizing similarity to genes in MEL.A1. Scaffolds not belonging to MEL.C1 or MEL.C2 were identified as belonging to genome MEL.C3 when more than 50% of the best hits for its genes were to genes in the other Melainabacteria genomes and not from other published genomes.</p><p>Genome completeness was assessed as follows: Complete genomes have a complete set of single copy genes (<xref ref-type="bibr" rid="bib57">Raes et al., 2007</xref>) (<xref ref-type="fig" rid="fig10">Figure 10</xref>), and the linkage between all scaffolds is established; Near complete genomes have a complete set of single copy genes (<xref ref-type="fig" rid="fig10">Figure 10</xref>), and the linkage between almost all scaffolds is established; Partial genomes lack a complete set of single copy genes, and/or scaffold linkage is lacking. 
The newly sequenced gut genomes are available at <ext-link ext-link-type="uri" xlink:href="http://ggkbase.berkeley.edu/mel/organisms">http://ggkbase.berkeley.edu/mel/organisms</ext-link>.<fig id="fig10" position="float"><object-id pub-id-type="doi">10.7554/eLife.01102.021</object-id><label>Figure 10.</label><caption><title>Single copy gene inventory from reconstructed genomes.</title><p>Data are based on single copy genes (numbers in circles indicate the number of copies found).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.021">http://dx.doi.org/10.7554/eLife.01102.021</ext-link></p></caption><graphic xlink:href="elife01102f010"/></fig></p></sec><sec id="s4-5"><title>Concatenated ribosomal protein phylogeny</title><p>A core group of 16 syntenic ribosomal proteins was selected based on published metrics of lateral gene transfer frequencies (<italic>rpL2</italic>, <italic>3</italic>, <italic>4</italic>, <italic>5</italic>, <italic>6</italic>, <italic>14</italic>, <italic>15</italic>, <italic>16</italic>, <italic>18</italic>, <italic>22</italic>, <italic>24</italic>, and <italic>rpS3</italic>, <italic>8</italic>, <italic>10</italic>, <italic>17</italic>, <italic>19</italic>) (<xref ref-type="bibr" rid="bib65">Sorek et al., 2007</xref>; <xref ref-type="bibr" rid="bib75">Wu and Eisen, 2008</xref>). Reference datasets were obtained from the PhyloSift database (<xref ref-type="bibr" rid="bib13">Darling et al., 2012</xref>). The NCBI and JGI IMG databases were mined for the 16 ribosomal proteins from recently sequenced genomes from the Cyanobacteria, Chloroflexi, Nitrospira, and TM7 phyla. The 16 syntenic ribosomal protein genes were identified in the eight new genomes, translated, and included for phylogenetic placement. The complete dataset contained 733 taxa. 
Each individual protein dataset was aligned using Muscle version 3.8.31 (<xref ref-type="bibr" rid="bib23">Edgar, 2004</xref>) and then manually curated to remove end gaps and single-taxon insertions. Model selection for evolutionary analysis was determined using ProtTest3 (<xref ref-type="bibr" rid="bib14">Darriba et al., 2011</xref>) for each single protein alignment. The curated alignments were concatenated to form a 16-protein, 733 taxa alignment with 3,082 unambiguously aligned positions. A maximum likelihood phylogeny for the concatenated alignment was conducted using Phyml (<xref ref-type="bibr" rid="bib25">Guindon and Gascuel, 2003</xref>) under the LG+αG model of evolution and with 100 bootstrap replicates.</p></sec><sec id="s4-6"><title>16S rRNA gene phylogeny</title><p>16S rRNA gene sequences from MEL.A1, MEL.B1, and MEL.B2 were aligned using NAST (<xref ref-type="bibr" rid="bib47">McDonald et al., 2012</xref>). An aligned full-length sequence set was created for five representative sequences from each phylum in the Greengenes (May 2011) database (<xref ref-type="bibr" rid="bib17">DeSantis et al., 2006</xref>) plus the three Melainabacteria sequences. For the Cyanobacteria-Melainabacteria specific tree, the aligned sequence set was composed of the three Melainabacteria sequences and one representative sequence from each order within Cyanobacteria except for orders YS2, SM1D11, and mle1-12, from which we included all sequences as these are most closely related to the Melainabacteria sequences. Phylogenetic trees were constructed using maximum likelihood implemented in RAxML (<xref ref-type="bibr" rid="bib67">Stamatakis, 2006</xref>) and using the GTR+γ+I model of evolution and 100 bootstrap resamplings. The trees were rooted using five archaeal sequences. 
Trees were visualized using the Interactive Tree of Life (iTOL) (<xref ref-type="bibr" rid="bib38">Letunic and Bork, 2007</xref>, <xref ref-type="bibr" rid="bib39">2011</xref>).</p></sec><sec id="s4-7"><title>RnpB structure analysis</title><p>RnpB RNAs were identified by searching for matches to 5′ GAGGAAAGUCC 3′, which is highly conserved in RNase P RNAs (<xref ref-type="bibr" rid="bib26">Haas et al., 1991</xref>), as well as the surrounding intergenic region. Matches were analyzed in Bcheck (<xref ref-type="bibr" rid="bib78">Yusuf et al., 2010</xref>) to determine the structure and type of the RnpB RNAs.</p></sec><sec id="s4-8"><title>Choice of genomes used for metabolic analyses</title><p>The following four genomes were used for the metabolic analyses: ACD20, MEL.A1, MEL.B1, and MEL.B2. Two partial genomes (MEL.A2 and MEL.C3) were excluded from this analysis because of reliance on clusters of orthologs from all participating genomes. MEL.C1 and MEL.C2 were not included in these analyses because of their similarity to MEL.A1 and MEL.B1, respectively.</p></sec><sec id="s4-9"><title>Construction of Melainabacteria clusters of orthologous genes (MEL-COGs)</title><p>Clusters of orthologs (920 MEL-COGs in total, <xref ref-type="supplementary-material" rid="SD4-data">Figure 5—source data 1</xref>) for four of the Melainabacteria genomes (ACD20, MEL.A1, MEL.B1, and MEL.B2) were constructed by (1) BLASTing each protein from each genome against the proteome of the other genomes individually, (2) connecting proteins between pairs of genomes by reciprocal best BLAST hits, and (3) forming clusters of these reciprocal best hits when groups of four proteins (one from each genome) had at least two proteins connected to all three other members of the group, and the rest were connected to at least two other members of the group. 
The COGs were annotated with (I) phylogenetic origin according to best matching phylum in a BLAST search of NCBI’s nr database and (II) KEGG category according to the BLAST match of the ACD20 homolog in the KEGG database.</p></sec><sec id="s4-10"><title>Phylogenetic assignments of MEL-COGs</title><p>Phylogenetic assignments were decided based on majority voting for all four genes of the MEL-COG (one gene from each of the four Melainabacteria genomes analyzed [ACD20, MEL.A1, MEL.B1, and MEL.B2]). First, a phylum was assigned for each single gene. Then, the phylum assigned to the majority of the four MEL-COG genes was assigned to the MEL-COG. If none of the MEL-COG genes received a significant hit to an existing gene in another phylum, the MEL-COG was assigned ‘No hit’. If more than one phylum shared the highest number of assignments, or several MEL-COG genes had a ‘Multiple’ assignment from step (3) below, the MEL-COG was assigned ‘Multiple’.</p><p>Phylum assignment to each individual gene in a MEL-COG was accomplished using homology to genes belonging to other phyla using the following workflow: (1) MEL-COG genes were BLASTed against NCBI’s nr database; (2) The top two hits with e-value ≤ 1e-5 were collected; and (3) The alignments of these genes with the MEL-COG gene were compared, considering both alignment-length and % identity: each alignment was represented by the total number of identical positions, calculated as (alignment-length) × (% identity). The top hit was considered to be a significantly better match if its number of identical positions was at least 5% more than the number of identical positions for the second best matching gene. 
In that case, the phylum of the best hit was assigned to the MEL-COG gene; otherwise the MEL-COG gene received the ‘Multiple’ assignment.</p></sec><sec id="s4-11"><title>KEGG category assignment of MEL-COGs</title><p>The ACD20 representative in each MEL-COG was BLASTed against the Kyoto Encyclopedia of Genes and Genomes (KEGG) database (5). The KEGG term for the best hit was assigned to the MEL-COG. If no hits were found, the MEL-COG assignment was ‘unknown’, else all pathways to which the KEGG term belongs were identified, as well as their KEGG category. Assignment was: (a) ‘Undecided’—if the KEGG term did not belong to any pathway; (b) ‘Multiple’—if the pathways to which the KEGG term belongs were from multiple categories; or, (c) the category of the term’s pathway, if all pathways belonged to the same category.</p></sec><sec id="s4-12"><title>Metabolic pathways</title><p>KEGG annotations for the ACD20, MEL.A1, MEL.B1, and MEL.B2 genomes were uploaded to the KEGG database. Pathways were visually inspected for completeness, reactants, and products. All annotations were confirmed by manual inspection, including confirmation of active residues and phylogenetic tree analyses. Phylogenetic trees were constructed using protein alignments as described in detail (<xref ref-type="bibr" rid="bib74">Wrighton et al., 2012</xref>). Confirmation and cross-genome comparisons were constructed by using the LIST and GENOME SUMMARY features in the ggKbase website (<ext-link ext-link-type="uri" xlink:href="http://ggkbase.berkeley.edu/genome_summaries/81-MEL-Metabolic-Overview-June2013">http://ggkbase.berkeley.edu/genome_summaries/81-MEL-Metabolic-Overview-June2013</ext-link>). 
Genomic information from the Melainabacteria dataset is stored in the publicly accessible ggKbase database (<ext-link ext-link-type="uri" xlink:href="http://ggkbase.berkeley.edu/mel/organisms">http://ggkbase.berkeley.edu/mel/organisms</ext-link>).</p></sec><sec id="s4-13"><title>Nitrogenase complex phylogenetic analysis</title><p>Phylogenetic analyses of the <italic>nifH</italic> gene were conducted with a database of 865 <italic>nifH</italic> sequences from genome-sequenced Bacteria and Archaea (<xref ref-type="bibr" rid="bib79">Zehr et al., 2003</xref>). Two separate phylogenetic trees were constructed with and without the use of GBLOCKS in the pipeline reported previously (<xref ref-type="bibr" rid="bib74">Wrighton et al., 2012</xref>). <xref ref-type="fig" rid="fig6">Figure 6</xref> is based upon the GBLOCKS alignment. Maximum likelihood phylogenetic trees were produced using RAxML version 7.3.0 using the rapid bootstrap analysis and the general time reversible model of nucleotide substitution with optimization and categorization of per-site substitution rates on 500 distinct trees (raxmlHPC -f a -m GTRCAT -x 1234 -N 500) (<xref ref-type="bibr" rid="bib67">Stamatakis, 2006</xref>) and visualized in iTOL (<xref ref-type="bibr" rid="bib38">Letunic and Bork, 2007</xref>, <xref ref-type="bibr" rid="bib39">2011</xref>).</p></sec><sec id="s4-14"><title>Flagellum-related gene set phylogenetic analysis</title><p>Phylogenetic trees were built from the 13 flagellum-related genes in the MEL-COG list (the ACD20 homolog is listed): ACD20_20398.28785.13G0015 (<italic>flgG</italic>), ACD20_20398.28785.13G0016 (<italic>flgG</italic>), ACD20_20398.28785.13G0018 (<italic>flgC</italic>), ACD20_20398.28785.13G0021 (<italic>fliF</italic>), ACD20_20398.28785.13G0023 (<italic>fliH</italic>), ACD20_20398.28785.13G0024 (<italic>flhA</italic>), ACD20_20398.28785.13G0025 (<italic>fliI</italic>), ACD20_20398.28785.13G0028 (<italic>flgE</italic>), ACD20_26563.5896.13G0002 
(<italic>flhA</italic>), ACD20_26723.8006.14G0006 (<italic>fliP</italic>), ACD20_26723.8006.14G0007 (<italic>fliQ</italic>), ACD20_26723.8006.14G0009 (<italic>fliR</italic>), ACD20_29089.28969.14G0027 (<italic>flhB</italic>). For each gene, the ACD20 homolog was used in a pBLAST search (e-value &lt;10<sup>−5</sup>) against the 41 flagellated bacterial species used in (<xref ref-type="bibr" rid="bib42">Liu and Ochman, 2007</xref>). Matching protein sequences across the queried species were aligned using MUSCLE version 3.6 (<xref ref-type="bibr" rid="bib23">Edgar, 2004</xref>). Protein alignments were converted to DNA alignments. Phylogenetic trees were produced by maximum likelihood using RAxML version 7.3.0, executing the rapid bootstrap analysis and the general time reversible model of nucleotide substitution with optimization and categorization of per-site substitution rates (50 rate categories) on 1000 distinct trees (raxmlHPC -e 0.1 -f a -c 50 -m GTRCAT -x 92957 -N 1000 -p 3212) (<xref ref-type="bibr" rid="bib67">Stamatakis, 2006</xref>). A supertree was built using heuristic searches in Clann (<xref ref-type="bibr" rid="bib12">Creevey and McInerney, 2005</xref>).</p></sec><sec id="s4-15"><title>Flagellin and Toll-like receptor 5</title><p>Flagellin genes were recovered from the ACD20, MEL.A1, MEL.B1, and MEL.B2 genomes. The flagellin gene MEL.B1.001_31 was excluded as this sequence is incomplete and does not align well to other flagellin sequences. Using MUSCLE (3.8) multiple sequence alignment (<xref ref-type="bibr" rid="bib23">Edgar, 2004</xref>), these sequences were aligned with representative flagellin genes from species whose flagellins are known to be either recognized or unrecognized by the mammalian Toll-like receptor 5 (<xref ref-type="bibr" rid="bib2">Andersen-Nissen et al., 2005</xref>) (gene names and Uniprot IDs are given): <italic>E. 
coli fliC</italic> (Q0GJI9), <italic>Bacillus subtilis hag</italic> (P02968), <italic>Salmonella</italic> Typhimurium <italic>fliC</italic> (P06179), <italic>Vibrio anguillarum flaC</italic> (Q56574), <italic>Listeria monocytogenes flaA</italic> (Q02551), <italic>Bartonella bacilliformis fla1</italic> (P35633), <italic>Campylobacter jejuni flaA</italic> (Q46113), <italic>Helicobacter pylori flaA</italic> (P0A0S1), <italic>Helicobacter felis flaA</italic> (Q9XB38), <italic>Wolinella succinogenes flaG</italic> (Q79HP6). For visualization purposes, the sequences were ordered by similarity within the TLR5 activation domain (<xref ref-type="bibr" rid="bib2">Andersen-Nissen et al., 2005</xref>). BOXSHADE (<ext-link ext-link-type="uri" xlink:href="http://www.ch.embnet.org/software/BOX_form.html">http://www.ch.embnet.org/software/BOX_form.html</ext-link>) was used to display the protein alignment.</p></sec><sec id="s4-16"><title>Meta-analysis of publicly available 16S rRNA data</title><p>We built a reference dataset of environmental (non-gut) and gut associated Melainabacteria 16S rRNA gene sequences (<xref ref-type="fig" rid="fig2">Figure 2</xref>) obtained from the Greengenes database (<xref ref-type="bibr" rid="bib17">DeSantis et al., 2006</xref>) and queried the dataset at 97% ID against publicly available 16S rRNA gene datasets obtained from <ext-link ext-link-type="uri" xlink:href="http://www.microbio.me/qiime">http://www.microbio.me/qiime</ext-link> (<xref ref-type="supplementary-material" rid="SD5-data">Figure 9—source data 1</xref>). Similar sample types were combined (e.g., forest soil, grassland soil, shrubland combined as ‘soil’), and Melainabacteria sequence reads were tallied. Samples with zero Melainabacteria sequence reads were removed. 
Sample types (i.e., air) with fewer than five reads were not plotted.</p></sec></sec></body><back><ack id="ack"><title>Acknowledgements</title><p>We would like to thank Norman Pace and Kirk Harris for providing guidance on analyses of the 30S ribosomal protein S1 and of RnpB, Jonathan Eisen for making the PhyloSift database sequences available, Jonathan Zehr and Kendra Turk for providing the <italic>nifH</italic> reference sequences, and Roberto Kolter, Jonathan Zehr, and the anonymous reviewer for their comments on the manuscript.</p></ack><sec sec-type="additional-information"><title>Additional information</title><fn-group content-type="competing-interest"><title>Competing interests</title><fn fn-type="conflict" id="conf1"><p>The authors declare that no competing interests exist.</p></fn></fn-group><fn-group content-type="author-contribution"><title>Author contributions</title><fn fn-type="con" id="con1"><p>SCD, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con2"><p>IS, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con3"><p>KCW, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con4"><p>OK, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con5"><p>LAH, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con6"><p>JKG, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con7"><p>BCT, Contributed resources and tools</p></fn><fn fn-type="con" id="con8"><p>JTB, Contributed resources and tools</p></fn><fn 
fn-type="con" id="con9"><p>TDS, Contributed resources and tools</p></fn><fn fn-type="con" id="con10"><p>JFB, Conception and design, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con11"><p>REL, Conception and design, Analysis and interpretation of data, Drafting or revising the article</p></fn></fn-group><fn-group content-type="ethics-information"><title>Ethics</title><fn fn-type="other"><p>Human subjects: Informed consent and consent to publish was obtained for the human subjects in the TwinsUK project. Ethical approval was obtained and guidelines were followed in accordance with Cornell University IRB (Protocol ID 1108002388).</p></fn></fn-group></sec><sec sec-type="supplementary-material"><title>Additional files</title><supplementary-material id="SD6-data"><object-id pub-id-type="doi">10.7554/eLife.01102.022</object-id><label>Supplementary file 1.</label><caption><title>Genes belonging to pathways or assemblages referenced in the paper.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01102.022">http://dx.doi.org/10.7554/eLife.01102.022</ext-link></p></caption><media mime-subtype="xlsx" mimetype="application" xlink:href="elife01102s006.xlsx"/></supplementary-material><sec sec-type="datasets"><title>Major dataset</title><p>The following dataset was generated:</p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro1"><name><surname>Di Rienzi</surname><given-names>SC</given-names></name>, <name><surname>Sharon</surname><given-names>I</given-names></name>, <name><surname>Wrighton</surname><given-names>KC</given-names></name>, <name><surname>Koren</surname><given-names>O</given-names></name>, <name><surname>Hug</surname><given-names>LA</given-names></name>, <name><surname>Thomas</surname><given-names>BC</given-names></name>, 
<name><surname>Goodrich</surname><given-names>JK</given-names></name>, <name><surname>Bell</surname><given-names>JT</given-names></name>, <name><surname>Spector</surname><given-names>TD</given-names></name>, <name><surname>Banfield</surname><given-names>JF</given-names></name>, <name><surname>Ley</surname><given-names>RE</given-names></name>, <year>2013</year><x>, </x><source>The Melainabacteria Project</source><x>, </x><ext-link ext-link-type="uri" xlink:href="http://ggkbase.berkeley.edu/mel">http://ggkbase.berkeley.edu/mel</ext-link><x>, </x><comment>Publicly available at ggKBase (<ext-link ext-link-type="uri" xlink:href="http://ggkbase.berkeley.edu/">http://ggkbase.berkeley.edu/</ext-link>).</comment></related-object></p><p>The following previously published dataset was used:</p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro2"><name><surname>Wrighton</surname><given-names>KC</given-names></name>, <name><surname>Thomas</surname><given-names>BC</given-names></name>, <name><surname>Sharon</surname><given-names>I</given-names></name>, <name><surname>Miller</surname><given-names>CS</given-names></name>, <name><surname>Castelle</surname><given-names>CJ</given-names></name>, <name><surname>Verberkmoes</surname><given-names>NC</given-names></name>, <name><surname>Wilkins</surname><given-names>MJ</given-names></name>, <name><surname>Hettich</surname><given-names>RL</given-names></name>, <name><surname>Lipton</surname><given-names>MS</given-names></name>, <name><surname>Williams</surname><given-names>KH</given-names></name>, <name><surname>Long</surname><given-names>PE</given-names></name>, <name><surname>Banfield</surname><given-names>JF</given-names></name>, <year>2012</year><x>, </x><source>The ACD Rifle project</source><x>, </x><ext-link ext-link-type="uri" xlink:href="http://ggkbase.berkeley.edu/Rifle_ACD">http://ggkbase.berkeley.edu/Rifle_ACD</ext-link><x>, 
</x><comment>Publicly available at ggKBase (<ext-link ext-link-type="uri" xlink:href="http://ggkbase.berkeley.edu/">http://ggkbase.berkeley.edu/</ext-link>).</comment></related-object></p></sec></sec><ref-list><title>References</title><ref id="bib1"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Altermann</surname><given-names>W</given-names></name><name><surname>Kazmierczak</surname><given-names>J</given-names></name></person-group><year>2003</year><article-title>Archean microfossils: a reappraisal of early life on earth</article-title><source>Res Microbiol</source><volume>154</volume><fpage>611</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1016/j.resmic.2003.08.006</pub-id></element-citation></ref><ref id="bib2"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Andersen-Nissen</surname><given-names>E</given-names></name><name><surname>Smith</surname><given-names>KD</given-names></name><name><surname>Strobe</surname><given-names>KL</given-names></name><name><surname>Barrett</surname><given-names>SL</given-names></name><name><surname>Cookson</surname><given-names>BT</given-names></name><name><surname>Logan</surname><given-names>SM</given-names></name><etal/></person-group><year>2005</year><article-title>Evasion of toll-like receptor 5 by flagellated bacteria</article-title><source>Proc Natl Acad Sci USA</source><volume>102</volume><fpage>9247</fpage><lpage>52</lpage><pub-id pub-id-type="doi">10.1073/pnas.0502040102</pub-id></element-citation></ref><ref id="bib3"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ashby</surname><given-names>MK</given-names></name><name><surname>Mullineaux</surname><given-names>CW</given-names></name></person-group><year>1999</year><article-title>Cyanobacterial ycf27 gene products regulate energy transfer from phycobilisomes to photosystems I and 
II</article-title><source>FEMS Microbiol Lett</source><volume>181</volume><fpage>253</fpage><lpage>60</lpage><pub-id pub-id-type="doi">10.1111/j.1574-6968.1999.tb08852.x</pub-id></element-citation></ref><ref id="bib4"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Backhed</surname><given-names>F</given-names></name><name><surname>Ley</surname><given-names>RE</given-names></name><name><surname>Sonnenburg</surname><given-names>JL</given-names></name><name><surname>Peterson</surname><given-names>DA</given-names></name><name><surname>Gordon</surname><given-names>JI</given-names></name></person-group><year>2005</year><article-title>Host-bacterial mutualism in the human intestine</article-title><source>Science</source><volume>307</volume><fpage>1915</fpage><lpage>20</lpage><pub-id pub-id-type="doi">10.1126/science.1104816</pub-id></element-citation></ref><ref id="bib5"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Battchikova</surname><given-names>N</given-names></name><name><surname>Eisenhut</surname><given-names>M</given-names></name><name><surname>Aro</surname><given-names>EM</given-names></name></person-group><year>2011</year><article-title>Cyanobacterial NDH-1 complexes: novel insights and remaining puzzles</article-title><source>Biochim Biophys Acta</source><volume>1807</volume><fpage>935</fpage><lpage>44</lpage><pub-id pub-id-type="doi">10.1016/j.bbabio.2010.10.017</pub-id></element-citation></ref><ref id="bib6"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Bekker</surname><given-names>A</given-names></name><name><surname>Holland</surname><given-names>HD</given-names></name><name><surname>Wang</surname><given-names>PL</given-names></name><name><surname>Rumble</surname><given-names>D</given-names><suffix>III</suffix></name><name><surname>Stein</surname><given-names>HJ</given-names></name><name><surname>Hannah</surname><given-names>JL</given-names></name><etal/></person-group><year>2004</year><article-title>Dating the rise of atmospheric oxygen</article-title><source>Nature</source><volume>427</volume><fpage>117</fpage><lpage>20</lpage><pub-id pub-id-type="doi">10.1038/nature02260</pub-id></element-citation></ref><ref id="bib7"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Biegel</surname><given-names>E</given-names></name><name><surname>Schmidt</surname><given-names>S</given-names></name><name><surname>Gonzalez</surname><given-names>JM</given-names></name><name><surname>Muller</surname><given-names>V</given-names></name></person-group><year>2011</year><article-title>Biochemistry, evolution and physiological function of the Rnf complex, a novel ion-motive electron transport complex in prokaryotes</article-title><source>Cell Mol Life Sci</source><volume>68</volume><fpage>613</fpage><lpage>34</lpage><pub-id pub-id-type="doi">10.1007/s00018-010-0555-8</pub-id></element-citation></ref><ref id="bib8"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Blaser</surname><given-names>MJ</given-names></name><name><surname>Falkow</surname><given-names>S</given-names></name></person-group><year>2009</year><article-title>What are the consequences of the disappearing human microbiota?</article-title><source>Nat Rev Microbiol</source><volume>7</volume><fpage>887</fpage><lpage>94</lpage><pub-id pub-id-type="doi">10.1038/nrmicro2245</pub-id></element-citation></ref><ref 
id="bib9"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ciccarelli</surname><given-names>FD</given-names></name><name><surname>Doerks</surname><given-names>T</given-names></name><name><surname>von Mering</surname><given-names>C</given-names></name><name><surname>Creevey</surname><given-names>CJ</given-names></name><name><surname>Snel</surname><given-names>B</given-names></name><name><surname>Bork</surname><given-names>P</given-names></name></person-group><year>2006</year><article-title>Toward automatic reconstruction of a highly resolved tree of life</article-title><source>Science</source><volume>311</volume><fpage>1283</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1126/science.1123061</pub-id></element-citation></ref><ref id="bib11"><element-citation publication-type="journal"><person-group person-group-type="author"><collab>Consortium HMP</collab></person-group><year>2012</year><article-title>Structure, function and diversity of the healthy human microbiome</article-title><source>Nature</source><volume>486</volume><fpage>207</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.1038/Nature11234</pub-id></element-citation></ref><ref id="bib12"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Creevey</surname><given-names>CJ</given-names></name><name><surname>McInerney</surname><given-names>JO</given-names></name></person-group><year>2005</year><article-title>Clann: investigating phylogenetic information through supertree analyses</article-title><source>Bioinformatics</source><volume>21</volume><fpage>390</fpage><lpage>2</lpage><pub-id pub-id-type="doi">10.1093/Bioinformatics/Bti020</pub-id></element-citation></ref><ref id="bib10"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Danchin</surname><given-names>A</given-names></name></person-group><year>2009</year><article-title>Chapter 1: A phylogenetic view 
of bacterial ribonucleases</article-title><person-group person-group-type="editor"><name><surname>Condon</surname><given-names>C</given-names></name></person-group><source>Molecular Biology of RNA Processing and Decay in Prokaryotes</source><publisher-loc>New York</publisher-loc><publisher-name>Academic Press</publisher-name><pub-id pub-id-type="doi">10.1016/S0079-6603(08)00801-5</pub-id></element-citation></ref><ref id="bib13"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Darling</surname><given-names>A</given-names></name><name><surname>Bik</surname><given-names>H</given-names></name><name><surname>Jospin</surname><given-names>G</given-names></name><name><surname>Eisen</surname><given-names>JA</given-names></name></person-group><year>2012</year><source>PhyloSift | mining the global metagenome</source><comment><ext-link ext-link-type="uri" xlink:href="http://phylosift.wordpress.com/">http://phylosift.wordpress.com/</ext-link></comment></element-citation></ref><ref id="bib14"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Darriba</surname><given-names>D</given-names></name><name><surname>Taboada</surname><given-names>GL</given-names></name><name><surname>Doallo</surname><given-names>R</given-names></name><name><surname>Posada</surname><given-names>D</given-names></name></person-group><year>2011</year><article-title>ProtTest 3: fast selection of best-fit models of protein evolution</article-title><source>Bioinformatics</source><volume>27</volume><fpage>1164</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btr088</pub-id></element-citation></ref><ref id="bib15"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Daubin</surname><given-names>V</given-names></name><name><surname>Gouy</surname><given-names>M</given-names></name><name><surname>Perriere</surname><given-names>G</given-names></name></person-group><year>2002</year><article-title>A phylogenomic approach to bacterial phylogeny: evidence of a core of genes sharing a common history</article-title><source>Genome Res</source><volume>12</volume><fpage>1080</fpage><lpage>90</lpage><pub-id pub-id-type="doi">10.1101/gr.187002</pub-id></element-citation></ref><ref id="bib16"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>DePamphilis</surname><given-names>ML</given-names></name><name><surname>Adler</surname><given-names>J</given-names></name></person-group><year>1971</year><article-title>Fine structure and isolation of the hook-basal body complex of flagella from <italic>Escherichia coli</italic> and <italic>Bacillus subtilis</italic></article-title><source>J Bacteriol</source><volume>105</volume><fpage>384</fpage><lpage>95</lpage><ext-link ext-link-type="uri" xlink:href="http://jb.asm.org/content/105/1/384">http://jb.asm.org/content/105/1/384</ext-link></element-citation></ref><ref id="bib17"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>DeSantis</surname><given-names>TZ</given-names></name><name><surname>Hugenholtz</surname><given-names>P</given-names></name><name><surname>Larsen</surname><given-names>N</given-names></name><name><surname>Rojas</surname><given-names>M</given-names></name><name><surname>Brodie</surname><given-names>EL</given-names></name><name><surname>Keller</surname><given-names>K</given-names></name><etal/></person-group><year>2006</year><article-title>Greengenes, a chimera-checked 16S rRNA gene database and workbench compatible with ARB</article-title><source>Appl Environ Microbiol</source><volume>72</volume><fpage>5069</fpage><lpage>72</lpage><pub-id 
pub-id-type="doi">10.1128/AEM.03006-05</pub-id></element-citation></ref><ref id="bib18"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dethlefsen</surname><given-names>L</given-names></name><name><surname>Relman</surname><given-names>DA</given-names></name></person-group><year>2011</year><article-title>Incomplete recovery and individualized responses of the human distal gut microbiota to repeated antibiotic perturbation</article-title><source>Proc Natl Acad Sci USA</source><volume>108</volume><supplement>Suppl 1</supplement><fpage>4554</fpage><lpage>61</lpage><pub-id pub-id-type="doi">10.1073/pnas.1000087107</pub-id></element-citation></ref><ref id="bib19"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dick</surname><given-names>GJ</given-names></name><name><surname>Andersson</surname><given-names>AF</given-names></name><name><surname>Baker</surname><given-names>BJ</given-names></name><name><surname>Simmons</surname><given-names>SL</given-names></name><name><surname>Thomas</surname><given-names>BC</given-names></name><name><surname>Yelton</surname><given-names>AP</given-names></name><etal/></person-group><year>2009</year><article-title>Community-wide analysis of microbial genome sequence signatures</article-title><source>Genome Biol</source><volume>10</volume><fpage>R85</fpage><pub-id pub-id-type="doi">10.1186/gb-2009-10-8-r85</pub-id></element-citation></ref><ref id="bib21"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dimroth</surname><given-names>P</given-names></name></person-group><year>1980</year><article-title>A new sodium-transport system energized by the decarboxylation of oxaloacetate</article-title><source>FEBS lett</source><volume>122</volume><fpage>234</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1016/0014-5793(80)80446-7</pub-id></element-citation></ref><ref 
id="bib20"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dimroth</surname><given-names>P</given-names></name><name><surname>Schink</surname><given-names>B</given-names></name></person-group><year>1998</year><article-title>Energy conservation in the decarboxylation of dicarboxylic acids by fermenting bacteria</article-title><source>Arch Microbiol</source><volume>170</volume><fpage>69</fpage><lpage>77</lpage><pub-id pub-id-type="doi">10.1007/s002030050616</pub-id></element-citation></ref><ref id="bib22"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Dworkin</surname><given-names>M</given-names></name></person-group><year>2006</year><source>Bacteria: firmicutes, Cyanobacteria</source><publisher-loc>New York</publisher-loc><publisher-name>Springer</publisher-name></element-citation></ref><ref id="bib23"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Edgar</surname><given-names>RC</given-names></name></person-group><year>2004</year><article-title>MUSCLE: multiple sequence alignment with high accuracy and high throughput</article-title><source>Nucleic Acids Res</source><volume>32</volume><fpage>1792</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1093/nar/gkh340</pub-id></element-citation></ref><ref id="bib24"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Egami</surname><given-names>F</given-names></name></person-group><year>1977</year><article-title>Anaerobic respiration and photoautotrophy in the evolution of prokaryotes</article-title><source>Orig Life</source><volume>8</volume><fpage>169</fpage><lpage>71</lpage><pub-id pub-id-type="doi">10.1007/BF00927981</pub-id></element-citation></ref><ref id="bib25"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Guindon</surname><given-names>S</given-names></name><name><surname>Gascuel</surname><given-names>O</given-names></name></person-group><year>2003</year><article-title>A simple, fast, and accurate algorithm to estimate large phylogenies by maximum likelihood</article-title><source>Syst Biol</source><volume>52</volume><fpage>696</fpage><lpage>704</lpage><pub-id pub-id-type="doi">10.1080/10635150390235520</pub-id></element-citation></ref><ref id="bib27"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Haas</surname><given-names>ES</given-names></name><name><surname>Banta</surname><given-names>AB</given-names></name><name><surname>Harris</surname><given-names>JK</given-names></name><name><surname>Pace</surname><given-names>NR</given-names></name><name><surname>Brown</surname><given-names>JW</given-names></name></person-group><year>1996</year><article-title>Structure and evolution of ribonuclease P RNA in gram-positive bacteria</article-title><source>Nucleic Acids Res</source><volume>24</volume><fpage>4775</fpage><lpage>82</lpage><pub-id pub-id-type="doi">10.1093/nar/24.23.4775</pub-id></element-citation></ref><ref id="bib26"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Haas</surname><given-names>ES</given-names></name><name><surname>Morse</surname><given-names>DP</given-names></name><name><surname>Brown</surname><given-names>JW</given-names></name><name><surname>Schmidt</surname><given-names>FJ</given-names></name><name><surname>Pace</surname><given-names>NR</given-names></name></person-group><year>1991</year><article-title>Long-range structure in ribonuclease P RNA</article-title><source>Science</source><volume>254</volume><fpage>853</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1126/science.1719634</pub-id></element-citation></ref><ref id="bib28"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Hanaoka</surname><given-names>M</given-names></name><name><surname>Takai</surname><given-names>N</given-names></name><name><surname>Hosokawa</surname><given-names>N</given-names></name><name><surname>Fujiwara</surname><given-names>M</given-names></name><name><surname>Akimoto</surname><given-names>Y</given-names></name><name><surname>Kobori</surname><given-names>N</given-names></name><etal/></person-group><year>2012</year><article-title>RpaB, another response regulator operating circadian clock-dependent transcriptional regulation in Synechococcus elongatus PCC 7942</article-title><source>J Biol Chem</source><volume>287</volume><fpage>26321</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1074/jbc.M111.338251</pub-id></element-citation></ref><ref id="bib29"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hoiczyk</surname><given-names>E</given-names></name><name><surname>Hansel</surname><given-names>A</given-names></name></person-group><year>2000</year><article-title>Cyanobacterial cell walls: news from an unusual prokaryotic envelope</article-title><source>J Bacteriol</source><volume>182</volume><fpage>1191</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1128/JB.182.5.1191-1199.2000</pub-id></element-citation></ref><ref id="bib30"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hugenholtz</surname><given-names>P</given-names></name><name><surname>Pitulle</surname><given-names>C</given-names></name><name><surname>Hershberger</surname><given-names>KL</given-names></name><name><surname>Pace</surname><given-names>NR</given-names></name></person-group><year>1998</year><article-title>Novel division level bacterial diversity in a yellowstone hot spring</article-title><source>J Bacteriol</source><volume>180</volume><fpage>366</fpage><lpage>76</lpage><ext-link ext-link-type="uri" 
xlink:href="http://jb.asm.org/content/180/2/366">http://jb.asm.org/content/180/2/366</ext-link></element-citation></ref><ref id="bib31"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Iverson</surname><given-names>V</given-names></name><name><surname>Morris</surname><given-names>RM</given-names></name><name><surname>Frazar</surname><given-names>CD</given-names></name><name><surname>Berthiaume</surname><given-names>CT</given-names></name><name><surname>Morales</surname><given-names>RL</given-names></name><name><surname>Armbrust</surname><given-names>EV</given-names></name></person-group><year>2012</year><article-title>Untangling genomes from metagenomes: revealing an uncultured class of marine Euryarchaeota</article-title><source>Science</source><volume>335</volume><fpage>587</fpage><lpage>90</lpage><pub-id pub-id-type="doi">10.1126/science.1212665</pub-id></element-citation></ref><ref id="bib33"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kanehisa</surname><given-names>M</given-names></name><name><surname>Goto</surname><given-names>S</given-names></name></person-group><year>2000</year><article-title>KEGG: Kyoto Encyclopedia of Genes and Genomes</article-title><source>Nucleic Acids Res</source><volume>28</volume><fpage>27</fpage><lpage>30</lpage><pub-id pub-id-type="doi">10.1093/nar/28.1.27</pub-id></element-citation></ref><ref id="bib32"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kanehisa</surname><given-names>M</given-names></name><name><surname>Goto</surname><given-names>S</given-names></name><name><surname>Sato</surname><given-names>Y</given-names></name><name><surname>Furumichi</surname><given-names>M</given-names></name><name><surname>Tanabe</surname><given-names>M</given-names></name></person-group><year>2012</year><article-title>KEGG for integration and interpretation of large-scale molecular 
datasets</article-title><source>Nucleic Acids Res</source><volume>40</volume><fpage>D109</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.1093/nar/gkr988</pub-id></element-citation></ref><ref id="bib34"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kneip</surname><given-names>C</given-names></name><name><surname>Voss</surname><given-names>C</given-names></name><name><surname>Lockhart</surname><given-names>PJ</given-names></name><name><surname>Maier</surname><given-names>UG</given-names></name></person-group><year>2008</year><article-title>The Cyanobacterial endosymbiont of the unicellular algae Rhopalodia gibba shows reductive genome evolution</article-title><source>BMC Evol Biol</source><volume>8</volume><fpage>30</fpage><pub-id pub-id-type="doi">10.1186/1471-2148-8-30</pub-id></element-citation></ref><ref id="bib35"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Koenig</surname><given-names>JE</given-names></name><name><surname>Spor</surname><given-names>A</given-names></name><name><surname>Scalfone</surname><given-names>N</given-names></name><name><surname>Fricker</surname><given-names>AD</given-names></name><name><surname>Stombaugh</surname><given-names>J</given-names></name><name><surname>Knight</surname><given-names>R</given-names></name><etal/></person-group><year>2011</year><article-title>Succession of microbial consortia in the developing infant gut microbiome</article-title><source>Proc Natl Acad Sci USA</source><volume>108</volume><supplement>Suppl 1</supplement><fpage>4578</fpage><lpage>85</lpage><pub-id pub-id-type="doi">10.1073/pnas.1000081107</pub-id></element-citation></ref><ref id="bib36"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Larsson</surname><given-names>J</given-names></name><name><surname>Nylander</surname><given-names>JA</given-names></name><name><surname>Bergman</surname><given-names>B</given-names></name></person-group><year>2011</year><article-title>Genome fluctuations in Cyanobacteria reflect evolutionary, developmental and adaptive traits</article-title><source>BMC Evol Biol</source><volume>11</volume><fpage>187</fpage><pub-id pub-id-type="doi">10.1186/1471-2148-11-187</pub-id></element-citation></ref><ref id="bib37"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Latysheva</surname><given-names>N</given-names></name><name><surname>Junker</surname><given-names>VL</given-names></name><name><surname>Palmer</surname><given-names>WJ</given-names></name><name><surname>Codd</surname><given-names>GA</given-names></name><name><surname>Barker</surname><given-names>D</given-names></name></person-group><year>2012</year><article-title>The evolution of nitrogen fixation in Cyanobacteria</article-title><source>Bioinformatics</source><volume>28</volume><fpage>603</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/bts008</pub-id></element-citation></ref><ref id="bib38"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Letunic</surname><given-names>I</given-names></name><name><surname>Bork</surname><given-names>P</given-names></name></person-group><year>2007</year><article-title>Interactive tree of life (iTOL): an online tool for phylogenetic tree display and annotation</article-title><source>Bioinformatics</source><volume>23</volume><fpage>127</fpage><lpage>8</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btl529</pub-id></element-citation></ref><ref id="bib39"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Letunic</surname><given-names>I</given-names></name><name><surname>Bork</surname><given-names>P</given-names></name></person-group><year>2011</year><article-title>Interactive tree of life v2: online annotation and display of phylogenetic trees made easy</article-title><source>Nucleic Acids Res</source><volume>39</volume><fpage>W475</fpage><lpage>8</lpage><pub-id pub-id-type="doi">10.1093/nar/gkr201</pub-id></element-citation></ref><ref id="bib40"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ley</surname><given-names>RE</given-names></name><name><surname>Backhed</surname><given-names>F</given-names></name><name><surname>Turnbaugh</surname><given-names>P</given-names></name><name><surname>Lozupone</surname><given-names>CA</given-names></name><name><surname>Knight</surname><given-names>RD</given-names></name><name><surname>Gordon</surname><given-names>JI</given-names></name></person-group><year>2005</year><article-title>Obesity alters gut microbial ecology</article-title><source>Proc Natl Acad Sci USA</source><volume>102</volume><fpage>11070</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1073/pnas.0504978102</pub-id></element-citation></ref><ref id="bib41"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ley</surname><given-names>RE</given-names></name><name><surname>Hamady</surname><given-names>M</given-names></name><name><surname>Lozupone</surname><given-names>C</given-names></name><name><surname>Turnbaugh</surname><given-names>PJ</given-names></name><name><surname>Ramey</surname><given-names>RR</given-names></name><name><surname>Bircher</surname><given-names>JS</given-names></name><etal/></person-group><year>2008</year><article-title>Evolution of mammals and their gut microbes</article-title><source>Science</source><volume>320</volume><fpage>1647</fpage><lpage>51</lpage><pub-id 
pub-id-type="doi">10.1126/science.1155725</pub-id></element-citation></ref><ref id="bib42"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>R</given-names></name><name><surname>Ochman</surname><given-names>H</given-names></name></person-group><year>2007</year><article-title>Stepwise formation of the bacterial flagellar system</article-title><source>Proc Natl Acad Sci USA</source><volume>104</volume><fpage>7116</fpage><lpage>21</lpage><pub-id pub-id-type="doi">10.1073/pnas.0700266104</pub-id></element-citation></ref><ref id="bib43"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mackie</surname><given-names>RI</given-names></name></person-group><year>2002</year><article-title>Mutualistic fermentative digestion in the gastrointestinal tract: diversity and evolution</article-title><source>Integr Comp Biol</source><volume>42</volume><fpage>319</fpage><lpage>26</lpage><pub-id pub-id-type="doi">10.1093/icb/42.2.319</pub-id></element-citation></ref><ref id="bib44"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mai</surname><given-names>X</given-names></name><name><surname>Adams</surname><given-names>MW</given-names></name></person-group><year>1996</year><article-title>Purification and characterization of two reversible and ADP-dependent acetyl coenzyme A synthetases from the hyperthermophilic archaeon <italic>Pyrococcus furiosus</italic></article-title><source>J Bacteriol</source><volume>178</volume><fpage>5897</fpage><lpage>903</lpage><ext-link ext-link-type="uri" xlink:href="http://jb.asm.org/content/178/20/5897">http://jb.asm.org/content/178/20/5897</ext-link></element-citation></ref><ref id="bib45"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Marcy</surname><given-names>Y</given-names></name><name><surname>Ouverney</surname><given-names>C</given-names></name><name><surname>Bik</surname><given-names>EM</given-names></name><name><surname>Losekann</surname><given-names>T</given-names></name><name><surname>Ivanova</surname><given-names>N</given-names></name><name><surname>Martin</surname><given-names>HG</given-names></name><etal/></person-group><year>2007</year><article-title>Dissecting biological “dark matter” with single-cell genetic analysis of rare and uncultivated TM7 microbes from the human mouth</article-title><source>Proc Natl Acad Sci USA</source><volume>104</volume><fpage>11889</fpage><lpage>94</lpage><pub-id pub-id-type="doi">10.1073/pnas.0704662104</pub-id></element-citation></ref><ref id="bib46"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Marreiros</surname><given-names>BC</given-names></name><name><surname>Batista</surname><given-names>AP</given-names></name><name><surname>Duarte</surname><given-names>AM</given-names></name><name><surname>Pereira</surname><given-names>MM</given-names></name></person-group><year>2013</year><article-title>A missing link between complex I and group 4 membrane-bound [NiFe] hydrogenases</article-title><source>Biochim Biophys Acta</source><volume>1827</volume><fpage>198</fpage><lpage>209</lpage><pub-id pub-id-type="doi">10.1016/j.bbabio.2012.09.012</pub-id></element-citation></ref><ref id="bib47"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>McDonald</surname><given-names>D</given-names></name><name><surname>Price</surname><given-names>MN</given-names></name><name><surname>Goodrich</surname><given-names>J</given-names></name><name><surname>Nawrocki</surname><given-names>EP</given-names></name><name><surname>DeSantis</surname><given-names>TZ</given-names></name><name><surname>Probst</surname><given-names>A</given-names></name><etal/></person-group><year>2012</year><article-title>An improved greengenes taxonomy with explicit ranks for ecological and evolutionary analyses of bacteria and archaea</article-title><source>ISME J</source><volume>6</volume><fpage>610</fpage><lpage>8</lpage><pub-id pub-id-type="doi">10.1038/ismej.2011.139</pub-id></element-citation></ref><ref id="bib48"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>McInerney</surname><given-names>MJ</given-names></name><name><surname>Rohlin</surname><given-names>L</given-names></name><name><surname>Mouttaki</surname><given-names>H</given-names></name><name><surname>Kim</surname><given-names>U</given-names></name><name><surname>Krupp</surname><given-names>RS</given-names></name><name><surname>Rios-Hernandez</surname><given-names>L</given-names></name><etal/></person-group><year>2007</year><article-title>The genome of Syntrophus aciditrophicus: life at the thermodynamic limit of microbial growth</article-title><source>Proc Natl Acad Sci USA</source><volume>104</volume><fpage>7600</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1073/pnas.0610456104</pub-id></element-citation></ref><ref id="bib49"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mertens</surname><given-names>E</given-names></name></person-group><year>1991</year><article-title>Pyrophosphate-dependent phosphofructokinase, an anaerobic glycolytic enzyme?</article-title><source>FEBS 
lett</source><volume>285</volume><fpage>1</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1016/0014-5793(91)80711-B</pub-id></element-citation></ref><ref id="bib50"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mouser</surname><given-names>PJ</given-names></name><name><surname>N’Guessan</surname><given-names>AL</given-names></name><name><surname>Elifantz</surname><given-names>H</given-names></name><name><surname>Holmes</surname><given-names>DE</given-names></name><name><surname>Williams</surname><given-names>KH</given-names></name><name><surname>Wilkins</surname><given-names>MJ</given-names></name><etal/></person-group><year>2009</year><article-title>Influence of heterogeneous ammonium availability on bacterial community structure and the expression of nitrogen fixation and ammonium transporter genes during in situ bioremediation of uranium-contaminated groundwater</article-title><source>Environ Sci Technol</source><volume>43</volume><fpage>4386</fpage><lpage>92</lpage><pub-id pub-id-type="doi">10.1021/es8031055</pub-id></element-citation></ref><ref id="bib51"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mulkidjanian</surname><given-names>AY</given-names></name><name><surname>Koonin</surname><given-names>EV</given-names></name><name><surname>Makarova</surname><given-names>KS</given-names></name><name><surname>Mekhedov</surname><given-names>SL</given-names></name><name><surname>Sorokin</surname><given-names>A</given-names></name><name><surname>Wolf</surname><given-names>YI</given-names></name><etal/></person-group><year>2006</year><article-title>The cyanobacterial genome core and the origin of photosynthesis</article-title><source>Proc Natl Acad Sci USA</source><volume>103</volume><fpage>13126</fpage><lpage>31</lpage><pub-id pub-id-type="doi">10.1073/pnas.0605709103</pub-id></element-citation></ref><ref id="bib52"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Muro-Pastor</surname><given-names>MI</given-names></name><name><surname>Reyes</surname><given-names>JC</given-names></name><name><surname>Florencio</surname><given-names>FJ</given-names></name></person-group><year>2005</year><article-title>Ammonium assimilation in Cyanobacteria</article-title><source>Photosynth Res</source><volume>83</volume><fpage>135</fpage><lpage>50</lpage><pub-id pub-id-type="doi">10.1007/s11120-004-2082-7</pub-id></element-citation></ref><ref id="bib53"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pace</surname><given-names>NR</given-names></name></person-group><year>1997</year><article-title>A molecular view of microbial diversity and the biosphere</article-title><source>Science</source><volume>276</volume><fpage>734</fpage><lpage>40</lpage><pub-id pub-id-type="doi">10.1126/science.276.5313.734</pub-id></element-citation></ref><ref id="bib54"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Peng</surname><given-names>Y</given-names></name><name><surname>Leung</surname><given-names>HC</given-names></name><name><surname>Yiu</surname><given-names>SM</given-names></name><name><surname>Chin</surname><given-names>FY</given-names></name></person-group><year>2012</year><article-title>IDBA-UD: a de novo assembler for single-cell and metagenomic sequencing data with highly uneven depth</article-title><source>Bioinformatics</source><volume>28</volume><fpage>1420</fpage><lpage>8</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/bts174</pub-id></element-citation></ref><ref id="bib55"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Podar</surname><given-names>M</given-names></name><name><surname>Abulencia</surname><given-names>CB</given-names></name><name><surname>Walcher</surname><given-names>M</given-names></name><name><surname>Hutchison</surname><given-names>D</given-names></name><name><surname>Zengler</surname><given-names>K</given-names></name><name><surname>Garcia</surname><given-names>JA</given-names></name><etal/></person-group><year>2007</year><article-title>Targeted access to the genomes of low-abundance organisms in complex microbial communities</article-title><source>Appl Environ Microbiol</source><volume>73</volume><fpage>3205</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.1128/AEM.02985-06</pub-id></element-citation></ref><ref id="bib56"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Purves</surname><given-names>WK</given-names></name><name><surname>Sadava</surname><given-names>D</given-names></name><name><surname>Orians</surname><given-names>GH</given-names></name><name><surname>Heller</surname><given-names>HC</given-names></name></person-group><year>2003</year><article-title>Life: the science of biology</article-title><source>Evolution, diversity, and ecology</source><publisher-loc>New York</publisher-loc><publisher-name>Macmillan</publisher-name><fpage>537</fpage></element-citation></ref><ref id="bib57"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Raes</surname><given-names>J</given-names></name><name><surname>Korbel</surname><given-names>JO</given-names></name><name><surname>Lercher</surname><given-names>MJ</given-names></name><name><surname>von Mering</surname><given-names>C</given-names></name><name><surname>Bork</surname><given-names>P</given-names></name></person-group><year>2007</year><article-title>Prediction of effective genome size in metagenomic samples</article-title><source>Genome 
Biol</source><volume>8</volume><fpage>R10</fpage><pub-id pub-id-type="doi">10.1186/gb-2007-8-1-r10</pub-id></element-citation></ref><ref id="bib58"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sagan</surname><given-names>L</given-names></name></person-group><year>1967</year><article-title>On the origin of mitosing cells</article-title><source>J Theor Biol</source><volume>14</volume><fpage>255</fpage><lpage>74</lpage><pub-id pub-id-type="doi">10.1016/0022-5193(67)90079-3</pub-id></element-citation></ref><ref id="bib59"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sanchez-Baracaldo</surname><given-names>P</given-names></name><name><surname>Hayes</surname><given-names>PK</given-names></name><name><surname>Blank</surname><given-names>CE</given-names></name></person-group><year>2005</year><article-title>Morphological and habitat evolution in the Cyanobacteria using a compartmentalization approach</article-title><source>Geobiology</source><volume>3</volume><fpage>145</fpage><lpage>65</lpage><pub-id pub-id-type="doi">10.1111/j.1472-4669.2005.00050.x</pub-id></element-citation></ref><ref id="bib60"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Schaechter</surname><given-names>M</given-names></name></person-group><year>2010</year><source>Desk encyclopedia of microbiology</source><publisher-loc>Oxford</publisher-loc><publisher-name>Academic Press</publisher-name><fpage>340</fpage></element-citation></ref><ref id="bib61"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schocke</surname><given-names>L</given-names></name><name><surname>Schink</surname><given-names>B</given-names></name></person-group><year>1998</year><article-title>Membrane-bound proton-translocating pyrophosphatase of Syntrophus gentianae, a syntrophically benzoate-degrading fermenting 
bacterium</article-title><source>Eur J Biochem</source><volume>256</volume><fpage>589</fpage><lpage>94</lpage><pub-id pub-id-type="doi">10.1046/j.1432-1327.1998.2560589.x</pub-id></element-citation></ref><ref id="bib62"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sharon</surname><given-names>I</given-names></name><name><surname>Morowitz</surname><given-names>MJ</given-names></name><name><surname>Thomas</surname><given-names>BC</given-names></name><name><surname>Costello</surname><given-names>EK</given-names></name><name><surname>Relman</surname><given-names>DA</given-names></name><name><surname>Banfield</surname><given-names>JF</given-names></name></person-group><year>2013</year><article-title>Time series community genomics analysis reveals rapid shifts in bacterial species, strains, and phage during infant gut colonization</article-title><source>Genome Res</source><volume>23</volume><fpage>111</fpage><lpage>20</lpage><pub-id pub-id-type="doi">10.1101/gr.142315.112</pub-id></element-citation></ref><ref id="bib63"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shi</surname><given-names>T</given-names></name><name><surname>Falkowski</surname><given-names>PG</given-names></name></person-group><year>2008</year><article-title>Genome evolution in cyanobacteria: the stable core and the variable shell</article-title><source>Proc Natl Acad Sci USA</source><volume>105</volume><fpage>2510</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1073/pnas.0711165105</pub-id></element-citation></ref><ref id="bib64"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sieber</surname><given-names>JR</given-names></name><name><surname>McInerney</surname><given-names>MJ</given-names></name><name><surname>Gunsalus</surname><given-names>RP</given-names></name></person-group><year>2012</year><article-title>Genomic insights into 
syntrophy: the paradigm for anaerobic metabolic cooperation</article-title><source>Annu Rev Microbiol</source><volume>66</volume><fpage>429</fpage><lpage>52</lpage><pub-id pub-id-type="doi">10.1146/annurev-micro-090110-102844</pub-id></element-citation></ref><ref id="bib65"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sorek</surname><given-names>R</given-names></name><name><surname>Zhu</surname><given-names>Y</given-names></name><name><surname>Creevey</surname><given-names>CJ</given-names></name><name><surname>Francino</surname><given-names>MP</given-names></name><name><surname>Bork</surname><given-names>P</given-names></name><name><surname>Rubin</surname><given-names>EM</given-names></name></person-group><year>2007</year><article-title>Genome-wide experimental determination of barriers to horizontal gene transfer</article-title><source>Science</source><volume>318</volume><fpage>1449</fpage><lpage>52</lpage><pub-id pub-id-type="doi">10.1126/science.1147112</pub-id></element-citation></ref><ref id="bib66"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Stal</surname><given-names>LJ</given-names></name><name><surname>Moezelaar</surname><given-names>R</given-names></name></person-group><year>1997</year><article-title>Fermentation in Cyanobacteria</article-title><source>FEMS Microbiol Rev</source><volume>21</volume><fpage>179</fpage><lpage>211</lpage><pub-id pub-id-type="doi">10.1111/j.1574-6976.1997.tb00350.x</pub-id></element-citation></ref><ref id="bib67"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Stamatakis</surname><given-names>A</given-names></name></person-group><year>2006</year><article-title>RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands of taxa and mixed 
models</article-title><source>Bioinformatics</source><volume>22</volume><fpage>2688</fpage><lpage>90</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btl446</pub-id></element-citation></ref><ref id="bib68"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tamagnini</surname><given-names>P</given-names></name><name><surname>Leitao</surname><given-names>E</given-names></name><name><surname>Oliveira</surname><given-names>P</given-names></name><name><surname>Ferreira</surname><given-names>D</given-names></name><name><surname>Pinto</surname><given-names>F</given-names></name><name><surname>Harris</surname><given-names>DJ</given-names></name><etal/></person-group><year>2007</year><article-title>Cyanobacterial hydrogenases: diversity, regulation and applications</article-title><source>FEMS Microbiol Rev</source><volume>31</volume><fpage>692</fpage><lpage>720</lpage><pub-id pub-id-type="doi">10.1111/j.1574-6976.2007.00085.x</pub-id></element-citation></ref><ref id="bib69"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tripp</surname><given-names>HJ</given-names></name><name><surname>Bench</surname><given-names>SR</given-names></name><name><surname>Turk</surname><given-names>KA</given-names></name><name><surname>Foster</surname><given-names>RA</given-names></name><name><surname>Desany</surname><given-names>BA</given-names></name><name><surname>Niazi</surname><given-names>F</given-names></name><etal/></person-group><year>2010</year><article-title>Metabolic streamlining in an open-ocean nitrogen-fixing cyanobacterium</article-title><source>Nature</source><volume>464</volume><fpage>90</fpage><lpage>4</lpage><pub-id pub-id-type="doi">10.1038/nature08786</pub-id></element-citation></ref><ref id="bib70"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Turnbaugh</surname><given-names>PJ</given-names></name><name><surname>Ridaura</surname><given-names>VK</given-names></name><name><surname>Faith</surname><given-names>JJ</given-names></name><name><surname>Rey</surname><given-names>FE</given-names></name><name><surname>Knight</surname><given-names>R</given-names></name><name><surname>Gordon</surname><given-names>JI</given-names></name></person-group><year>2009</year><article-title>The effect of diet on the human gut microbiome: a metagenomic analysis in humanized gnotobiotic mice</article-title><source>Sci Transl Med</source><volume>1</volume><fpage>6ra14</fpage><pub-id pub-id-type="doi">10.1126/scitranslmed.3000322</pub-id></element-citation></ref><ref id="bib71"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van Waasbergen</surname><given-names>LG</given-names></name><name><surname>Dolganov</surname><given-names>N</given-names></name><name><surname>Grossman</surname><given-names>AR</given-names></name></person-group><year>2002</year><article-title>nblS, a gene involved in controlling photosynthesis-related gene expression during high light and nutrient stress in Synechococcus elongatus PCC 7942</article-title><source>J Bacteriol</source><volume>184</volume><fpage>2481</fpage><lpage>90</lpage><pub-id pub-id-type="doi">10.1128/JB.184.9.2481-2490.2002</pub-id></element-citation></ref><ref id="bib72"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vignais</surname><given-names>PM</given-names></name><name><surname>Billoud</surname><given-names>B</given-names></name></person-group><year>2007</year><article-title>Occurrence, classification, and biological function of hydrogenases: an overview</article-title><source>Chem Rev</source><volume>107</volume><fpage>4206</fpage><lpage>72</lpage><pub-id pub-id-type="doi">10.1021/cr050196r</pub-id></element-citation></ref><ref 
id="bib73"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vitousek</surname><given-names>PM</given-names></name><name><surname>Cassman</surname><given-names>K</given-names></name><name><surname>Cleveland</surname><given-names>C</given-names></name><name><surname>Crews</surname><given-names>T</given-names></name><name><surname>Field</surname><given-names>CB</given-names></name><name><surname>Grimm</surname><given-names>NB</given-names></name><etal/></person-group><year>2002</year><article-title>Towards an ecological understanding of biological nitrogen fixation</article-title><source>Biogeochemistry</source><volume>57</volume><fpage>1</fpage><lpage>45</lpage><pub-id pub-id-type="doi">10.1023/A:1015798428743</pub-id></element-citation></ref><ref id="bib74"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wrighton</surname><given-names>KC</given-names></name><name><surname>Thomas</surname><given-names>BC</given-names></name><name><surname>Sharon</surname><given-names>I</given-names></name><name><surname>Miller</surname><given-names>CS</given-names></name><name><surname>Castelle</surname><given-names>CJ</given-names></name><name><surname>VerBerkmoes</surname><given-names>NC</given-names></name><etal/></person-group><year>2012</year><article-title>Fermentation, hydrogen, and sulfur metabolism in multiple uncultivated bacterial phyla</article-title><source>Science</source><volume>337</volume><fpage>1661</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1126/science.1224041</pub-id></element-citation></ref><ref id="bib75"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname><given-names>M</given-names></name><name><surname>Eisen</surname><given-names>JA</given-names></name></person-group><year>2008</year><article-title>A simple, fast, and accurate method of phylogenomic 
inference</article-title><source>Genome Biol</source><volume>9</volume><fpage>R151</fpage><pub-id pub-id-type="doi">10.1186/gb-2008-9-10-r151</pub-id></element-citation></ref><ref id="bib76"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yatsunenko</surname><given-names>T</given-names></name><name><surname>Rey</surname><given-names>FE</given-names></name><name><surname>Manary</surname><given-names>MJ</given-names></name><name><surname>Trehan</surname><given-names>I</given-names></name><name><surname>Dominguez-Bello</surname><given-names>MG</given-names></name><name><surname>Contreras</surname><given-names>M</given-names></name><etal/></person-group><year>2012</year><article-title>Human gut microbiome viewed across age and geography</article-title><source>Nature</source><volume>486</volume><fpage>222</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1038/nature11053</pub-id></element-citation></ref><ref id="bib77"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Youssef</surname><given-names>NH</given-names></name><name><surname>Blainey</surname><given-names>PC</given-names></name><name><surname>Quake</surname><given-names>SR</given-names></name><name><surname>Elshahed</surname><given-names>MS</given-names></name></person-group><year>2011</year><article-title>Partial genome assembly for a candidate division OP11 single cell from an anoxic spring (Zodletone Spring, Oklahoma)</article-title><source>Appl Environ Microbiol</source><volume>77</volume><fpage>7804</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.1128/AEM.06059-11</pub-id></element-citation></ref><ref id="bib78"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Yusuf</surname><given-names>D</given-names></name><name><surname>Marz</surname><given-names>M</given-names></name><name><surname>Stadler</surname><given-names>PF</given-names></name><name><surname>Hofacker</surname><given-names>IL</given-names></name></person-group><year>2010</year><article-title>Bcheck: a wrapper tool for detecting RNase P RNA genes</article-title><source>BMC Genomics</source><volume>11</volume><fpage>432</fpage><pub-id pub-id-type="doi">10.1186/1471-2164-11-432</pub-id></element-citation></ref><ref id="bib79"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zehr</surname><given-names>JP</given-names></name><name><surname>Jenkins</surname><given-names>BD</given-names></name><name><surname>Short</surname><given-names>SM</given-names></name><name><surname>Steward</surname><given-names>GF</given-names></name></person-group><year>2003</year><article-title>Nitrogenase gene diversity and microbial community structure: a cross-system comparison</article-title><source>Environ Microbiol</source><volume>5</volume><fpage>539</fpage><lpage>54</lpage><pub-id pub-id-type="doi">10.1046/j.1462-2920.2003.00451.x</pub-id></element-citation></ref><ref id="bib80"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zerbino</surname><given-names>DR</given-names></name><name><surname>Birney</surname><given-names>E</given-names></name></person-group><year>2008</year><article-title>Velvet: algorithms for de novo short read assembly using de Bruijn graphs</article-title><source>Genome Res</source><volume>18</volume><fpage>821</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1101/gr.074492.107</pub-id></element-citation></ref><ref id="bib82"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Zhang</surname><given-names>K</given-names></name><name><surname>Martiny</surname><given-names>AC</given-names></name><name><surname>Reppas</surname><given-names>NB</given-names></name><name><surname>Barry</surname><given-names>KW</given-names></name><name><surname>Malek</surname><given-names>J</given-names></name><name><surname>Chisholm</surname><given-names>SW</given-names></name><etal/></person-group><year>2006</year><article-title>Sequencing genomes from single cells by polymerase cloning</article-title><source>Nat Biotechnol</source><volume>24</volume><fpage>680</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1038/nbt1214</pub-id></element-citation></ref><ref id="bib81"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>S</given-names></name><name><surname>Bryant</surname><given-names>DA</given-names></name></person-group><year>2011</year><article-title>The tricarboxylic acid cycle in Cyanobacteria</article-title><source>Science</source><volume>334</volume><fpage>1551</fpage><lpage>3</lpage><pub-id pub-id-type="doi">10.1126/science.1210858</pub-id></element-citation></ref><ref id="bib83"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zwieb</surname><given-names>C</given-names></name><name><surname>Nakao</surname><given-names>Y</given-names></name><name><surname>Nakashima</surname><given-names>T</given-names></name><name><surname>Takagi</surname><given-names>H</given-names></name><name><surname>Goda</surname><given-names>S</given-names></name><name><surname>Andersen</surname><given-names>ES</given-names></name><etal/></person-group><year>2011</year><article-title>Structural modeling of RNase P RNA of the hyperthermophilic archaeon Pyrococcus horikoshii OT3</article-title><source>Biochem Biophys Res Commun</source><volume>414</volume><fpage>517</fpage><lpage>22</lpage><pub-id 
pub-id-type="doi">10.1016/J.Bbrc.2011.09.098</pub-id></element-citation></ref></ref-list></back><sub-article article-type="article-commentary" id="SA1"><front-stub><article-id pub-id-type="doi">10.7554/eLife.01102.023</article-id><title-group><article-title>Decision letter</article-title></title-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Kolter</surname><given-names>Roberto</given-names></name><role>Reviewing editor</role><aff><institution>Harvard Medical School</institution>, <country>United States</country></aff></contrib></contrib-group></front-stub><body><boxed-text><p>eLife posts the editorial decision letter and author response on a selection of the published articles (subject to the approval of the authors). An edited version of the letter sent to the authors after peer review is shown, indicating the substantive concerns or comments; minor concerns are not usually shown. Reviewers have the opportunity to discuss the decision before the letter is sent (see <ext-link ext-link-type="uri" xlink:href="http://elife.elifesciences.org/review-process">review process</ext-link>). Similarly, the author response typically shows only responses to the major concerns raised by the reviewers.</p></boxed-text><p>[Editors’ note: a previous version of this study was rejected after peer review, but the authors submitted for reconsideration. The two decision letters after peer review are shown below.]</p><p>Many thanks for submitting your work on the genome reconstruction and subsequent analyses of the several members of the previously only poorly characterized ‘deep branching Cyanobacteria’ (The human gut and subsurface sediment harbor non-photosynthetic Cyanobacteria). In addition to myself (Roberto Kolter), two other individuals read and offered comments on the manuscript. Overall we found the analyses quite intriguing and have discussed our opinions extensively since providing our initial reviews. 
My sense is that we have come to a good consensus about the manuscript. We all see some important work here but have some serious concerns that can be summarized as follows:</p><p>1) There is a major concern that there needs to be better support for the claim that these genomes, as constructed from metagenomic data, do indeed occur in nature (i.e., that they are ‘real’). We see two possibilities here. It may be that you can indeed develop an argument that from the bioinformatic analyses you have absolute confidence that the assembled sequences are true representations of extant genomes in nature. Alternatively, there are experimental ways to determine this. In the spirit of <italic>eLife</italic>, we do not wish to recommend “make work” types of experiments. The former solution should not necessitate bench work; the latter does and could take a lot of time. We recognize that. But you might already have some of those data.</p><p>2) A second major concern has to do with the designation of these organisms as Cyanobacteria to begin with. In fact, if anything, we would argue that your results make a very compelling argument that these bacteria need to be considered in a class (phylum?) all by themselves. In this regard, the paper would have to be revised greatly because it currently rests on the assertion that these are “truly non-photosynthetic Cyanobacteria”. This concern is a bit larger than it seems. After all, the main interest in this manuscript was the characterization of non-photosynthetic cyanos. But after reading the results, it is clear that the initial designation of these as Cyanobacteria was probably premature and off the mark. Nonetheless, we feel that this correction should be communicated.</p><p>Regarding the overall writing of the manuscript (particularly as it pertains to the inappropriate usage of the term ‘Cyanobacteria’) we had split reactions. One reviewer found the paper well written; I must confess that I did not. 
I marked the PDF extensively with suggested edits.</p><p>Considering these major concerns, and the possibility that additional experiments might be needed, I am recommending that the submission be rejected. That will free you and your colleagues to submit the work (perhaps editing it as a result of our comments) elsewhere. However, I want you to consider this a “soft reject” that leaves open the opportunity for a resubmission. If you can make a compelling case that this new phylum is intrinsically important and interesting and can provide a stronger case that the genomes are real, we would be glad to reconsider it. I, for one, would love to see how knowledge of the genomes might guide you and your colleagues to cultivate members of this group. Not that I would hold you to it, but some discussion of why they have not been cultivated based on genome knowledge would be welcome.</p><p>[Editors’ note: what now follows is the decision letter after the authors submitted for further consideration.]</p><p>Thank you for sending your resubmission to <italic>eLife</italic>, which is now entitled “The human gut and groundwater harbor non-photosynthetic bacteria belonging to a new phylum sibling to Cyanobacteria”. This new version of your article has been favorably evaluated by a Senior editor, myself as a member of the Board of Reviewing Editors, and two expert reviewers (one of them, Jon Zehr, agreed to reveal his identity).</p><p>After our initial individual evaluation of the manuscript we discussed our comments and reached a consensus that by and large you and your co-authors have addressed our prior main concerns which were: (a) that this division was called “deep branching Cyanobacteria” when all evidence pointed to the bacteria not being cyanos and (b) that there was not enough description of how the genomes had been assembled. In addition, we feel that the revised manuscript is much improved in terms of its written style. 
Nonetheless, all of us felt that the manuscript could still use some revising before it can be accepted for publication. In short, the key changes still needed include:</p><p>1) Given the lack of cultured representatives, this should still be referred to as a “candidate phylum”.</p><p>2) The discussion of the light sensing genes should be modified and streamlined because the direct evidence that the homologs are indeed involved in light sensing is somewhat weak.</p><p>3) The discussion of the evolution of nitrogenase may need to be re-pitched as there is no well-accepted view on the original evolution of nitrogenase. We feel some of your arguments could be used in favor of its being present in the last common ancestor of Cyanobacteria.</p></body></sub-article><sub-article article-type="reply" id="SA2"><front-stub><article-id pub-id-type="doi">10.7554/eLife.01102.024</article-id><title-group><article-title>Author response</article-title></title-group></front-stub><body><p>[Editors’ note: the author responses to the first round of peer review follow.]</p><p><italic>1) There is a major concern that there needs to be better support for the claim that these genomes, as constructed from metagenomic data, do indeed occur in nature (i.e., that they are ‘real’). We see two possibilities here. It may be that you can indeed develop an argument that from the bioinformatic analyses you have absolute confidence that the assembled sequences are true representations of extant genomes in nature. Alternatively, there are experimental ways to determine this. In the spirit of eLife, we do not wish to recommend “make work” types of experiments. The former solution should not necessitate bench work; the latter does and could take a lot of time. We recognize that. 
But you might already have some of those data</italic>.</p><p>We have provided a very detailed description of the genome reconstruction process for one of the genomes reported in the study and this process is summarized in the main text of the paper. As you will see, the process is very well controlled and relies on information from multiple independent sources that were used for verifying the assembly. Our criteria for defining complete genomes are very stringent and include far more than the presence of all 57 single copy genes. Specifically, we resolved all connections between assembled scaffolds assigned to the genome. This last criterion makes it very unlikely that parts of the genome will be left out or that foreign fragments will be added to the genome. In fact, given the absence of genome amplification, it is likely that the genomes recovered using these methods are more complete and at least as accurate as genomes recovered by single cell genomics, the widely accepted method for studying uncultivated microbes. Further supporting the veracity of the assemblies, the identification of scaffolds that belong to these genomes was very simple. Beyond this step, the process is very similar to culture-based genomics. Please note that reconstructing genomes from metagenomic data is not a new approach and has a long history in both the Banfield Lab (e.g., Tyson et al., Nature, 2004; Lo et al. Nature, 2007; Aliaga Goltsman et al. Applied and Environmental Microbiology, 2009; Baker et al. 
PNAS, 2010; Wrighton et al., Science, 2012; Sharon et al., Genome Research, 2013) and other labs (e.g., Iverson et al., Science, 2012; Albertsen et al., Nature Biotechnology, 2013).</p><p>Testing the credibility of our methods using, for example, single cell genomics or cultivation is not only difficult but also probably impossible since there is no guarantee that the strain (or species) captured by one of the other methods will be the same as the one assembled from the metagenomic data. However, we recently had the opportunity to validate the assembly of a genome recovered by us from subsurface sediment (Castelle et al., Nature Communications, in press) that was assembled using very similar methods to those used here. The validation was done using long reads (∼8 kbp) that were sequenced from the same sample using the Moleculo sequencing technology. 322 of the 340 reads (95%) belonging to the genome aligned perfectly to the assembled genome. Disagreements between the remaining 18 reads and the assembled genome were checked and found to be the result of 11 local mis-assemblies of a few hundred base pairs each (compared to a genome size of 2.2 Mbp). These localized mis-assemblies would also have arisen in an isolate genome assembly. Hence, this experiment provides independent verification of the reliability of our metagenomic genome recovery method.</p><p><italic>2) A second major concern has to do with the designation of these organisms as Cyanobacteria to begin with. In fact, if anything, we would argue that your results make a very compelling argument that these bacteria need to be considered in a class (phylum?) all by themselves. In this regard, the paper would have to be revised greatly because it currently rests on the assertion that these are “truly non-photosynthetic Cyanobacteria”. This concern is a bit larger than it seems. After all, the main interest in this manuscript was the characterization of non-photosynthetic cyanos. 
But after reading the results, it is clear that the initial designation of these as Cyanobacteria was probably premature and off the mark. Nonetheless, we feel that this correction should be communicated</italic>.</p><p>We have now revised the manuscript extensively and defined the reported genomes as representing a new phylum. Following a suggestion from one of the reviewers we also propose the name “Melainabacteria” for the new phylum. This decision is supported by commonly accepted criteria for defining new phyla (16S rRNA similarity to known phyla – see text) and we thank the reviewers for raising this important issue. We do insist however that the new genomes are the closest relatives to Cyanobacteria that have been sequenced to date. This relies on both 16S rRNA phylogeny, which is the accepted gold standard for inferring phylogenetic relations, and also phylogeny that is based on concatenated ribosomal proteins, which have proved to be very reliable.</p><p>We agree that the message of the paper should not have been restricted to the discovery of “truly non-photosynthetic Cyanobacteria”. We revised the paper to highlight implications for the evolutionary history of Cyanobacteria.</p><p>The current study provides a first glimpse into a new group of organisms inhabiting the human gut that is prevalent in the West, appears to be more prevalent in non-Western countries, and may have had an important role in maintaining human health in the past. As an aside, we mention that the group of organisms reported in this study was included in the 200 “most wanted genomes” of the NIH Human Microbiome Project, which underscores the interest of the human microbiome community in these organisms. 
Taken together, we strongly believe that the results reported here are of interest for a wide range of researchers as expected from a paper submitted to a wide audience journal such as <italic>eLife</italic>.</p><p><italic>[Editors’ note: the author responses to the re-review follow.]</italic></p><p>With regard to the key changes requested:</p><p>1) We now refer to the Melainabacteria phylum as a candidate phylum throughout the paper.</p><p>2) We have carefully rewritten the light sensing discussion noting that these genes are not capable of sensing light but rather are regulator genes that may have been co-opted for regulating light responsive genes and thus may have aided the acquisition of light systems.</p><p>3) We have noted in our discussion of nitrogenase evolution that ambiguity still exists as we cannot exclude the possibility of an ancient shared nitrogenase existing prior to the present nitrogenase complexes in Melainabacteria and Cyanobacteria.</p><p>Furthermore, we have also clarified the flagellin TLR5 activation sequence figure (<xref ref-type="fig" rid="fig7">Figure 7</xref>) by rearranging the sequences according to similarity in the TLR5 activation region and by indicating which sequences we predict by sequence similarity to be recognized by TLR5. We have also removed the sentence from the Discussion that mentioned our attempts to culture this group. This is because one of us (Ruth) felt that our attempts had been somewhat haphazard, and since culturing anaerobes is not one of our core specialties, this information is not likely to be informative (i.e., another more specialized group may succeed using these methods).</p></body></sub-article></article> |