Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
1032 lines (958 sloc) 118 KB
%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/
%% Created for Adina Howe at 2012-10-30 14:15:26 -0500
%% Saved with string encoding Unicode (UTF-8)
%% Journal article cited by article number (Pages holds the e-locator).
%% The title is singly braced so the bibliography style controls its casing.
@article{Angiuoli:2011hd,
Author = {Angiuoli, Samuel V and White, James R and Matalka, Malcolm and White, Owen and Fricke, W Florian},
Date-Added = {2012-10-30 19:11:11 +0000},
Date-Modified = {2012-10-30 19:11:11 +0000},
Journal = {PLoS ONE},
Month = oct,
Number = {10},
Pages = {e26624},
Title = {Resources and Costs for Microbial Sequence Analysis Evaluated Using Virtual Machines and Cloud Computing},
Volume = {6},
Year = {2011}}
%% Journal article cited by article number: Pages holds the single e-locator,
%% not a range. Only the proper noun in the title is brace-protected.
@article{Hansen:2010if,
Author = {Hansen, Kasper D and Brenner, Steven E and Dudoit, Sandrine},
Date-Added = {2012-10-30 19:09:05 +0000},
Date-Modified = {2012-10-30 19:09:05 +0000},
Journal = {Nucleic Acids Research},
Month = jun,
Number = {12},
Pages = {e131},
Title = {Biases in {Illumina} transcriptome sequencing caused by random hexamer priming},
Volume = {38},
Year = {2010}}
%% Product names in the title are brace-protected; the rest of the title is
%% left to the bibliography style's casing rules.
@article{Minoche:2011fl,
Author = {Minoche, Andr{\'e} E and Dohm, Juliane C and Himmelbauer, Heinz},
Date-Added = {2012-10-30 19:09:05 +0000},
Date-Modified = {2012-10-30 19:09:05 +0000},
Journal = {Genome Biology},
Month = nov,
Number = {11},
Pages = {R112},
Title = {Evaluation of genomic high-throughput sequencing data generated on {Illumina} {HiSeq} and {Genome Analyzer} systems},
Volume = {12},
Year = {2011}}
%% Journal article cited by article number: Pages holds the single e-locator,
%% not a range. Only the acronym in the title is brace-protected.
@article{Dohm:2008ky,
Author = {Dohm, Juliane C and Lottaz, Claudio and Borodina, Tatiana and Himmelbauer, Heinz},
Date-Added = {2012-10-30 19:09:05 +0000},
Date-Modified = {2012-10-30 19:09:05 +0000},
Journal = {Nucleic Acids Research},
Month = aug,
Number = {16},
Pages = {e105},
Title = {Substantial biases in ultra-short read data sets from high-throughput {DNA} sequencing},
Volume = {36},
Year = {2008}}
%% The consortium is a corporate author: its inner braces keep it from being
%% split into first/last name parts. Acronyms and proper nouns in the title
%% are brace-protected individually.
@article{Haas:2011jg,
Author = {Haas, B J and Gevers, D and Earl, A M and Feldgarden, M and Ward, D V and Giannoukos, G and Ciulla, D and Tabbaa, D and Highlander, S K and Sodergren, E and Methe, B and DeSantis, T Z and {The Human Microbiome Consortium} and Petrosino, J F and Knight, R and Birren, B W},
Date-Added = {2012-10-25 21:21:30 +0000},
Date-Modified = {2012-10-25 21:21:30 +0000},
Journal = {Genome Research},
Month = mar,
Number = {3},
Pages = {494--504},
Title = {Chimeric {16S} {rRNA} sequence formation and detection in {Sanger} and 454-pyrosequenced {PCR} amplicons},
Volume = {21},
Year = {2011}}
%% Same article as the entry keyed Li:2010p234 in this file; both keys are kept
%% so existing citations of either one keep resolving.
%% Month follows that record (February, issue 2).
@article{Li:2010jz,
Author = {Li, Ruiqiang and Zhu, Hongmei and Ruan, Jue and Qian, Wubin and Fang, Xiaodong and Shi, Zhongbin and Li, Yingrui and Li, Shengting and Shan, Gao and Kristiansen, Karsten and Li, Songgang and Yang, Huanming and Wang, Jian and Wang, Jun},
Date-Added = {2012-10-25 21:07:58 +0000},
Date-Modified = {2012-10-25 21:07:58 +0000},
Journal = {Genome Research},
Month = feb,
Number = {2},
Pages = {265--272},
Title = {De novo assembly of human genomes with massively parallel short read sequencing},
Volume = {20},
Year = {2010}}
%% The title is singly braced so the bibliography style controls its casing;
%% it contains no acronyms or proper nouns that need protection.
@article{Chitsaz:2011kr,
Author = {Chitsaz, Hamidreza and Yee-Greenbaum, Joyclyn L and Tesler, Glenn and Lombardo, Mary-Jane and Dupont, Christopher L and Badger, Jonathan H and Novotny, Mark and Rusch, Douglas B and Fraser, Louise J and Gormley, Niall A and Schulz-Trieglaff, Ole and Smith, Geoffrey P and Evers, Dirk J and Pevzner, Pavel A and Lasken, Roger S},
Date-Added = {2012-10-25 21:07:58 +0000},
Date-Modified = {2012-10-25 21:07:58 +0000},
Journal = {Nature Biotechnology},
Month = sep,
Number = {10},
Pages = {915--921},
Title = {Efficient de novo assembly of single-cell bacterial genomes from short-read data sets},
Volume = {29},
Year = {2011}}
%% Software names in the title are brace-protected to keep their capitalization.
%% NOTE(review): this record has no Volume, Number or Pages -- confirm against
%% the published version and fill them in.
@article{Namiki:2012iq,
Author = {Namiki, T and Hachiya, T and Tanaka, H and Sakakibara, Y},
Date-Added = {2012-10-25 21:07:58 +0000},
Date-Modified = {2012-10-25 21:07:58 +0000},
Journal = {Nucleic Acids Research},
Month = jul,
Title = {{MetaVelvet}: an extension of {Velvet} assembler to de novo metagenome assembly from short sequence reads},
Year = {2012}}
%% "de Bruijn" is a personal name, so it is brace-protected against recasing;
%% the rest of the title is left to the bibliography style.
@article{Pell:2012cq,
Author = {Pell, J and Hintze, A and Canino-Koning, R and Howe, A and Tiedje, J M and Brown, C T},
Date-Added = {2012-10-25 19:16:46 +0000},
Date-Modified = {2012-10-25 19:16:46 +0000},
Journal = {Proceedings of the National Academy of Sciences of the United States of America},
Number = {33},
Pages = {13272--13277},
Title = {Scaling metagenome sequence assembly with probabilistic {de Bruijn} graphs},
Volume = {109},
Year = {2012}}
%% Author names use "Last, First" form with spaced initials so that every
%% initial is parsed as its own name token.
%% NOTE(review): the Pmid value carries a "related:" suffix and does not look
%% like a PubMed identifier -- confirm and replace with the real PMID.
@article{Morgan:2010p740,
Author = {Morgan, J. L. and Darling, A. E. and Eisen, J. A.},
Date-Added = {2012-08-10 15:31:23 -0400},
Date-Modified = {2012-08-10 15:31:23 -0400},
Journal = {PLoS ONE},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2010/Morgan/PLoS%20ONE%202010%20Morgan-1.pdf},
Number = {4},
Pages = {e10209},
Pmid = {17759976494534016177related:sbz8f4YcePYJ},
Rating = {0},
Read = {Yes},
Title = {Metagenomic sequencing of an in vitro-simulated microbial community},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p740},
Volume = {5},
Year = {2010},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAd4AAAAAAd4AAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtksxpQTG9TIE9ORSAyMDEwIE1vcmdhbi0xLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3p5zJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAGTW9yZ2FuABAACAAAzDES8gAAABEACAAAzJt1kAAAAAEAGAALZLMAC2RsAAtiwQAFwSgABcEnAAIN+QACAFZNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAxMDoATW9yZ2FuOgBQTG9TIE9ORSAyMDEwIE1vcmdhbi0xLnBkZgAOADYAGgBQAEwAbwBTACAATwBOAEUAIAAyADAAMQAwACAATQBvAHIAZwBhAG4ALQAxAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBDVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDEwL01vcmdhbi9QTG9TIE9ORSAyMDEwIE1vcmdhbi0xLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QPS4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAxMC9Nb3JnYW4vUExvUyBPTkUgMjAxMCBNb3JnYW4tMS5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACggKEAokCkgKdAqECrwK2Ar8C/wMEAwcDFAMZAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAys=}}
%% Same article as the entry keyed Li:2010jz in this file; both keys are kept
%% so existing citations of either one keep resolving.
%% Month uses the predefined macro, the journal name is spelled out in full,
%% and the page range is written with its complete end page.
@article{Li:2010p234,
Abstract = {Next-generation massively parallel DNA sequencing technologies provide ultrahigh throughput at a substantially lower unit data cost; however, the data are very short read length sequences, making de novo assembly extremely challenging. Here, we describe a novel method for de novo assembly of large genomes from short read sequences. We successfully assembled both the Asian and African human genome sequences, achieving an N50 contig size of 7.4 and 5.9 kilobases (kb) and scaffold of 446.3 and 61.9 kb, respectively. The development of this de novo short read assembly method creates new opportunities for building reference sequences and carrying out accurate analyses of unexplored genomes in a cost-effective way.},
Affiliation = {Beijing Genomics Institute at Shenzhen, Shenzhen 518083, China.},
Author = {Li, Ruiqiang and Zhu, Hongmei and Ruan, Jue and Qian, Wubin and Fang, Xiaodong and Shi, Zhongbin and Li, Yingrui and Li, Shengting and Shan, Gao and Kristiansen, Karsten and Li, Songgang and Yang, Huanming and Wang, Jian and Wang, Jun},
Date-Added = {2010-06-15 11:18:52 -0400},
Date-Modified = {2010-06-15 11:19:04 -0400},
Doi = {10.1101/gr.097261.109},
Journal = {Genome Research},
Keywords = {Human Genome Project, Sequence Alignment, Oligonucleotide Array Sequence Analysis, Asian Continental Ancestry Group, Genome: Human, African Continental Ancestry Group, Humans, Sequence Analysis: DNA},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2010/Li/Genome%20Res%202010%20Li.pdf},
Month = feb,
Number = {2},
Pages = {265--272},
Pii = {gr.097261.109},
Pmid = {20019144},
Rating = {0},
Read = {Yes},
Title = {De novo assembly of human genomes with massively parallel short read sequencing},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p234},
Volume = {20},
Year = {2010},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAcIAAAAAAcIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkpRZHZW5vbWUgUmVzIDIwMTAgTGkucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3jbzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAACTGkAEAAIAADMMRLyAAAAEQAIAADMm3WQAAAAAQAYAAtkpQALZGwAC2LBAAXBKAAFwScAAg35AAIATk1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDEwOgBMaToAR2Vub21lIFJlcyAyMDEwIExpLnBkZgAOAC4AFgBHAGUAbgBvAG0AZQAgAFIAZQBzACAAMgAwADEAMAAgAEwAaQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAO1VzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAxMC9MaS9HZW5vbWUgUmVzIDIwMTAgTGkucGRmAAATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxA1Li4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDEwL0xpL0dlbm9tZSBSZXMgMjAxMCBMaS5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACZgJoAm0CdgKBAoUCkwKaAqMC2wLgAuMC8AL1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAwc=},
Bdsk-Url-1 = {http://dx.doi.org/10.1101/gr.097261.109}}
%% Review article. Month uses the predefined macro so styles can localize or
%% abbreviate it; author names use the unambiguous "Last, First" form.
@article{Kunin:2008p16,
Author = {Kunin, V and Copeland, A and Lapidus, A and Mavromatis, K and Hugenholtz, P},
Date-Added = {2009-12-03 09:26:56 -0500},
Date-Modified = {2009-12-03 09:27:13 -0500},
Doi = {10.1128/MMBR.00009-08},
Journal = {Microbiology and Molecular Biology Reviews},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2008/Kunin/Microbiology%20and%20Molecular%20Biology%20Reviews%202008%20Kunin.pdf},
Month = dec,
Number = {4},
Pages = {557--578},
Rating = {0},
Read = {Yes},
Title = {A Bioinformatician's Guide to Metagenomics},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p16},
Volume = {72},
Year = {2008},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAj4AAAAAAj4AAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtjuh9NaWNyb2Jpb2xvZ3kgYW5kIE1vbGUjQjc3MEMucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3cMzJsvPwAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAFS3VuaW4AABAACAAAzDES8gAAABEACAAAzJt1jwAAAAEAGAALY7oAC2OlAAtiwQAFwSgABcEnAAIN+QACAFpNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAwODoAS3VuaW46AE1pY3JvYmlvbG9neSBhbmQgTW9sZSNCNzcwQy5wZGYADgB0ADkATQBpAGMAcgBvAGIAaQBvAGwAbwBnAHkAIABhAG4AZAAgAE0AbwBsAGUAYwB1AGwAYQByACAAQgBpAG8AbABvAGcAeQAgAFIAZQB2AGkAZQB3AHMAIAAyADAAMAA4ACAASwB1AG4AaQBuAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBhVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDA4L0t1bmluL01pY3JvYmlvbG9neSBhbmQgTW9sZWN1bGFyIEJpb2xvZ3kgUmV2aWV3cyAyMDA4IEt1bmluLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QWy4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAwOC9LdW5pbi9NaWNyb2Jpb2xvZ3kgYW5kIE1vbGVjdWxhciBCaW9sb2d5IFJldmlld3MgMjAwOCBLdW5pbi5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKAC4gLkAukC8gL9AwEDDwMWAx8DfQOCA4UDkgOXAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA6k=},
Bdsk-Url-1 = {http://dx.doi.org/10.1128/MMBR.00009-08}}
%% Month uses the predefined macro and the page range is written with its
%% complete end page; author names use the "Last, First" form.
%% NOTE(review): the Abstract field holds only an availability statement, not
%% the article abstract -- confirm whether it should be replaced.
@article{Chaisson:2004p1285,
Abstract = {Available from the web at http://www.cse.ucsd.edu/groups/bioinformatics/software.html},
Affiliation = {Bioinformatics Program, University of California San Diego, La Jolla, CA 92093, USA. mchaisso@bioinf.ucsd.edu},
Author = {Chaisson, Mark and Pevzner, Pavel and Tang, Haixu},
Date-Added = {2012-04-30 09:25:24 -0400},
Date-Modified = {2012-04-30 09:25:30 -0400},
Doi = {10.1093/bioinformatics/bth205},
Journal = {Bioinformatics},
Keywords = {Feasibility Studies, Molecular Sequence Data, Base Sequence, Contig Mapping, Algorithms, Gene Expression Profiling, Sequence Alignment, Sequence Analysis: DNA},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2004/Chaisson/Bioinformatics%202004%20Chaisson.pdf},
Month = sep,
Number = {13},
Pages = {2067--2074},
Pii = {bth205},
Pmid = {15059830},
Rating = {0},
Read = {Yes},
Title = {Fragment assembly with short reads},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1285},
Volume = {20},
Year = {2004},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAfwAAAAAAfwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtjNh9CaW9pbmZvcm1hdGljcyAyMDA0IEMjQjdCRkQucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3v9zJsvPwAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAIQ2hhaXNzb24AEAAIAADMMRLyAAAAEQAIAADMm3WPAAAAAQAYAAtjNgALYzUAC2LBAAXBKAAFwScAAg35AAIAXU1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDA0OgBDaGFpc3NvbjoAQmlvaW5mb3JtYXRpY3MgMjAwNCBDI0I3QkZELnBkZgAADgBCACAAQgBpAG8AaQBuAGYAbwByAG0AYQB0AGkAYwBzACAAMgAwADAANAAgAEMAaABhAGkAcwBzAG8AbgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAS1VzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAwNC9DaGFpc3Nvbi9CaW9pbmZvcm1hdGljcyAyMDA0IENoYWlzc29uLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QRS4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAwNC9DaGFpc3Nvbi9CaW9pbmZvcm1hdGljcyAyMDA0IENoYWlzc29uLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKgAqICpwKwArsCvwLNAtQC3QMlAyoDLQM6Az8AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADUQ==},
Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/bth205}}
%% The journal name is capitalized the same way as in the other Genome Research
%% entries of this file. Month uses the predefined macro and the page range is
%% written with its complete end page.
@article{Schatz:2010p907,
Abstract = {Second-generation sequencing technology can now be used to sequence an entire human genome in a matter of days and at low cost. Sequence read lengths, initially very short, have rapidly increased since the technology first appeared, and we now are seeing a growing number of efforts to sequence large genomes de novo from these short reads. In this Perspective, we describe the issues associated with short-read assembly, the different types of data produced by second-gen sequencers, and the latest assembly algorithms designed for these data. We also review the genomes that have been assembled recently from short reads and make recommendations for sequencing strategies that will yield a high-quality assembly.},
Affiliation = {Center for Bioinformatics and Computational Biology, University of Maryland, College Park, Maryland 20742, USA.},
Author = {Schatz, Michael C and Delcher, Arthur L and Salzberg, Steven L},
Date-Added = {2011-10-25 09:39:53 -0400},
Date-Modified = {2012-07-18 13:46:19 -0400},
Doi = {10.1101/gr.101360.109},
Journal = {Genome Research},
Keywords = {Genome: Human, Genomics, Humans, Sequence Analysis: DNA, Base Sequence, Algorithms},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2010/Schatz/Genome%20research%202010%20Schatz.pdf},
Month = sep,
Number = {9},
Pages = {1165--1173},
Pii = {gr.101360.109},
Pmid = {20508146},
Rating = {0},
Title = {Assembly of large genomes using second-generation sequencing},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p907},
Volume = {20},
Year = {2010},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAfIAAAAAAfIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkxx9HZW5vbWUgcmVzZWFyY2ggMjAxMCBTY2hhdHoucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3rRzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAGU2NoYXR6ABAACAAAzDES8gAAABEACAAAzJt1kAAAAAEAGAALZMcAC2RsAAtiwQAFwSgABcEnAAIN+QACAFtNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAxMDoAU2NoYXR6OgBHZW5vbWUgcmVzZWFyY2ggMjAxMCBTY2hhdHoucGRmAAAOAEAAHwBHAGUAbgBvAG0AZQAgAHIAZQBzAGUAYQByAGMAaAAgADIAMAAxADAAIABTAGMAaABhAHQAegAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIASFVzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAxMC9TY2hhdHovR2Vub21lIHJlc2VhcmNoIDIwMTAgU2NoYXR6LnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBCLi4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDEwL1NjaGF0ei9HZW5vbWUgcmVzZWFyY2ggMjAxMCBTY2hhdHoucGRm0hwdJCWiJSFcTlNEaWN0aW9uYXJ5EgABhqBfEA9OU0tleWVkQXJjaGl2ZXIACAARABYAHwAoADIANQA6ADwARQBLAFIAXQBkAGwAbwBxAHMAdQB4AHoAfACGAJMAmACgApYCmAKdAqYCsQK1AsMCygLTAxgDHQMgAy0DMgAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAANE},
Bdsk-Url-1 = {http://dx.doi.org/10.1101/gr.101360.109}}
%% Author names use "Last, First" form with spaced initials so that every
%% initial is parsed as its own name token; the place name in the title is
%% brace-protected.
%% NOTE(review): "and others" and the end page were added on the understanding
%% that the article has more than the ten authors listed and spans pages
%% 66--74 -- confirm against the journal record.
%% NOTE(review): the Pmid value carries a "related:" suffix and does not look
%% like a PubMed identifier -- confirm and replace with the real PMID.
@article{Venter:2004p727,
Author = {Venter, J. C. and Remington, K. and Heidelberg, J. F. and Halpern, A. L. and Rusch, D. and Eisen, J. A. and Wu, D. and Paulsen, I. and Nelson, K. E. and Nelson, W. and others},
Date-Added = {2011-04-12 21:18:52 -0400},
Date-Modified = {2011-04-12 21:19:09 -0400},
Journal = {Science},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2004/Venter/Science%202004%20Venter-1.pdf},
Number = {5667},
Pages = {66--74},
Pmid = {4884189229460492625related:URHIBTogyEMJ},
Rating = {0},
Read = {Yes},
Title = {Environmental genome shotgun sequencing of the {Sargasso Sea}},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p727},
Volume = {304},
Year = {2004},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAdoAAAAAAdoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtjRhlTY2llbmNlIDIwMDQgVmVudGVyLTEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3pNzJsvPwAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAGVmVudGVyABAACAAAzDES8gAAABEACAAAzJt1jwAAAAEAGAALY0YAC2M1AAtiwQAFwSgABcEnAAIN+QACAFVNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAwNDoAVmVudGVyOgBTY2llbmNlIDIwMDQgVmVudGVyLTEucGRmAAAOADQAGQBTAGMAaQBlAG4AYwBlACAAMgAwADAANAAgAFYAZQBuAHQAZQByAC0AMQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAQlVzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAwNC9WZW50ZXIvU2NpZW5jZSAyMDA0IFZlbnRlci0xLnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxA8Li4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDA0L1ZlbnRlci9TY2llbmNlIDIwMDQgVmVudGVyLTEucGRm0hwdJCWiJSFcTlNEaWN0aW9uYXJ5EgABhqBfEA9OU0tleWVkQXJjaGl2ZXIACAARABYAHwAoADIANQA6ADwARQBLAFIAXQBkAGwAbwBxAHMAdQB4AHoAfACGAJMAmACgAn4CgAKFAo4CmQKdAqsCsgK7AvoC/wMCAw8DFAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAMm}}
%% Month uses the predefined macro; the page range repeats the "i" prefix and
%% is written with its complete end page.
%% NOTE(review): the Abstract field holds only a contact address, not the
%% article abstract -- confirm whether it should be replaced.
@article{Medvedev:2011p1284,
Abstract = {pmedvedev@cs.ucsd.edu.},
Affiliation = {Department of Computer Science and Engineering, University of California, San Diego, CA, USA. pmedvedev@cs.ucsd.edu},
Author = {Medvedev, Paul and Scott, Eric and Kakaradov, Boyko and Pevzner, Pavel},
Date-Added = {2012-04-30 09:23:20 -0400},
Date-Modified = {2012-04-30 09:23:28 -0400},
Doi = {10.1093/bioinformatics/btr208},
Journal = {Bioinformatics},
Keywords = {Single-Cell Analysis, Escherichia coli, Algorithms, Models: Statistical, High-Throughput Nucleotide Sequencing},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2011/Medvedev/Bioinformatics%202011%20Medvedev.pdf},
Month = jul,
Number = {13},
Pages = {i137--i141},
Pii = {btr208},
Pmid = {21685062},
Rating = {0},
Read = {Yes},
Title = {Error correction of high-throughput sequencing datasets with non-uniform coverage},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1284},
Volume = {27},
Year = {2011},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAfwAAAAAAfwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlHR9CaW9pbmZvcm1hdGljcyAyMDExIE0jQjdCRjgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3v4zJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAITWVkdmVkZXYAEAAIAADMMRLyAAAAEQAIAADMm3WQAAAAAQAYAAtlHQALZPMAC2LBAAXBKAAFwScAAg35AAIAXU1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDExOgBNZWR2ZWRldjoAQmlvaW5mb3JtYXRpY3MgMjAxMSBNI0I3QkY4LnBkZgAADgBCACAAQgBpAG8AaQBuAGYAbwByAG0AYQB0AGkAYwBzACAAMgAwADEAMQAgAE0AZQBkAHYAZQBkAGUAdgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAS1VzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAxMS9NZWR2ZWRldi9CaW9pbmZvcm1hdGljcyAyMDExIE1lZHZlZGV2LnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QRS4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAxMS9NZWR2ZWRldi9CaW9pbmZvcm1hdGljcyAyMDExIE1lZHZlZGV2LnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKgAqICpwKwArsCvwLNAtQC3QMlAyoDLQM6Az8AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADUQ==},
Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btr208}}
%% Journal article cited by article number (Pages holds the e-locator).
%% Month uses the predefined macro and the vendor name in the title is
%% brace-protected against recasing.
@article{Nakamura:2011p741,
Abstract = {We identified the sequence-specific starting positions of consecutive miscalls in the mapping of reads obtained from the Illumina Genome Analyser (GA). Detailed analysis of the miscall pattern indicated that the underlying mechanism involves sequence-specific interference of the base elongation process during sequencing. The two major sequence patterns that trigger this sequence-specific error (SSE) are: (i) inverted repeats and (ii) GGC sequences. We speculate that these sequences favor dephasing by inhibiting single-base elongation, by: (i) folding single-stranded DNA and (ii) altering enzyme preference. This phenomenon is a major cause of sequence coverage variability and of the unfavorable bias observed for population-targeted methods such as RNA-seq and ChIP-seq. Moreover, SSE is a potential cause of false single-nucleotide polymorphism (SNP) calls and also significantly hinders de novo assembly. This article highlights the importance of recognizing SSE and its underlying mechanisms in the hope of enhancing the potential usefulness of the Illumina sequencers.},
Affiliation = {Graduate School of Information Science, Graduate School of Biological Sciences, Nara Institute of Science and Technology, 8916-5 Takayama-cho, Ikoma, Nara 630-0192, Japan.},
Author = {Nakamura, Kensuke and Oshima, Taku and Morimoto, Takuya and Ikeda, Shun and Yoshikawa, Hirofumi and Shiwa, Yuh and Ishikawa, Shu and Linak, Margaret C and Hirai, Aki and Takahashi, Hiroki and Altaf-Ul-Amin, Md and Ogasawara, Naotake and Kanaya, Shigehiko},
Date-Added = {2011-05-19 20:58:38 -0400},
Date-Modified = {2012-07-18 13:47:46 -0400},
Doi = {10.1093/nar/gkr344},
Journal = {Nucleic Acids Research},
Keywords = {Sequence Analysis: DNA, Genome: Bacterial, Bacillus subtilis, Base Pair Mismatch, Inverted Repeat Sequences, Sequence Analysis: RNA, Chromosome Mapping},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2011/Nakamura/Nucleic%20Acids%20Research%202011%20Nakamura.pdf},
Month = jul,
Number = {13},
Pages = {e90},
Pii = {gkr344},
Pmid = {21576222},
Rating = {0},
Title = {Sequence-specific error profile of {Illumina} sequencers},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p741},
Volume = {39},
Year = {2011},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAhQAAAAAAhQAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlIR9OdWNsZWljIEFjaWRzIFJlc2VhcmMjQjdBN0QucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3p9zJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAITmFrYW11cmEAEAAIAADMMRLyAAAAEQAIAADMm3WQAAAAAQAYAAtlIQALZPMAC2LBAAXBKAAFwScAAg35AAIAXU1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDExOgBOYWthbXVyYToATnVjbGVpYyBBY2lkcyBSZXNlYXJjI0I3QTdELnBkZgAADgBSACgATgB1AGMAbABlAGkAYwAgAEEAYwBpAGQAcwAgAFIAZQBzAGUAYQByAGMAaAAgADIAMAAxADEAIABOAGEAawBhAG0AdQByAGEALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAFNVc2Vycy9hZGluYS9Eb2N1bWVudHMvUGFwZXJzLzIwMTEvTmFrYW11cmEvTnVjbGVpYyBBY2lkcyBSZXNlYXJjaCAyMDExIE5ha2FtdXJhLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QTS4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAxMS9OYWthbXVyYS9OdWNsZWljIEFjaWRzIFJlc2VhcmNoIDIwMTEgTmFrYW11cmEucGRm0hwdJCWiJSFcTlNEaWN0aW9uYXJ5EgABhqBfEA9OU0tleWVkQXJjaGl2ZXIACAARABYAHwAoADIANQA6ADwARQBLAFIAXQBkAGwAbwBxAHMAdQB4AHoAfACGAJMAmACgArgCugK/AsgC0wLXAuUC7AL1A0UDSgNNA1oDXwAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAANx},
Bdsk-Url-1 = {http://dx.doi.org/10.1093/nar/gkr344}}
%% Single-author article cited by article number (no issue number recorded).
%% Month uses the predefined macro.
%% NOTE(review): the month value is carried over unchanged from the export --
%% confirm it against the journal record.
@article{Li:2009p707,
Abstract = {RAMMCAP is a very fast method that can cluster and annotate one million metagenomic reads in only hundreds of CPU hours. It is available from http://tools.camera.calit2.net/camera/rammcap/.},
Affiliation = {California Institute for Telecommunications and Information Technology, University of California, San Diego, La Jolla, California 92093, USA. liwz@sdsc.edu},
Author = {Li, Weizhong},
Date-Added = {2011-02-23 19:20:57 -0500},
Date-Modified = {2012-07-18 13:48:20 -0400},
Doi = {10.1186/1471-2105-10-359},
Journal = {BMC Bioinformatics},
Keywords = {Pattern Recognition: Automated, Metagenomics, Sequence Analysis: DNA, Cluster Analysis, Computational Biology, Algorithms, Sequence Alignment},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2009/Li/BMC%20Bioinformatics%202009%20Li.pdf},
Month = jan,
Pages = {359},
Pii = {1471-2105-10-359},
Pmid = {19863816},
Rating = {0},
Read = {Yes},
Title = {Analysis and comparison of very large metagenomes with fast clustering and functional annotation},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p707},
Volume = {10},
Year = {2009},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAeIAAAAAAeIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkIR5CTUMgQmlvaW5mb3JtYXRpY3MgMjAwOSBMaS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3oNzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAACTGkAEAAIAADMMRLyAAAAEQAIAADMm3WQAAAAAQAYAAtkIQALY+wAC2LBAAXBKAAFwScAAg35AAIAVk1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDA5OgBMaToAQk1DIEJpb2luZm9ybWF0aWNzIDIwMDkgTGkucGRmAA4APgAeAEIATQBDACAAQgBpAG8AaQBuAGYAbwByAG0AYQB0AGkAYwBzACAAMgAwADAAOQAgAEwAaQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAQ1VzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAwOS9MaS9CTUMgQmlvaW5mb3JtYXRpY3MgMjAwOSBMaS5wZGYAABMAAS8AABUAAgAM//8AAIAF0hwdHh9YJGNsYXNzZXNaJGNsYXNzbmFtZaMfICFdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfED0uLi8uLi9Eb2N1bWVudHMvUGFwZXJzLzIwMDkvTGkvQk1DIEJpb2luZm9ybWF0aWNzIDIwMDkgTGkucGRm0hwdJCWiJSFcTlNEaWN0aW9uYXJ5EgABhqBfEA9OU0tleWVkQXJjaGl2ZXIACAARABYAHwAoADIANQA6ADwARQBLAFIAXQBkAGwAbwBxAHMAdQB4AHoAfACGAJMAmACgAoYCiAKNApYCoQKlArMCugLDAwMDCAMLAxgDHQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAMv},
Bdsk-Url-1 = {http://dx.doi.org/10.1186/1471-2105-10-359}}
%% Placeholder record for a locally stored PDF ("Untitled-p797.pdf").
%% NOTE(review): this entry has no Author, Journal or Year, so it lacks the
%% fields an article entry requires and will produce warnings when cited.
%% NOTE(review): the Title reads like a venue name plus a year and a surname
%% rather than a paper title; presumably a conference paper -- identify the
%% actual work and complete the record before citing this key.
@article{Anonymous:2012p797,
Date-Added = {2011-09-28 15:54:11 -0400},
Date-Modified = {2012-07-18 13:47:02 -0400},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/Unknown/Unknown/Untitled-p797.pdf},
Rating = {0},
Read = {Yes},
Title = {Bioinformatics and Biomedicine (BIBM) 2010 IEEE International Conference on 2010 Charuvaka},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p797},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAcQAAAAAAcQAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlhhFVbnRpdGxlZC1wNzk3LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3qYzJsvQQAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAHVW5rbm93bgAAEAAIAADMMRLyAAAAEQAIAADMm3WRAAAAAQAYAAtlhgALZX0AC2LBAAXBKAAFwScAAg35AAIAUU1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgBVbmtub3duOgBVbmtub3duOgBVbnRpdGxlZC1wNzk3LnBkZgAADgAkABEAVQBuAHQAaQB0AGwAZQBkAC0AcAA3ADkANwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAPlVzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvVW5rbm93bi9Vbmtub3duL1VudGl0bGVkLXA3OTcucGRmABMAAS8AABUAAgAM//8AAIAF0hwdHh9YJGNsYXNzZXNaJGNsYXNzbmFtZaMfICFdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfEDguLi8uLi9Eb2N1bWVudHMvUGFwZXJzL1Vua25vd24vVW5rbm93bi9VbnRpdGxlZC1wNzk3LnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAJoAmoCbwJ4AoMChwKVApwCpQLgAuUC6AL1AvoAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADDA==}}
%% Symposium paper: typed as a proceedings contribution, with the symposium
%% name in Booktitle. The tool name and acronym in the title are
%% brace-protected against recasing.
%% NOTE(review): the Abstract field is a search-result snippet with elisions,
%% not the paper's abstract; no page range is recorded -- confirm both.
@inproceedings{Zhang:2012p959,
Abstract = {... YUAN ZHANG and YANNI SUN Dept ... the genetic diversity and composition of a plasmid metagenome from a wastewater treatment plant ... a basis for functional profiling, profile HMM-based homology search was also used for phylogenetic complexity analysis in metagenomic data ...},
Author = {Zhang, Yuan and Sun, Yanni},
Booktitle = {Pacific Symposium on Biocomputing},
Date-Added = {2011-11-01 15:33:22 -0400},
Date-Modified = {2011-11-01 15:39:54 -0400},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2012/Zhang/Pacific%20Symposium%20on%20Biocomputing%202012%20Zhang.pdf},
Rating = {0},
Title = {{Metadomain}: A profile {HMM}-based protein domain classification tool for short sequence},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p959},
Url = {http://psb.stanford.edu/psb-online/proceedings/psb12/zhang-y.pdf},
Year = {2012},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAiIAAAAAAiIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlbR9QYWNpZmljIFN5bXBvc2l1bSBvbiAjQjdBRTIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3rizJsvQQAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAFWmhhbmcAABAACAAAzDES8gAAABEACAAAzJt1kQAAAAEAGAALZW0AC2VSAAtiwQAFwSgABcEnAAIN+QACAFpNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAxMjoAWmhhbmc6AFBhY2lmaWMgU3ltcG9zaXVtIG9uICNCN0FFMi5wZGYADgBiADAAUABhAGMAaQBmAGkAYwAgAFMAeQBtAHAAbwBzAGkAdQBtACAAbwBuACAAQgBpAG8AYwBvAG0AcAB1AHQAaQBuAGcAIAAyADAAMQAyACAAWgBoAGEAbgBnAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBYVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDEyL1poYW5nL1BhY2lmaWMgU3ltcG9zaXVtIG9uIEJpb2NvbXB1dGluZyAyMDEyIFpoYW5nLnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBSLi4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDEyL1poYW5nL1BhY2lmaWMgU3ltcG9zaXVtIG9uIEJpb2NvbXB1dGluZyAyMDEyIFpoYW5nLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoALGAsgCzQLWAuEC5QLzAvoDAwNYA10DYANtA3IAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADhA==},
Bdsk-Url-1 = {http://psb.stanford.edu/psb-online/proceedings/psb12/zhang-y.pdf}}
@article{Altschul:1990p1335,
Abstract = {A new approach to rapid sequence comparison, basic local alignment search tool (BLAST), directly approximates alignments that optimize a measure of local similarity, the maximal segment pair (MSP) score. Recent mathematical results on the stochastic properties of MSP scores allow an analysis of the performance of this method as well as the statistical significance of alignments it generates. The basic algorithm is simple and robust; it can be implemented in a number of ways and applied in a variety of contexts including straightforward DNA and protein sequence database searches, motif searches, gene identification searches, and in the analysis of multiple regions of similarity in long DNA sequences. In addition to its flexibility and tractability to mathematical analysis, BLAST is an order of magnitude faster than existing sequence comparison tools of comparable sensitivity.},
Affiliation = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894.},
Author = {Altschul, S F and Gish, W and Miller, W and Myers, E W and Lipman, D J},
Date-Added = {2012-07-03 15:16:00 -0400},
Date-Modified = {2012-07-03 15:16:54 -0400},
Doi = {10.1016/S0022-2836(05)80360-2},
Journal = {Journal of Molecular Biology},
Keywords = {Software, Sequence Homology: Nucleic Acid, Algorithms, Base Sequence, Databases: Factual, Sensitivity and Specificity, Mutation, Amino Acid Sequence},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/1990/Altschul/J%20Mol%20Biol%201990%20Altschul.pdf},
Month = oct,
Number = {3},
Pages = {403--410},
Pii = {S0022-2836(05)80360-2},
Pmid = {2231712},
Rating = {0},
Title = {Basic local alignment search tool},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1335},
Volume = {215},
Year = {1990},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAewAAAAAAewAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtizBxKIE1vbCBCaW9sIDE5OTAgQWx0c2NodWwucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC30FzJsvPwAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAIQWx0c2NodWwAEAAIAADMMRLyAAAAEQAIAADMm3WPAAAAAQAYAAtizAALYssAC2LBAAXBKAAFwScAAg35AAIAWk1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAxOTkwOgBBbHRzY2h1bDoASiBNb2wgQmlvbCAxOTkwIEFsdHNjaHVsLnBkZgAOADoAHABKACAATQBvAGwAIABCAGkAbwBsACAAMQA5ADkAMAAgAEEAbAB0AHMAYwBoAHUAbAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAR1VzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMTk5MC9BbHRzY2h1bC9KIE1vbCBCaW9sIDE5OTAgQWx0c2NodWwucGRmAAATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBBLi4vLi4vRG9jdW1lbnRzL1BhcGVycy8xOTkwL0FsdHNjaHVsL0ogTW9sIEJpb2wgMTk5MCBBbHRzY2h1bC5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACkAKSApcCoAKrAq8CvQLEAs0DEQMWAxkDJgMrAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAz0=},
Bdsk-Url-1 = {http://dx.doi.org/10.1016/S0022-2836(05)80360-2}}
@article{Kelley:2010p1260,
Abstract = {We introduce Quake, a program to detect and correct errors in DNA sequencing reads. Using a maximum likelihood approach incorporating quality values and nucleotide specific miscall rates, Quake achieves the highest accuracy on realistically simulated reads. We further demonstrate substantial improvements in de novo assembly and SNP detection after using Quake. Quake can be used for any size project, including more than one billion human reads, and is freely available as open source software from http://www.cbcb.umd.edu/software/quake.},
Affiliation = {Center for Bioinformatics and Computational Biology, Institute for Advanced Computer Studies, and Department of Computer Science, University of Maryland, College Park, MD 20742, USA. dakelley@umiacs.umd.edu},
Author = {Kelley, David R and Schatz, Michael C and Salzberg, Steven L},
Date-Added = {2012-03-03 20:42:52 -0500},
Date-Modified = {2012-03-05 23:34:47 -0500},
Doi = {10.1186/gb-2010-11-11-r116},
Journal = {Genome Biology},
Keywords = {Sequence Alignment, Models: Biological, Genome: Human, DNA: Bacterial, Software, Escherichia coli, Likelihood Functions, Sequence Analysis: DNA, Polymorphism: Single Nucleotide, Computational Biology, Humans, Algorithms},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2010/Kelley/Genome%20Biology%202010%20Kelley.pdf},
Month = nov,
Number = {11},
Pages = {R116},
Pii = {gb-2010-11-11-r116},
Pmid = {21114842},
Rating = {0},
Read = {Yes},
Title = {{Quake}: quality-aware detection and correction of sequencing errors},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1260},
Volume = {11},
Year = {2010},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAe4AAAAAAe4AAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtknR5HZW5vbWUgQmlvbG9neSAyMDEwIEtlbGxleS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3t8zJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAGS2VsbGV5ABAACAAAzDES8gAAABEACAAAzJt1kAAAAAEAGAALZJ0AC2RsAAtiwQAFwSgABcEnAAIN+QACAFpNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAxMDoAS2VsbGV5OgBHZW5vbWUgQmlvbG9neSAyMDEwIEtlbGxleS5wZGYADgA+AB4ARwBlAG4AbwBtAGUAIABCAGkAbwBsAG8AZwB5ACAAMgAwADEAMAAgAEsAZQBsAGwAZQB5AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBHVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDEwL0tlbGxleS9HZW5vbWUgQmlvbG9neSAyMDEwIEtlbGxleS5wZGYAABMAAS8AABUAAgAM//8AAIAF0hwdHh9YJGNsYXNzZXNaJGNsYXNzbmFtZaMfICFdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfEEEuLi8uLi9Eb2N1bWVudHMvUGFwZXJzLzIwMTAvS2VsbGV5L0dlbm9tZSBCaW9sb2d5IDIwMTAgS2VsbGV5LnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKSApQCmQKiAq0CsQK/AsYCzwMTAxgDGwMoAy0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADPw==},
Bdsk-Url-1 = {http://dx.doi.org/10.1186/gb-2010-11-11-r116}}
@article{Pignatelli:2011p742,
Abstract = {A frequent step in metagenomic data analysis comprises the assembly of the sequenced reads. Many assembly tools have been published in the last years targeting data coming from next-generation sequencing (NGS) technologies but these assemblers have not been designed for or tested in multi-genome scenarios that characterize metagenomic studies. Here we provide a critical assessment of current de novo short reads assembly tools in multi-genome scenarios using complex simulated metagenomic data. With this approach we tested the fidelity of different assemblers in metagenomic studies demonstrating that even under the simplest compositions the number of chimeric contigs involving different species is noticeable. We further showed that the assembly process reduces the accuracy of the functional classification of the metagenomic data and that these errors can be overcome raising the coverage of the studied metagenome. The results presented here highlight the particular difficulties that de novo genome assemblers face in multi-genome scenarios demonstrating that these difficulties, that often compromise the functional classification of the analyzed data, can be overcome with a high sequencing effort.},
Affiliation = {Unitat Mixta d'Investigaci{\'o} en Gen{\`o}mica i Salut, Centre Superior d'Investigaci{\'o} en Salut P{\'u}blica/UVEG-Institut Cavanilles, Valencia, Spain. mp@ebi.ac.uk},
Author = {Pignatelli, Miguel and Moya, Andr{\'e}s},
Date-Added = {2011-05-28 17:23:44 -0400},
Date-Modified = {2012-07-18 13:47:35 -0400},
Doi = {10.1371/journal.pone.0019984},
Journal = {PLoS ONE},
Keywords = {Genomics, Humans, Phylogeny},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2011/Pignatelli/PLoS%20ONE%202011%20Pignatelli.pdf},
Month = may,
Number = {5},
Pages = {e19984},
Pii = {PONE-D-10-06437},
Pmid = {21625384},
Rating = {0},
Read = {Yes},
Title = {Evaluating the fidelity of de novo short read metagenomic assembly using simulated data},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p742},
Volume = {6},
Year = {2011},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAfIAAAAAAfIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlKxxQTG9TIE9ORSAyMDExIFBpZ25hdGVsbGkucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3qBzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAKUGlnbmF0ZWxsaQAQAAgAAMwxEvIAAAARAAgAAMybdZAAAAABABgAC2UrAAtk8wALYsEABcEoAAXBJwACDfkAAgBcTWFjaW50b3NoIEhEOlVzZXJzOgBhZGluYToARG9jdW1lbnRzOgBQYXBlcnM6ADIwMTE6AFBpZ25hdGVsbGk6AFBMb1MgT05FIDIwMTEgUGlnbmF0ZWxsaS5wZGYADgA6ABwAUABMAG8AUwAgAE8ATgBFACAAMgAwADEAMQAgAFAAaQBnAG4AYQB0AGUAbABsAGkALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAElVc2Vycy9hZGluYS9Eb2N1bWVudHMvUGFwZXJzLzIwMTEvUGlnbmF0ZWxsaS9QTG9TIE9ORSAyMDExIFBpZ25hdGVsbGkucGRmAAATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBDLi4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDExL1BpZ25hdGVsbGkvUExvUyBPTkUgMjAxMSBQaWduYXRlbGxpLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKWApgCnQKmArECtQLDAsoC0wMZAx4DIQMuAzMAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADRQ==},
Bdsk-Url-1 = {http://dx.doi.org/10.1371/journal.pone.0019984}}
@article{Pop:2009p798,
Abstract = {Research into genome assembly algorithms has experienced a resurgence due to new challenges created by the development of next generation sequencing technologies. Several genome assemblers have been published in recent years specifically targeted at the new sequence data; however, the ever-changing technological landscape leads to the need for continued research. In addition, the low cost of next generation sequencing data has led to an increased use of sequencing in new settings. For example, the new field of metagenomics relies on large-scale sequencing of entire microbial communities instead of isolate genomes, leading to new computational challenges. In this article, we outline the major algorithmic approaches for genome assembly and describe recent developments in this domain.},
Affiliation = {Department of Computer Science and the Center for Bioinformatics and Computational Biology at the University of Maryland, College Park, MD 20742, USA. mpop@umd.edu},
Author = {Pop, Mihai},
Date-Added = {2011-09-28 16:00:28 -0400},
Date-Modified = {2012-07-18 13:46:54 -0400},
Doi = {10.1093/bib/bbp026},
Journal = {Briefings in Bioinformatics},
Keywords = {Sequence Analysis: DNA, Genomics, Software, Databases: Genetic, Genome, Humans, Algorithms, Computational Biology},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2009/Pop/Briefings%20in%20bioinformatics%202009%20Pop.pdf},
Month = jul,
Number = {4},
Pages = {354--366},
Pii = {bbp026},
Pmid = {19482960},
Rating = {0},
Read = {Yes},
Title = {Genome assembly reborn: recent computational challenges},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p798},
Volume = {10},
Year = {2009},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAgQAAAAAAgQAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkLR9CcmllZmluZ3MgaW4gYmlvaW5mb3IjQjdBOUMucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3qczJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAADUG9wAAAQAAgAAMwxEvIAAAARAAgAAMybdZAAAAABABgAC2QtAAtj7AALYsEABcEoAAXBJwACDfkAAgBYTWFjaW50b3NoIEhEOlVzZXJzOgBhZGluYToARG9jdW1lbnRzOgBQYXBlcnM6ADIwMDk6AFBvcDoAQnJpZWZpbmdzIGluIGJpb2luZm9yI0I3QTlDLnBkZgAOAFIAKABCAHIAaQBlAGYAaQBuAGcAcwAgAGkAbgAgAGIAaQBvAGkAbgBmAG8AcgBtAGEAdABpAGMAcwAgADIAMAAwADkAIABQAG8AcAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIATlVzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAwOS9Qb3AvQnJpZWZpbmdzIGluIGJpb2luZm9ybWF0aWNzIDIwMDkgUG9wLnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBILi4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDA5L1BvcC9CcmllZmluZ3MgaW4gYmlvaW5mb3JtYXRpY3MgMjAwOSBQb3AucGRm0hwdJCWiJSFcTlNEaWN0aW9uYXJ5EgABhqBfEA9OU0tleWVkQXJjaGl2ZXIACAARABYAHwAoADIANQA6ADwARQBLAFIAXQBkAGwAbwBxAHMAdQB4AHoAfACGAJMAmACgAqgCqgKvArgCwwLHAtUC3ALlAzADNQM4A0UDSgAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAANc},
Bdsk-Url-1 = {http://dx.doi.org/10.1093/bib/bbp026}}
@article{Zerbino:2008p665,
Abstract = {We have developed a new set of algorithms, collectively called "Velvet," to manipulate de Bruijn graphs for genomic sequence assembly. A de Bruijn graph is a compact representation based on short words (k-mers) that is ideal for high coverage, very short read (25-50 bp) data sets. Applying Velvet to very short reads and paired-ends information only, one can produce contigs of significant length, up to 50-kb N50 length in simulations of prokaryotic data and 3-kb N50 on simulated mammalian BACs. When applied to real Solexa data sets without read pairs, Velvet generated contigs of approximately 8 kb in a prokaryote and 2 kb in a mammalian BAC, in close agreement with our simulated results without read-pair information. Velvet represents a new approach to assembly that can leverage very short reads in combination with read pairs to produce useful assemblies.},
Affiliation = {EMBL-European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, United Kingdom.},
Author = {Zerbino, Daniel R and Birney, Ewan},
Date-Added = {2010-12-08 23:41:38 -0500},
Date-Modified = {2010-12-08 23:42:16 -0500},
Doi = {10.1101/gr.074492.107},
Journal = {Genome Research},
Keywords = {Algorithms, Genome: Bacterial, Humans, Streptococcus, Genomics, Mammals, Sequence Analysis: DNA, Genome: Human, Computer Simulation, Animals, Computational Biology, Chromosomes: Artificial: Bacterial},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2008/Zerbino/Genome%20Res%202008%20Zerbino.pdf},
Month = may,
Number = {5},
Pages = {821--829},
Pii = {gr.074492.107},
Pmid = {18349386},
Rating = {0},
Read = {Yes},
Title = {{Velvet}: algorithms for de novo short read assembly using de {Bruijn} graphs},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p665},
Volume = {18},
Year = {2008},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAeYAAAAAAeYAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtj6htHZW5vbWUgUmVzIDIwMDggWmVyYmluby5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3l4zJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAHWmVyYmlubwAAEAAIAADMMRLyAAAAEQAIAADMm3WQAAAAAQAYAAtj6gALY6UAC2LBAAXBKAAFwScAAg35AAIAWE1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDA4OgBaZXJiaW5vOgBHZW5vbWUgUmVzIDIwMDggWmVyYmluby5wZGYADgA4ABsARwBlAG4AbwBtAGUAIABSAGUAcwAgADIAMAAwADgAIABaAGUAcgBiAGkAbgBvAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBFVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDA4L1plcmJpbm8vR2Vub21lIFJlcyAyMDA4IFplcmJpbm8ucGRmAAATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxA/Li4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDA4L1plcmJpbm8vR2Vub21lIFJlcyAyMDA4IFplcmJpbm8ucGRm0hwdJCWiJSFcTlNEaWN0aW9uYXJ5EgABhqBfEA9OU0tleWVkQXJjaGl2ZXIACAARABYAHwAoADIANQA6ADwARQBLAFIAXQBkAGwAbwBxAHMAdQB4AHoAfACGAJMAmACgAooCjAKRApoCpQKpArcCvgLHAwkDDgMRAx4DIwAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAM1},
Bdsk-Url-1 = {http://dx.doi.org/10.1101/gr.074492.107}}
@article{Li:2001p1337,
Abstract = {We present a fast and flexible program for clustering large protein databases at different sequence identity levels. It takes less than 2 h for the all-against-all sequence comparison and clustering of the non-redundant protein database of over 560,000 sequences on a high-end PC. The output database, including only the representative sequences, can be used for more efficient and sensitive database searches.},
Affiliation = {San Diego Supercomputer Center, La Jolla, CA 92093, USA. liwz@sdsc.edu},
Author = {Li, W and Jaroszewski, L and Godzik, A},
Date-Added = {2012-07-18 13:34:20 -0400},
Date-Modified = {2012-07-18 13:34:20 -0400},
Doi = {10.1093/bioinformatics/17.3.282},
Journal = {Bioinformatics},
Keywords = {Databases: Factual, Proteins, Algorithms, Sequence Analysis, Software},
Language = {eng},
Month = mar,
Number = {3},
Pages = {282--283},
Pmid = {11294794},
Rating = {0},
Title = {Clustering of highly homologous sequences to reduce the size of large protein databases},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1337},
Volume = {17},
Year = {2001}}
@article{Miller:2010p226,
Abstract = {The emergence of next-generation sequencing platforms led to resurgence of research in whole-genome shotgun assembly algorithms and software. DNA sequencing data from the Roche 454, Illumina/Solexa, and ABI SOLiD platforms typically present shorter read lengths, higher coverage, and different error profiles compared with Sanger sequencing data. Since 2005, several assembly software packages have been created or revised specifically for de novo assembly of next-generation sequencing data. This review summarizes and compares the published descriptions of packages named SSAKE, SHARCGS, VCAKE, Newbler, Celera Assembler, Euler, Velvet, ABySS, AllPaths, and SOAPdenovo. More generally, it compares the two standard methods known as the de Bruijn graph approach and the overlap/layout/consensus approach to assembly.},
Affiliation = {J. Craig Venter Institute, Rockville, MD 20850-3343, USA. jmiller@jcvi.org},
Author = {Miller, Jason R and Koren, Sergey and Sutton, Granger},
Date-Added = {2010-06-07 13:20:23 -0400},
Date-Modified = {2010-06-07 13:22:56 -0400},
Doi = {10.1016/j.ygeno.2010.03.001},
Journal = {Genomics},
Keywords = {Next-generation sequencing, Genome assembly algorithms},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2010/Miller/Genomics%202010%20Miller.pdf},
Month = jun,
Number = {6},
Pages = {315--327},
Pii = {S0888-7543(10)00049-2},
Pmid = {20211242},
Rating = {0},
Read = {Yes},
Title = {Assembly algorithms for next-generation sequencing data},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p226},
Volume = {95},
Year = {2010},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAdYAAAAAAdYAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkrxhHZW5vbWljcyAyMDEwIE1pbGxlci5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3jEzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAGTWlsbGVyABAACAAAzDES8gAAABEACAAAzJt1kAAAAAEAGAALZK8AC2RsAAtiwQAFwSgABcEnAAIN+QACAFRNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAxMDoATWlsbGVyOgBHZW5vbWljcyAyMDEwIE1pbGxlci5wZGYADgAyABgARwBlAG4AbwBtAGkAYwBzACAAMgAwADEAMAAgAE0AaQBsAGwAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBBVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDEwL01pbGxlci9HZW5vbWljcyAyMDEwIE1pbGxlci5wZGYAABMAAS8AABUAAgAM//8AAIAF0hwdHh9YJGNsYXNzZXNaJGNsYXNzbmFtZaMfICFdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfEDsuLi8uLi9Eb2N1bWVudHMvUGFwZXJzLzIwMTAvTWlsbGVyL0dlbm9taWNzIDIwMTAgTWlsbGVyLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAJ6AnwCgQKKApUCmQKnAq4CtwL1AvoC/QMKAw8AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADIQ==},
Bdsk-Url-1 = {http://dx.doi.org/10.1016/j.ygeno.2010.03.001}}
@article{Iverson:2012p1281,
Abstract = {Ecosystems are shaped by complex communities of mostly unculturable microbes. Metagenomes provide a fragmented view of such communities, but the ecosystem functions of major groups of organisms remain mysterious. To better characterize members of these communities, we developed methods to reconstruct genomes directly from mate-paired short-read metagenomes. We closed a genome representing the as-yet uncultured marine group II Euryarchaeota, assembled de novo from 1.7\% of a metagenome sequenced from surface seawater. The genome describes a motile, photo-heterotrophic cell focused on degradation of protein and lipids and clarifies the origin of proteorhodopsin. It also demonstrates that high-coverage mate-paired sequence can overcome assembly difficulties caused by interstrain variation in complex microbial communities, enabling inference of ecosystem functions for uncultured members.},
Affiliation = {School of Oceanography, University of Washington, Seattle, WA 98195, USA.},
Author = {Iverson, Vaughn and Morris, Robert M and Frazar, Christian D and Berthiaume, Chris T and Morales, Rhonda L and Armbrust, E Virginia},
Date-Added = {2012-04-25 09:50:06 -0400},
Date-Modified = {2012-04-25 09:52:50 -0400},
Doi = {10.1126/science.1212665},
Journal = {Science},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2012/Iverson/Science%202012%20Iverson-1.pdf},
Month = feb,
Number = {6068},
Pages = {587--590},
Pii = {335/6068/587},
Pmid = {22301318},
Rating = {0},
Title = {Untangling genomes from metagenomes: revealing an uncultured class of marine {Euryarchaeota}},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1281},
Volume = {335},
Year = {2012},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAeIAAAAAAeIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlVRpTY2llbmNlIDIwMTIgSXZlcnNvbi0xLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3vozJsvQQAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAHSXZlcnNvbgAAEAAIAADMMRLyAAAAEQAIAADMm3WRAAAAAQAYAAtlVQALZVIAC2LBAAXBKAAFwScAAg35AAIAV01hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDEyOgBJdmVyc29uOgBTY2llbmNlIDIwMTIgSXZlcnNvbi0xLnBkZgAADgA2ABoAUwBjAGkAZQBuAGMAZQAgADIAMAAxADIAIABJAHYAZQByAHMAbwBuAC0AMQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIARFVzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAxMi9JdmVyc29uL1NjaWVuY2UgMjAxMiBJdmVyc29uLTEucGRmABMAAS8AABUAAgAM//8AAIAF0hwdHh9YJGNsYXNzZXNaJGNsYXNzbmFtZaMfICFdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfED4uLi8uLi9Eb2N1bWVudHMvUGFwZXJzLzIwMTIvSXZlcnNvbi9TY2llbmNlIDIwMTIgSXZlcnNvbi0xLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKGAogCjQKWAqECpQKzAroCwwMEAwkDDAMZAx4AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADMA==},
Bdsk-Url-1 = {http://dx.doi.org/10.1126/science.1212665}}
@article{Keegan:2012p1336,
Abstract = {We provide a novel method, DRISEE (duplicate read inferred sequencing error estimation), to assess sequencing quality (alternatively referred to as "noise" or "error") within and/or between sequencing samples. DRISEE provides positional error estimates that can be used to inform read trimming within a sample. It also provides global (whole sample) error estimates that can be used to identify samples with high or varying levels of sequencing error that may confound downstream analyses, particularly in the case of studies that utilize data from multiple sequencing samples. For shotgun metagenomic data, we believe that DRISEE provides estimates of sequencing error that are more accurate and less constrained by technical limitations than existing methods that rely on reference genomes or the use of scores (e.g. Phred). Here, DRISEE is applied to (non amplicon) data sets from both the 454 and Illumina platforms. The DRISEE error estimate is obtained by analyzing sets of artifactual duplicate reads (ADRs), a known by-product of both sequencing platforms. We present DRISEE as an open-source, platform-independent method to assess sequencing error in shotgun metagenomic data, and utilize it to discover previously uncharacterized error in de novo sequence data from the 454 and Illumina sequencing platforms.},
Affiliation = {Argonne National Laboratory, Argonne, Illinois, United States of America.},
Author = {Keegan, Kevin P and Trimble, William L and Wilkening, Jared and Wilke, Andreas and Harrison, Travis and D'Souza, Mark and Meyer, Folker},
Date-Added = {2012-07-16 08:43:30 -0400},
Date-Modified = {2012-07-16 08:44:17 -0400},
Doi = {10.1371/journal.pcbi.1002541},
Journal = {PLoS Computational Biology},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2012/Keegan/PLoS%20Comput%20Biol%202012%20Keegan.pdf},
Month = jun,
Number = {6},
Pages = {e1002541},
Pii = {PCOMPBIOL-D-11-01820},
Pmid = {22685393},
Rating = {0},
Read = {Yes},
Title = {A Platform-Independent Method for Detecting Errors in Metagenomic Sequencing Data: {DRISEE}},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1336},
Volume = {8},
Year = {2012},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAfYAAAAAAfYAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlWB9QTG9TIENvbXB1dCBCaW9sIDIwMTIjQjdEMDkucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC30JzJsvQQAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAGS2VlZ2FuABAACAAAzDES8gAAABEACAAAzJt1kQAAAAEAGAALZVgAC2VSAAtiwQAFwSgABcEnAAIN+QACAFtNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAxMjoAS2VlZ2FuOgBQTG9TIENvbXB1dCBCaW9sIDIwMTIjQjdEMDkucGRmAAAOAEIAIABQAEwAbwBTACAAQwBvAG0AcAB1AHQAIABCAGkAbwBsACAAMgAwADEAMgAgAEsAZQBlAGcAYQBuAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBJVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDEyL0tlZWdhbi9QTG9TIENvbXB1dCBCaW9sIDIwMTIgS2VlZ2FuLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QQy4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAxMi9LZWVnYW4vUExvUyBDb21wdXQgQmlvbCAyMDEyIEtlZWdhbi5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACmgKcAqECqgK1ArkCxwLOAtcDHQMiAyUDMgM3AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA0k=},
Bdsk-Url-1 = {http://dx.doi.org/10.1371/journal.pcbi.1002541}}
@article{Hoff:2009p913,
Abstract = {Metagenomic sequencing projects yield numerous sequencing reads of a diverse range of uncultivated and mostly yet unknown microorganisms. In many cases, these sequencing reads cannot be assembled into longer contigs. Thus, gene prediction tools that were originally developed ...},
Author = {Hoff, K and Lingner, T and Meinicke, P and others},
Date-Added = {2011-11-01 15:31:59 -0400},
Date-Modified = {2011-11-01 15:32:22 -0400},
Journal = {Nucleic Acids Research},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2009/Hoff/Nucleic%20Acids%20Research%202009%20Hoff.pdf},
Month = jul,
Number = {suppl 2},
Pages = {W101--W105},
Rating = {0},
Title = {{Orphelia}: predicting genes in metagenomic sequencing reads},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p913},
Url = {http://nar.oxfordjournals.org/content/37/suppl_2/W101.short},
Volume = {37},
Year = {2009},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAfwAAAAAAfwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkBR9OdWNsZWljIEFjaWRzIFJlc2VhcmMjQjdBREQucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3rdzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAESG9mZgAQAAgAAMwxEvIAAAARAAgAAMybdZAAAAABABgAC2QFAAtj7AALYsEABcEoAAXBJwACDfkAAgBZTWFjaW50b3NoIEhEOlVzZXJzOgBhZGluYToARG9jdW1lbnRzOgBQYXBlcnM6ADIwMDk6AEhvZmY6AE51Y2xlaWMgQWNpZHMgUmVzZWFyYyNCN0FERC5wZGYAAA4ASgAkAE4AdQBjAGwAZQBpAGMAIABBAGMAaQBkAHMAIABSAGUAcwBlAGEAcgBjAGgAIAAyADAAMAA5ACAASABvAGYAZgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAS1VzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAwOS9Ib2ZmL051Y2xlaWMgQWNpZHMgUmVzZWFyY2ggMjAwOSBIb2ZmLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QRS4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAwOS9Ib2ZmL051Y2xlaWMgQWNpZHMgUmVzZWFyY2ggMjAwOSBIb2ZmLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKgAqICpwKwArsCvwLNAtQC3QMlAyoDLQM6Az8AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADUQ==},
Bdsk-Url-1 = {http://nar.oxfordjournals.org/content/37/suppl_2/W101.short}}
@article{Rho:2010p397,
Author = {Rho, M and Tang, H and Ye, Y},
Date-Added = {2010-10-11 09:54:14 -0400},
Date-Modified = {2010-10-11 09:54:24 -0400},
Doi = {10.1093/nar/gkq747},
Journal = {Nucleic Acids Research},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2010/Rho/Nucleic%20Acids%20Research%202010%20Rho.pdf},
Month = nov,
Number = {20},
Pages = {e191},
Rating = {0},
Read = {Yes},
Title = {{FragGeneScan}: predicting genes in short and error-prone reads},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p397},
Volume = {38},
Year = {2010},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAfYAAAAAAfYAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkwR9OdWNsZWljIEFjaWRzIFJlc2VhcmMjQjc5MTYucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3kWzJsvQFBERiBDQVJPAAIABQAACSAAAAAAAAAAAAAAAAAAAAADUmhvAAAQAAgAAMwxEvIAAAARAAgAAMybdZAAAAABABgAC2TBAAtkbAALYsEABcEoAAXBJwACDfkAAgBYTWFjaW50b3NoIEhEOlVzZXJzOgBhZGluYToARG9jdW1lbnRzOgBQYXBlcnM6ADIwMTA6AFJobzoATnVjbGVpYyBBY2lkcyBSZXNlYXJjI0I3OTE2LnBkZgAOAEgAIwBOAHUAYwBsAGUAaQBjACAAQQBjAGkAZABzACAAUgBlAHMAZQBhAHIAYwBoACAAMgAwADEAMAAgAFIAaABvAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBJVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDEwL1Joby9OdWNsZWljIEFjaWRzIFJlc2VhcmNoIDIwMTAgUmhvLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QQy4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAxMC9SaG8vTnVjbGVpYyBBY2lkcyBSZXNlYXJjaCAyMDEwIFJoby5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACmgKcAqECqgK1ArkCxwLOAtcDHQMiAyUDMgM3AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA0k=}}
@article{Niu:2010p1333,
Abstract = {Our method is available from http://cd-hit.org as a downloadable program and a web server. It is important not only to identify the duplicates from metagenomic datasets but also to distinguish whether they are artificial or natural duplicates. We provide a tool to estimate the number of natural duplicates according to user-defined sample types, so users can decide whether to retain or remove duplicates in their projects.},
Affiliation = {California Institute for Telecommunications and Information Technology, University of California San Diego, La Jolla, California 92093, USA.},
Author = {Niu, Beifang and Fu, Limin and Sun, Shulei and Li, Weizhong},
Date-Added = {2012-07-03 13:22:22 -0400},
Date-Modified = {2012-07-03 13:22:40 -0400},
Doi = {10.1186/1471-2105-11-187},
Journal = {BMC Bioinformatics},
Keywords = {Algorithms, Genome, Databases: Genetic, Internet, Metagenomics, Software, Sequence Analysis: DNA},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2010/Niu/BMC%20Bioinformatics%202010%20Niu.pdf},
Month = apr,
Pages = {187},
Pii = {1471-2105-11-187},
Pmid = {20388221},
Rating = {0},
Title = {Artificial and natural duplicates in pyrosequencing reads of metagenomic data},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1333},
Volume = {11},
Year = {2010},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAeoAAAAAAeoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkth9CTUMgQmlvaW5mb3JtYXRpY3MgMjAxMCBOaXUucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3z6zJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAADTml1AAAQAAgAAMwxEvIAAAARAAgAAMybdZAAAAABABgAC2S2AAtkbAALYsEABcEoAAXBJwACDfkAAgBYTWFjaW50b3NoIEhEOlVzZXJzOgBhZGluYToARG9jdW1lbnRzOgBQYXBlcnM6ADIwMTA6AE5pdToAQk1DIEJpb2luZm9ybWF0aWNzIDIwMTAgTml1LnBkZgAOAEAAHwBCAE0AQwAgAEIAaQBvAGkAbgBmAG8AcgBtAGEAdABpAGMAcwAgADIAMAAxADAAIABOAGkAdQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIARVVzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAxMC9OaXUvQk1DIEJpb2luZm9ybWF0aWNzIDIwMTAgTml1LnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QPy4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAxMC9OaXUvQk1DIEJpb2luZm9ybWF0aWNzIDIwMTAgTml1LnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKOApAClQKeAqkCrQK7AsICywMNAxIDFQMiAycAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADOQ==},
Bdsk-Url-1 = {http://dx.doi.org/10.1186/1471-2105-11-187}}
@article{Barabasi:1999p1083,
Author = {Barab{\'a}si, Albert-L{\'a}szl{\'o} and Albert, R{\'e}ka},
Date-Added = {2011-11-14 17:45:54 -0500},
Date-Modified = {2011-11-14 17:47:55 -0500},
Doi = {10.1126/science.286.5439.509},
Journal = {Science},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/1999/Barab%C3%A1si/Science%201999%20Barab%C3%A1si.pdf},
Month = oct,
Number = {5439},
Pages = {509--512},
Pmid = {10521342},
Rating = {0},
Title = {Emergence of scaling in random networks},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1083},
Volume = {286},
Year = {1999},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAeYAAAAAAeYAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAti9BlTY2llbmNlIDE5OTkgQmFyYWKHc2kucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3syzJsvPwAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAIQmFyYWKHc2kAEAAIAADMMRLyAAAAEQAIAADMm3WPAAAAAQAYAAti9AALYvMAC2LBAAXBKAAFwScAAg35AAIAV01hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAxOTk5OgBCYXJhYodzaToAU2NpZW5jZSAxOTk5IEJhcmFih3NpLnBkZgAADgA2ABoAUwBjAGkAZQBuAGMAZQAgADEAOQA5ADkAIABCAGEAcgBhAGIAYQMBAHMAaQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIASFVzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMTk5OS9CYXJhYmHMgXNpL1NjaWVuY2UgMTk5OSBCYXJhYmHMgXNpLnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0bxBAAC4ALgAvAC4ALgAvAEQAbwBjAHUAbQBlAG4AdABzAC8AUABhAHAAZQByAHMALwAxADkAOQA5AC8AQgBhAHIAYQBiAGEDAQBzAGkALwBTAGMAaQBlAG4AYwBlACAAMQA5ADkAOQAgAEIAYQByAGEAYgBhAwEAcwBpAC4AcABkAGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACigKMApECmgKlAqkCtwK+AscDSgNPA1IDXwNkAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA3Y=}}
@article{Henry:2011p799,
Author = {Henry, Christopher S and Overbeek, Ross and Xia, Fangfang and Best, Aaron A and others},
Date-Added = {2011-09-28 16:31:32 -0400},
Date-Modified = {2011-09-28 16:31:44 -0400},
Doi = {10.1016/j.bbagen.2011.03.010},
Journal = {Biochimica et Biophysica Acta (BBA) - General Subjects},
Keywords = {Metagenomics, SEED, MG-RAST, Assembly, RAST, Genome-scale metabolic models},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2011/Henry/%E2%80%A6%20et%20Biophysica%20Acta%20(BBA%20%E2%80%A6%202011%20Henry.pdf},
Month = oct,
Number = {10},
Pages = {967--977},
Rating = {0},
Read = {Yes},
Title = {Connecting Genotype to Phenotype in the Era of High-throughput Sequencing},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p799},
Url = {http://www.sciencedirect.com/science/article/pii/S0304416511000596},
Volume = {1810},
Year = {2011},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAhQAAAAAAhQAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlCh/JIGV0IEJpb3BoeXNpY2EgQWN0YSAjQjdBQTAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3qgzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAFSGVucnkAABAACAAAzDES8gAAABEACAAAzJt1kAAAAAEAGAALZQoAC2TzAAtiwQAFwSgABcEnAAIN+QACAFpNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAxMToASGVucnk6AMkgZXQgQmlvcGh5c2ljYSBBY3RhICNCN0FBMC5wZGYADgBWACogJgAgAGUAdAAgAEIAaQBvAHAAaAB5AHMAaQBjAGEAIABBAGMAdABhACAAKABCAEIAQQAgICYAIAAyADAAMQAxACAASABlAG4AcgB5AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBWVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDExL0hlbnJ5L+KApiBldCBCaW9waHlzaWNhIEFjdGEgKEJCQSDigKYgMjAxMSBIZW5yeS5wZGYAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdG8QTAAuAC4ALwAuAC4ALwBEAG8AYwB1AG0AZQBuAHQAcwAvAFAAYQBwAGUAcgBzAC8AMgAwADEAMQAvAEgAZQBuAHIAeQAvICYAIABlAHQAIABCAGkAbwBwAGgAeQBzAGkAYwBhACAAQQBjAHQAYQAgACgAQgBCAEEAICAmACAAMgAwADEAMQAgAEgAZQBuAHIAeQAuAHAAZABm0hwdJCWiJSFcTlNEaWN0aW9uYXJ5EgABhqBfEA9OU0tleWVkQXJjaGl2ZXIACAARABYAHwAoADIANQA6ADwARQBLAFIAXQBkAGwAbwBxAHMAdQB4AHoAfACGAJMAmACgArgCugK/AsgC0wLXAuUC7AL1A5ADlQOYA6UDqgAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAO8},
Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/pii/S0304416511000596}}
@article{Hess:2011p686,
Abstract = {The paucity of enzymes that efficiently deconstruct plant polysaccharides represents a major bottleneck for industrial-scale conversion of cellulosic biomass into biofuels. Cow rumen microbes specialize in degradation of cellulosic plant material, but most members of this complex community resist cultivation. To characterize biomass-degrading genes and genomes, we sequenced and analyzed 268 gigabases of metagenomic DNA from microbes adherent to plant fiber incubated in cow rumen. From these data, we identified 27,755 putative carbohydrate-active genes and expressed 90 candidate proteins, of which 57\% were enzymatically active against cellulosic substrates. We also assembled 15 uncultured microbial genomes, which were validated by complementary methods including single-cell genome sequencing. These data sets provide a substantially expanded catalog of genes and genomes participating in the deconstruction of cellulosic biomass.},
Affiliation = {Department of Energy, Joint Genome Institute, Walnut Creek, CA 94598, USA.},
Author = {Matthias Hess and Alexander Sczyrba and Rob Egan and Tae-Wan Kim and Harshal Chokhawala and Gary Schroth and Shujun Luo and Douglas S Clark and Feng Chen and Tao Zhang and Roderick I Mackie and Len A Pennacchio and Susannah G Tringe and Axel Visel and Tanja Woyke and Zhong Wang and Edward M Rubin},
Date-Added = {2011-01-28 15:01:46 -0500},
Date-Modified = {2012-07-18 13:52:38 -0400},
Doi = {10.1126/science.1200387},
Journal = {Science},
Keywords = {Animals, Poaceae, Cattle, Bacterial Proteins, Genome: Bacterial, Rumen, Cellulase, Cellulases, Bacteria, Biomass, Cellulose, Molecular Sequence Data, Metagenomics, Genes: Bacterial, Amino Acid Sequence, Sequence Analysis: DNA, Carbohydrate Metabolism, Molecular Sequence Annotation, Cellulose 1:4-beta-Cellobiosidase, Metagenome},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2011/Hess/Science%202011%20Hess.pdf},
Month = jan,
Number = {6016},
Pages = {463--467},
Pii = {331/6016/463},
Pmid = {21273488},
Rating = {0},
Read = {Yes},
Title = {Metagenomic discovery of biomass-degrading genes and genomes from cow rumen},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p686},
Volume = {331},
Year = {2011},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAcQAAAAAAcQAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlDBVTY2llbmNlIDIwMTEgSGVzcy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3nGzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAESGVzcwAQAAgAAMwxEvIAAAARAAgAAMybdZAAAAABABgAC2UMAAtk8wALYsEABcEoAAXBJwACDfkAAgBPTWFjaW50b3NoIEhEOlVzZXJzOgBhZGluYToARG9jdW1lbnRzOgBQYXBlcnM6ADIwMTE6AEhlc3M6AFNjaWVuY2UgMjAxMSBIZXNzLnBkZgAADgAsABUAUwBjAGkAZQBuAGMAZQAgADIAMAAxADEAIABIAGUAcwBzAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA8VXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDExL0hlc3MvU2NpZW5jZSAyMDExIEhlc3MucGRmABMAAS8AABUAAgAM//8AAIAF0hwdHh9YJGNsYXNzZXNaJGNsYXNzbmFtZaMfICFdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfEDYuLi8uLi9Eb2N1bWVudHMvUGFwZXJzLzIwMTEvSGVzcy9TY2llbmNlIDIwMTEgSGVzcy5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACaAJqAm8CeAKDAocClQKcAqUC3gLjAuYC8wL4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAwo=},
Bdsk-Url-1 = {http://dx.doi.org/10.1126/science.1200387}}
@article{Simpson:2009p233,
Abstract = {Widespread adoption of massively parallel deoxyribonucleic acid (DNA) sequencing instruments has prompted the recent development of de novo short read assembly algorithms. A common shortcoming of the available tools is their inability to efficiently assemble vast amounts of data generated from large-scale sequencing projects, such as the sequencing of individual human genomes to catalog natural genetic variation. To address this limitation, we developed ABySS (Assembly By Short Sequences), a parallelized sequence assembler. As a demonstration of the capability of our software, we assembled 3.5 billion paired-end reads from the genome of an African male publicly released by Illumina, Inc. Approximately 2.76 million contigs > or =100 base pairs (bp) in length were created with an N50 size of 1499 bp, representing 68\% of the reference human genome. Analysis of these contigs identified polymorphic and novel sequences not present in the human reference assembly, which were validated by alignment to alternate human assemblies and to other primate genomes.},
Affiliation = {Genome Sciences Centre, British Columbia Cancer Agency, Vancouver, British Columbia V5Z 4E6, Canada.},
Author = {Jared T Simpson and Kim Wong and Shaun D Jackman and Jacqueline E Schein and Steven J M Jones and Inan{\c c} Birol},
Date-Added = {2010-06-10 10:28:36 -0400},
Date-Modified = {2010-06-10 10:28:48 -0400},
Doi = {10.1101/gr.089532.108},
Journal = {Genome Research},
Keywords = {Sequence Analysis: DNA, Genetic Variation, Humans, Polymorphism: Genetic, Software, Reproducibility of Results, Contig Mapping, Escherichia coli K12, Animals, Genome: Human, Algorithms, Computational Biology},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2009/Simpson/Genome%20Res%202009%20Simpson.pdf},
Month = jun,
Number = {6},
Pages = {1117--1123},
Pii = {gr.089532.108},
Pmid = {19251739},
Rating = {0},
Title = {ABySS: a parallel assembler for short read sequence data},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p233},
Volume = {19},
Year = {2009},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAeYAAAAAAeYAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkQBtHZW5vbWUgUmVzIDIwMDkgU2ltcHNvbi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3jYzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAHU2ltcHNvbgAAEAAIAADMMRLyAAAAEQAIAADMm3WQAAAAAQAYAAtkQAALY+wAC2LBAAXBKAAFwScAAg35AAIAWE1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDA5OgBTaW1wc29uOgBHZW5vbWUgUmVzIDIwMDkgU2ltcHNvbi5wZGYADgA4ABsARwBlAG4AbwBtAGUAIABSAGUAcwAgADIAMAAwADkAIABTAGkAbQBwAHMAbwBuAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBFVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDA5L1NpbXBzb24vR2Vub21lIFJlcyAyMDA5IFNpbXBzb24ucGRmAAATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxA/Li4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDA5L1NpbXBzb24vR2Vub21lIFJlcyAyMDA5IFNpbXBzb24ucGRm0hwdJCWiJSFcTlNEaWN0aW9uYXJ5EgABhqBfEA9OU0tleWVkQXJjaGl2ZXIACAARABYAHwAoADIANQA6ADwARQBLAFIAXQBkAGwAbwBxAHMAdQB4AHoAfACGAJMAmACgAooCjAKRApoCpQKpArcCvgLHAwkDDgMRAx4DIwAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAM1},
Bdsk-Url-1 = {http://dx.doi.org/10.1101/gr.089532.108}}
@article{Harismendy:2009p228,
Abstract = {BACKGROUND: Next generation sequencing (NGS) platforms are currently being utilized for targeted sequencing of candidate genes or genomic intervals to perform sequence-based association studies. To evaluate these platforms for this application, we analyzed human sequence generated by the Roche 454, Illumina GA, and the ABI SOLiD technologies for the same 260 kb in four individuals. RESULTS: Local sequence characteristics contribute to systematic variability in sequence coverage (>100-fold difference in per-base coverage), resulting in patterns for each NGS technology that are highly correlated between samples. A comparison of the base calls to 88 kb of overlapping ABI 3730xL Sanger sequence generated for the same samples showed that the NGS platforms all have high sensitivity, identifying >95\% of variant sites. At high coverage, depth base calling errors are systematic, resulting from local sequence contexts; as the coverage is lowered additional 'random sampling' errors in base calling occur. CONCLUSIONS: Our study provides important insights into systematic biases and data variability that need to be considered when utilizing NGS platforms for population targeted sequencing studies.},
Affiliation = {Scripps Genomic Medicine, Scripps Translational Science Institute, The Scripps Research Institute, La Jolla, CA 92037, USA. oharis@scripps.edu},
Author = {Olivier Harismendy and Pauline C Ng and Robert L Strausberg and Xiaoyun Wang and Timothy B Stockwell and Karen Y Beeson and Nicholas J Schork and Sarah S Murray and Eric J Topol and Samuel Levy and Kelly A Frazer},
Date-Added = {2010-06-07 13:23:35 -0400},
Date-Modified = {2010-06-07 13:23:44 -0400},
Doi = {10.1186/gb-2009-10-3-r32},
Journal = {Genome Biology},
Keywords = {Sequence Analysis: DNA, Genetics: Population, Genotype, Sequence Alignment, False Positive Reactions, Polymorphism: Single Nucleotide, Oligonucleotide Array Sequence Analysis, Base Sequence, Humans, Computer Simulation},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2009/Harismendy/Genome%20Biol%202009%20Harismendy.pdf},
Month = mar,
Number = {3},
Pages = {R32},
Pii = {gb-2009-10-3-r32},
Pmid = {19327155},
Rating = {0},
Title = {Evaluation of next generation sequencing platforms for population targeted sequencing studies},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p228},
Volume = {10},
Year = {2009},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAf4AAAAAAf4AAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkAR9HZW5vbWUgQmlvbCAyMDA5IEhhcmlzbWVuZHkucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3jIzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAKSGFyaXNtZW5keQAQAAgAAMwxEvIAAAARAAgAAMybdZAAAAABABgAC2QBAAtj7AALYsEABcEoAAXBJwACDfkAAgBfTWFjaW50b3NoIEhEOlVzZXJzOgBhZGluYToARG9jdW1lbnRzOgBQYXBlcnM6ADIwMDk6AEhhcmlzbWVuZHk6AEdlbm9tZSBCaW9sIDIwMDkgSGFyaXNtZW5keS5wZGYAAA4AQAAfAEcAZQBuAG8AbQBlACAAQgBpAG8AbAAgADIAMAAwADkAIABIAGEAcgBpAHMAbQBlAG4AZAB5AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBMVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDA5L0hhcmlzbWVuZHkvR2Vub21lIEJpb2wgMjAwOSBIYXJpc21lbmR5LnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBGLi4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDA5L0hhcmlzbWVuZHkvR2Vub21lIEJpb2wgMjAwOSBIYXJpc21lbmR5LnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKiAqQCqQKyAr0CwQLPAtYC3wMoAy0DMAM9A0IAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADVA==},
Bdsk-Url-1 = {http://dx.doi.org/10.1186/gb-2009-10-3-r32}}
@article{Peng:2011p898,
Affiliation = {Department of Computer Science, The University of Hong Kong, Hong Kong.},
Author = {Yu Peng and Henry C M Leung and S M Yiu and Francis Y L Chin},
Date-Added = {2011-10-18 13:20:41 -0400},
Date-Modified = {2012-07-18 13:46:32 -0400},
Doi = {10.1093/bioinformatics/btr216},
Journal = {Bioinformatics},
Keywords = {Genome: Bacterial, Metagenomics, Escherichia coli, Algorithms, Sequence Analysis: DNA, Software},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2011/Peng/Bioinformatics%202011%20Peng.pdf},
Month = jul,
Number = {13},
Pages = {i94--i101},
Pii = {btr216},
Pmid = {21685107},
Rating = {0},
Read = {Yes},
Title = {Meta-IDBA: a de Novo assembler for metagenomic data},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p898},
Volume = {27},
Year = {2011},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAeAAAAAAAeAAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlJxxCaW9pbmZvcm1hdGljcyAyMDExIFBlbmcucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3qzzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAEUGVuZwAQAAgAAMwxEvIAAAARAAgAAMybdZAAAAABABgAC2UnAAtk8wALYsEABcEoAAXBJwACDfkAAgBWTWFjaW50b3NoIEhEOlVzZXJzOgBhZGluYToARG9jdW1lbnRzOgBQYXBlcnM6ADIwMTE6AFBlbmc6AEJpb2luZm9ybWF0aWNzIDIwMTEgUGVuZy5wZGYADgA6ABwAQgBpAG8AaQBuAGYAbwByAG0AYQB0AGkAYwBzACAAMgAwADEAMQAgAFAAZQBuAGcALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAENVc2Vycy9hZGluYS9Eb2N1bWVudHMvUGFwZXJzLzIwMTEvUGVuZy9CaW9pbmZvcm1hdGljcyAyMDExIFBlbmcucGRmAAATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxA9Li4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDExL1BlbmcvQmlvaW5mb3JtYXRpY3MgMjAxMSBQZW5nLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKEAoYCiwKUAp8CowKxArgCwQMBAwYDCQMWAxsAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADLQ==},
Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btr216}}
@article{Mackelprang:2011p1087,
Abstract = {Permafrost contains an estimated 1672 Pg carbon (C), an amount roughly equivalent to the total currently contained within land plants and the atmosphere. This reservoir of C is vulnerable to decomposition as rising global temperatures cause the permafrost to thaw. During thaw, trapped organic matter may become more accessible for microbial degradation and result in greenhouse gas emissions. Despite recent advances in the use of molecular tools to study permafrost microbial communities, their response to thaw remains unclear. Here we use deep metagenomic sequencing to determine the impact of thaw on microbial phylogenetic and functional genes, and relate these data to measurements of methane emissions. Metagenomics, the direct sequencing of DNA from the environment, allows the examination of whole biochemical pathways and associated processes, as opposed to individual pieces of the metabolic puzzle. Our metagenome analyses reveal that during transition from a frozen to a thawed state there are rapid shifts in many microbial, phylogenetic and functional gene abundances and pathways. After one week of incubation at 5 $\,^{\circ}$C, permafrost metagenomes converge to be more similar to each other than while they are frozen. We find that multiple genes involved in cycling of C and nitrogen shift rapidly during thaw. We also construct the first draft genome from a complex soil metagenome, which corresponds to a novel methanogen. Methane previously accumulated in permafrost is released during thaw and subsequently consumed by methanotrophic bacteria. Together these data point towards the importance of rapid cycling of methane and nitrogen in thawing permafrost.},
Affiliation = {Department of Biology, California State University at Northridge, Northridge, California 91330, USA.},
Author = {Rachel Mackelprang and Mark P Waldrop and Kristen M DeAngelis and Maude M David and Krystle L Chavarria and Steven J Blazewicz and Edward M Rubin and Janet K Jansson},
Date-Added = {2011-12-02 09:00:48 -0500},
Date-Modified = {2012-07-18 13:52:00 -0400},
Doi = {10.1038/nature10576},
Journal = {Nature},
Keywords = {Soil Microbiology, Nitrogen Cycle, Metagenomics, Nitrogen, Metagenome, Carbon, Soil, Arctic Regions, Freezing, Oxidation-Reduction, Phylogeny, RNA: Ribosomal: 16S, Time Factors, DNA, Methane, Bacteria, Alaska, Temperature, Genes: rRNA, Carbon Cycle},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2011/Mackelprang/Nature%202011%20Mackelprang.pdf},
Month = dec,
Number = {7377},
Pages = {368--371},
Pii = {nature10576},
Pmid = {22056985},
Rating = {0},
Read = {Yes},
Title = {Metagenomic analysis of a permafrost microbial community reveals a rapid response to thaw},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1087},
Volume = {480},
Year = {2011},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAfIAAAAAAfIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlGxtOYXR1cmUgMjAxMSBNYWNrZWxwcmFuZy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3tBzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAALTWFja2VscHJhbmcAABAACAAAzDES8gAAABEACAAAzJt1kAAAAAEAGAALZRsAC2TzAAtiwQAFwSgABcEnAAIN+QACAFxNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAxMToATWFja2VscHJhbmc6AE5hdHVyZSAyMDExIE1hY2tlbHByYW5nLnBkZgAOADgAGwBOAGEAdAB1AHIAZQAgADIAMAAxADEAIABNAGEAYwBrAGUAbABwAHIAYQBuAGcALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAElVc2Vycy9hZGluYS9Eb2N1bWVudHMvUGFwZXJzLzIwMTEvTWFja2VscHJhbmcvTmF0dXJlIDIwMTEgTWFja2VscHJhbmcucGRmAAATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBDLi4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDExL01hY2tlbHByYW5nL05hdHVyZSAyMDExIE1hY2tlbHByYW5nLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKWApgCnQKmArECtQLDAsoC0wMZAx4DIQMuAzMAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADRQ==},
Bdsk-Url-1 = {http://dx.doi.org/10.1038/nature10576}}
@article{Hoffmann:2009p1027,
Abstract = {With few exceptions, current methods for short read mapping make use of simple seed heuristics to speed up the search. Most of the underlying matching models neglect the necessity to allow not only mismatches, but also insertions and deletions. Current evaluations indicate, however, that very different error models apply to the novel high-throughput sequencing methods. While the most frequent error-type in Illumina reads are mismatches, reads produced by 454's GS FLX predominantly contain insertions and deletions (indels). Even though 454 sequencers are able to produce longer reads, the method is frequently applied to small RNA (miRNA and siRNA) sequencing. Fast and accurate matching in particular of short reads with diverse errors is therefore a pressing practical problem. We introduce a matching model for short reads that can, besides mismatches, also cope with indels. It addresses different error models. For example, it can handle the problem of leading and trailing contaminations caused by primers and poly-A tails in transcriptomics or the length-dependent increase of error rates. In these contexts, it thus simplifies the tedious and error-prone trimming step. For efficient searches, our method utilizes index structures in the form of enhanced suffix arrays. In a comparison with current methods for short read mapping, the presented approach shows significantly increased performance not only for 454 reads, but also for Illumina reads. Our approach is implemented in the software segemehl available at http://www.bioinf.uni-leipzig.de/Software/segemehl/.},
Affiliation = {Bioinformatics Group, Department of Computer Science, University of Leipzig, Leipzig, Germany.},
Author = {Steve Hoffmann and Christian Otto and Stefan Kurtz and Cynthia M Sharma and Philipp Khaitovich and J{\"o}rg Vogel and Peter F Stadler and J{\"o}rg Hackerm{\"u}ller},
Date-Added = {2011-11-01 15:58:13 -0400},
Date-Modified = {2012-07-18 13:45:36 -0400},
Doi = {10.1371/journal.pcbi.1000502},
Journal = {PLoS Computational Biology},
Keywords = {Computational Biology, Mutation, DNA Mutational Analysis, Sequence Alignment, Base Sequence, Algorithms},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2009/Hoffmann/PLoS%20Comput%20Biol%202009%20Hoffmann.pdf},
Month = sep,
Number = {9},
Pages = {e1000502},
Pmid = {19750212},
Rating = {0},
Title = {Fast mapping of short sequences with mismatches, insertions and deletions using index structures},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1027},
Volume = {5},
Year = {2009},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAgIAAAAAAgIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkBx9QTG9TIENvbXB1dCBCaW9sIDIwMDkjQjdBRUEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3rqzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAISG9mZm1hbm4AEAAIAADMMRLyAAAAEQAIAADMm3WQAAAAAQAYAAtkBwALY+wAC2LBAAXBKAAFwScAAg35AAIAXU1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDA5OgBIb2ZmbWFubjoAUExvUyBDb21wdXQgQmlvbCAyMDA5I0I3QUVBLnBkZgAADgBGACIAUABMAG8AUwAgAEMAbwBtAHAAdQB0ACAAQgBpAG8AbAAgADIAMAAwADkAIABIAG8AZgBmAG0AYQBuAG4ALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAE1Vc2Vycy9hZGluYS9Eb2N1bWVudHMvUGFwZXJzLzIwMDkvSG9mZm1hbm4vUExvUyBDb21wdXQgQmlvbCAyMDA5IEhvZmZtYW5uLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QRy4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAwOS9Ib2ZmbWFubi9QTG9TIENvbXB1dCBCaW9sIDIwMDkgSG9mZm1hbm4ucGRm0hwdJCWiJSFcTlNEaWN0aW9uYXJ5EgABhqBfEA9OU0tleWVkQXJjaGl2ZXIACAARABYAHwAoADIANQA6ADwARQBLAFIAXQBkAGwAbwBxAHMAdQB4AHoAfACGAJMAmACgAqYCqAKtArYCwQLFAtMC2gLjAy0DMgM1A0IDRwAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAANZ},
Bdsk-Url-1 = {http://dx.doi.org/10.1371/journal.pcbi.1000502}}
@article{Mavromatis:2006p894,
Author = {Mavromatis, K and Ivanova, N and Barry, K and Shapiro, H and Goltsman, E and McHardy, A C and Rigoutsos, I and Salamov, A and Korzeniewski, F and Land, M and Lapidus, A and Grigoriev, I and Richardson, P and Hugenholtz, P and Kyrpides, N C},
Date-Added = {2011-10-04 09:44:07 -0400},
Date-Modified = {2011-11-01 15:31:47 -0400},
Doi = {10.1038/nmeth1043},
Journal = {Nature Methods},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2006/Mavromatis/Nature%20Methods%202006%20Mavromatis.pdf},
Month = jun,
Number = {6},
Pages = {495--500},
Pmid = {17468765},
Rating = {0},
Read = {Yes},
Title = {Use of simulated data sets to evaluate the fidelity of metagenomic processing methods},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p894},
Volume = {4},
Year = {2007},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAggAAAAAAggAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtjeR9OYXR1cmUgTWV0aG9kcyAyMDA2IE0jQjdBQTQucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3qkzJsvPwAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAKTWF2cm9tYXRpcwAQAAgAAMwxEvIAAAARAAgAAMybdY8AAAABABgAC2N5AAtjbAALYsEABcEoAAXBJwACDfkAAgBfTWFjaW50b3NoIEhEOlVzZXJzOgBhZGluYToARG9jdW1lbnRzOgBQYXBlcnM6ADIwMDY6AE1hdnJvbWF0aXM6AE5hdHVyZSBNZXRob2RzIDIwMDYgTSNCN0FBNC5wZGYAAA4ARgAiAE4AYQB0AHUAcgBlACAATQBlAHQAaABvAGQAcwAgADIAMAAwADYAIABNAGEAdgByAG8AbQBhAHQAaQBzAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBPVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDA2L01hdnJvbWF0aXMvTmF0dXJlIE1ldGhvZHMgMjAwNiBNYXZyb21hdGlzLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QSS4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAwNi9NYXZyb21hdGlzL05hdHVyZSBNZXRob2RzIDIwMDYgTWF2cm9tYXRpcy5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACrAKuArMCvALHAssC2QLgAukDNQM6Az0DSgNPAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA2E=}}
@article{Metzker:2009p245,
Author = {Metzker, Michael L},
Date-Added = {2010-09-17 11:01:57 -0400},
Date-Modified = {2010-09-17 11:02:12 -0400},
Doi = {10.1038/nrg2626},
Journal = {Nature Reviews Genetics},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2009/Metzker/Nature%20Reviews%20Genetics%202009%20Metzker.pdf},
Month = jan,
Number = {1},
Pages = {31--46},
Pmid = {19997069},
Rating = {0},
Read = {Yes},
Title = {Sequencing technologies---the next generation},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p245},
Volume = {11},
Year = {2010},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAhAAAAAAAhAAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkJx9OYXR1cmUgUmV2aWV3cyBHZW5ldGkjQjc5MDgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3kIzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAHTWV0emtlcgAAEAAIAADMMRLyAAAAEQAIAADMm3WQAAAAAQAYAAtkJwALY+wAC2LBAAXBKAAFwScAAg35AAIAXE1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDA5OgBNZXR6a2VyOgBOYXR1cmUgUmV2aWV3cyBHZW5ldGkjQjc5MDgucGRmAA4AUgAoAE4AYQB0AHUAcgBlACAAUgBlAHYAaQBlAHcAcwAgAEcAZQBuAGUAdABpAGMAcwAgADIAMAAwADkAIABNAGUAdAB6AGsAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBSVXNlcnMvYWRpbmEvRG9jdW1lbnRzL1BhcGVycy8yMDA5L01ldHprZXIvTmF0dXJlIFJldmlld3MgR2VuZXRpY3MgMjAwOSBNZXR6a2VyLnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBMLi4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDA5L01ldHprZXIvTmF0dXJlIFJldmlld3MgR2VuZXRpY3MgMjAwOSBNZXR6a2VyLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAK0ArYCuwLEAs8C0wLhAugC8QNAA0UDSANVA1oAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADbA==}}
@article{Tyson:2004p92,
Author = {Gene W Tyson and Jarrod Chapman and Philip Hugenholtz and Eric E Allen and Rachna J Ram and Paul M Richardson and Victor V Solovyev and Edward M Rubin and Daniel S Rokhsar and Jillian F Banfield},
Title = {Community structure and metabolism through reconstruction of microbial genomes from the environment},
Journal = {Nature},
Year = {2004},
Month = {Mar},
Volume = {428},
Number = {6978},
Pages = {37--43},
Doi = {10.1038/nature02340},
Pmid = {14961025},
Pii = {nature02340},
Url = {http://www.nature.com/nature/journal/v428/n6978/full/nature02340.html},
Abstract = {Microbial communities are vital in the functioning of all ecosystems; however, most microorganisms are uncultivated, and their roles in natural systems are unclear. Here, using random shotgun sequencing of DNA from a natural acidophilic biofilm, we report reconstruction of near-complete genomes of Leptospirillum group II and Ferroplasma type II, and partial recovery of three other genomes. This was possible because the biofilm was dominated by a small number of species populations and the frequency of genomic rearrangements and gene insertions or deletions was relatively low. Because each sequence read came from a different individual, we could determine that single-nucleotide polymorphisms are the predominant form of heterogeneity at the strain level. The Leptospirillum group II genome had remarkably few nucleotide polymorphisms, despite the existence of low-abundance variants. The Ferroplasma type II genome seems to be a composite from three ancestral strains that have undergone homologous recombination to form a large population of mosaic genomes. Analysis of the gene complement for each organism revealed the pathways for carbon and nitrogen fixation and energy generation, and provided insights into survival strategies in an extreme environment.},
Affiliation = {Department of Environmental Science, Policy and Management, University of California, Berkeley, California 94720, USA.},
Keywords = {Genome: Archaeal, Open Reading Frames, Species Specificity, Polymorphism: Single Nucleotide, Base Composition, Recombination: Genetic, Environmental Microbiology, Genomics, Genetic Complementation Test, Genome: Bacterial, Carbon, Nitrogen Fixation, Archaea, Genes: Bacterial, RNA: Ribosomal: 16S, Sequence Analysis: DNA, Genes: Archaeal, Biofilms, Bacteria, Ecosystem, Molecular Sequence Data, Phylogeny, Base Sequence},
Language = {eng},
Date-Added = {2009-12-04 15:14:08 -0500},
Date-Modified = {2009-12-04 15:14:08 -0500},
Rating = {0},
Read = {Yes},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2004/Tyson/Nature%202004%20Tyson.pdf},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p92},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAcgAAAAAAcgAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtjPhVOYXR1cmUgMjAwNCBUeXNvbi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3dYzJsvPwAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAFVHlzb24AABAACAAAzDES8gAAABEACAAAzJt1jwAAAAEAGAALYz4AC2M1AAtiwQAFwSgABcEnAAIN+QACAFBNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAwNDoAVHlzb246AE5hdHVyZSAyMDA0IFR5c29uLnBkZgAOACwAFQBOAGEAdAB1AHIAZQAgADIAMAAwADQAIABUAHkAcwBvAG4ALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAD1Vc2Vycy9hZGluYS9Eb2N1bWVudHMvUGFwZXJzLzIwMDQvVHlzb24vTmF0dXJlIDIwMDQgVHlzb24ucGRmAAATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxA3Li4vLi4vRG9jdW1lbnRzL1BhcGVycy8yMDA0L1R5c29uL05hdHVyZSAyMDA0IFR5c29uLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAJsAm4CcwJ8AocCiwKZAqACqQLjAugC6wL4Av0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADDw==},
Bdsk-Url-1 = {http://www.nature.com/nature/journal/v428/n6978/full/nature02340.html},
Bdsk-Url-2 = {http://dx.doi.org/10.1038/nature02340}}
@article{Schloss:2008p2,
Annote = {1/22/10
Tool: MG-DOTUR
Objective: Compare richness, membership, and structure of microbial communities using peptide fragment sequences extracted from metagenomic sequence data
Advancement: Accounts for present but unsampled peptide fragments, independent of subjective annotation process and includes peptide fragments with no known function
Define Operational Protein Families
Apply statistical tools used to analyze collections of 16S rRNA gene sequences to the analysis of protein coding genes
With 16S, use DNA distance matrix obtained from an alignment of homologous genes.
For protein coding families, use BSR to develop a distance matrix that represents the similarity of ORFs across homologous groups
BSRs represent the fraction of identical amino acids between two peptide fragments (i.e. BSR of 0.30 means they are approximately 30% identical over their full length)
Distance matrix of 16S based on DNA similarity...distance of OPF based on peptide similarity
Working genome: assembled Bacillus genome (have individual sequence reads and assembled genome)
Step 1. Identify peptide fragments from individual sequence reads (92,220 peptide fragments longer than 100 aa)
Step 2. Pairwise alignment and distance calculation of these ORFs...too much. Instead, used BLAST to identify those comparisons that had significant similarity and to calculate BSRs as a surrogate for distance values. Instead of a 92220 x 92220 matrix, used a matrix which contained the row, column, and BSR value
Step 3. Use DOTUR (which is used to assign 16S to OTUs to estimate richness and diversity) -- MG DOTUR to estimate OPFs
Question to test: The method may not be amenable to metagenomic sequencing because the short sequence reads produce peptide fragments less than 100 aa long, which could make meaningful ORF identification and analysis of functional diversity difficult...
},
Author = {Schloss, Patrick D and Handelsman, Jo},
Date-Added = {2009-12-02 20:17:12 -0500},
Date-Modified = {2010-01-23 15:41:47 -0500},
Doi = {10.1186/1471-2105-9-34},
Journal = {BMC Bioinformatics},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2008/Schloss/Bmc%20Bioinformatics%202008%20Schloss.pdf},
Number = {1},
Pages = {34},
Rating = {4},
Read = {Yes},
Title = {A statistical toolbox for metagenomics: assessing functional diversity in microbial communities},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p2},
Volume = {9},
Year = {2008},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAgIAAAAAAgIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtj0x9CbWMgQmlvaW5mb3JtYXRpY3MgMjAjQjc2QzIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3bCzJsvPwAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAHU2NobG9zcwAAEAAIAADMMRLyAAAAEQAIAADMm3WPAAAAAQAYAAtj0wALY6UAC2LBAAXBKAAFwScAAg35AAIAXE1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDA4OgBTY2hsb3NzOgBCbWMgQmlvaW5mb3JtYXRpY3MgMjAjQjc2QzIucGRmAA4ASAAjAEIAbQBjACAAQgBpAG8AaQBuAGYAbwByAG0AYQB0AGkAYwBzACAAMgAwADAAOAAgAFMAYwBoAGwAbwBzAHMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAE1Vc2Vycy9hZGluYS9Eb2N1bWVudHMvUGFwZXJzLzIwMDgvU2NobG9zcy9CbWMgQmlvaW5mb3JtYXRpY3MgMjAwOCBTY2hsb3NzLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QRy4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAwOC9TY2hsb3NzL0JtYyBCaW9pbmZvcm1hdGljcyAyMDA4IFNjaGxvc3MucGRm0hwdJCWiJSFcTlNEaWN0aW9uYXJ5EgABhqBfEA9OU0tleWVkQXJjaGl2ZXIACAARABYAHwAoADIANQA6ADwARQBLAFIAXQBkAGwAbwBxAHMAdQB4AHoAfACGAJMAmACgAqYCqAKtArYCwQLFAtMC2gLjAy0DMgM1A0IDRwAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAANZ}}
@article{GomezAlvarez:2009p1334,
Abstract = {Metagenomics is providing an unprecedented view of the taxonomic diversity, metabolic potential and ecological role of microbial communities in biomes as diverse as the mammalian gastrointestinal tract, the marine water column and soils. However, we have found a systematic error in metagenomes generated by 454-based pyrosequencing that leads to an overestimation of gene and taxon abundance; between 11% and 35% of sequences in a typical metagenome are artificial replicates. Here we document the error in several published and original datasets and offer a web-based solution (http://microbiomes.msu.edu/replicates) for identifying and removing these artifacts.},
Affiliation = {Department of Microbiology and Molecular Genetics, Michigan State University, East Lansing, MI, USA.},
Author = {Gomez-Alvarez, Vicente and Teal, Tracy K and Schmidt, Thomas M},
Date-Added = {2012-07-03 13:23:49 -0400},
Date-Modified = {2012-07-03 13:23:55 -0400},
Doi = {10.1038/ismej.2009.72},
Journal = {The ISME Journal},
Keywords = {Soil Microbiology, Molecular Sequence Data, Sequence Analysis: DNA, Databases: Genetic, Metagenomics, Metagenome, Sequence Alignment, Base Sequence},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2009/Gomez-Alvarez/The%20ISME%20Journal%202009%20Gomez-Alvarez.pdf},
Month = nov,
Number = {11},
Pages = {1314--1317},
Pii = {ismej200972},
Pmid = {19587772},
Rating = {0},
Title = {Systematic artifacts in metagenomes from complex microbial communities},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1334},
Volume = {3},
Year = {2009},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAiAAAAAAAiAAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtj/x9UaGUgSVNNRSBKb3VybmFsIDIwMDkjQjdEMDAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC30AzJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAANR29tZXotQWx2YXJlegAAEAAIAADMMRLyAAAAEQAIAADMm3WQAAAAAQAYAAtj/wALY+wAC2LBAAXBKAAFwScAAg35AAIAYk1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDA5OgBHb21lei1BbHZhcmV6OgBUaGUgSVNNRSBKb3VybmFsIDIwMDkjQjdEMDAucGRmAA4AUAAnAFQAaABlACAASQBTAE0ARQAgAEoAbwB1AHIAbgBhAGwAIAAyADAAMAA5ACAARwBvAG0AZQB6AC0AQQBsAHYAYQByAGUAegAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAV1VzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAwOS9Hb21lei1BbHZhcmV6L1RoZSBJU01FIEpvdXJuYWwgMjAwOSBHb21lei1BbHZhcmV6LnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QUS4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAwOS9Hb21lei1BbHZhcmV6L1RoZSBJU01FIEpvdXJuYWwgMjAwOSBHb21lei1BbHZhcmV6LnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoALEAsYCywLUAt8C4wLxAvgDAQNVA1oDXQNqA28AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADgQ==},
Bdsk-Url-1 = {http://dx.doi.org/10.1038/ismej.2009.72}}
@article{Sommer:2007p1253,
Abstract = {We find that for small genomes and other small assembly tasks, Minimus is faster and far more flexible than existing tools. Due to its small size and modular design Minimus is perfectly suited to be a component of complex assembly pipelines. Minimus is released as an open-source software project and the code is available as part of the AMOS project at Sourceforge.},
Affiliation = {Center for Bioinformatics and Computational Biology, University of Maryland, College Park, MD 20742, USA. dsommer@umiacs.umd.edu <dsommer@umiacs.umd.edu>},
Author = {Sommer, Daniel D and Delcher, Arthur L and Salzberg, Steven L and Pop, Mihai},
Date-Added = {2012-02-03 09:19:56 -0500},
Date-Modified = {2012-02-03 09:20:03 -0500},
Doi = {10.1186/1471-2105-8-64},
Journal = {BMC Bioinformatics},
Keywords = {Chromosome Mapping, Base Sequence, DNA, Molecular Sequence Data, Sequence Alignment, Software Design, Software, Algorithms, Sequence Analysis: DNA, User-Computer Interface},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2007/Sommer/Bmc%20Bioinformatics%202007%20Sommer.pdf},
Month = jan,
Pages = {64},
Pii = {1471-2105-8-64},
Pmid = {17324286},
Rating = {0},
Title = {{Minimus}: a fast, lightweight genome assembler},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1253},
Url = {http://www.biomedcentral.com/1471-2105/8/64},
Volume = {8},
Year = {2007},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAfwAAAAAAfwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtjnR9CbWMgQmlvaW5mb3JtYXRpY3MgMjAjQjdCNUQucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3tdzJsvPwAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAGU29tbWVyABAACAAAzDES8gAAABEACAAAzJt1jwAAAAEAGAALY50AC2OMAAtiwQAFwSgABcEnAAIN+QACAFtNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAwNzoAU29tbWVyOgBCbWMgQmlvaW5mb3JtYXRpY3MgMjAjQjdCNUQucGRmAAAOAEYAIgBCAG0AYwAgAEIAaQBvAGkAbgBmAG8AcgBtAGEAdABpAGMAcwAgADIAMAAwADcAIABTAG8AbQBtAGUAcgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAS1VzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAwNy9Tb21tZXIvQm1jIEJpb2luZm9ybWF0aWNzIDIwMDcgU29tbWVyLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QRS4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAwNy9Tb21tZXIvQm1jIEJpb2luZm9ybWF0aWNzIDIwMDcgU29tbWVyLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKgAqICpwKwArsCvwLNAtQC3QMlAyoDLQM6Az8AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADUQ==},
Bdsk-Url-1 = {http://www.biomedcentral.com/1471-2105/8/64},
Bdsk-Url-2 = {http://dx.doi.org/10.1186/1471-2105-8-64}}
@article{Noguchi:2006p968,
Abstract = {Exhaustive gene identification is a fundamental goal in all metagenomics projects. However, most metagenomic sequences are unassembled anonymous fragments, and conventional gene-finding methods cannot be applied. We have developed a prokaryotic gene-finding program, MetaGene, which utilizes di-codon frequencies estimated by the GC content of a given sequence with other various measures. MetaGene can predict a whole range of prokaryotic genes based on the anonymous genomic sequences of a few hundred bases, with a sensitivity of 95% and a specificity of 90% for artificial shotgun sequences (700 bp fragments from 12 species). MetaGene has two sets of codon frequency interpolations, one for bacteria and one for archaea, and automatically selects the proper set for a given sequence using the domain classification method we propose. The domain classification works properly, correctly assigning domain information to more than 90% of the artificial shotgun sequences. Applied to the Sargasso Sea dataset, MetaGene predicted almost all of the annotated genes and a notable number of novel genes. MetaGene can be applied to wide variety of metagenomic projects and expands the utility of metagenomics.},
Affiliation = {Department of Computational Biology, Graduate School of Frontier Sciences, University of Tokyo, Kashiwa, Chiba 277-8562, Japan. hide@cb.k.u-tokyo.ac.jp},
Author = {Noguchi, Hideki and Park, Jungho and Takagi, Toshihisa},
Date-Added = {2011-11-01 15:34:26 -0400},
Date-Modified = {2012-07-18 13:45:53 -0400},
Doi = {10.1093/nar/gkl723},
Journal = {Nucleic Acids Research},
Keywords = {Environment, Genomics, GC Rich Sequence, Genes: Bacterial, Internet, Software, Open Reading Frames, Oceans and Seas, Genes: Archaeal, Genome: Archaeal, Genome: Bacterial, Computational Biology},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2006/Noguchi/Nucleic%20Acids%20Research%202006%20Noguchi.pdf},
Month = jan,
Number = {19},
Pages = {5623--5630},
Pii = {gkl723},
Pmid = {17028096},
Rating = {0},
Title = {{MetaGene}: prokaryotic gene finding from environmental genome shotgun sequences},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p968},
Volume = {34},
Year = {2006},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAg4AAAAAAg4AAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtjfR9OdWNsZWljIEFjaWRzIFJlc2VhcmMjQjdBRTYucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3rmzJsvPwAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAHTm9ndWNoaQAAEAAIAADMMRLyAAAAEQAIAADMm3WPAAAAAQAYAAtjfQALY2wAC2LBAAXBKAAFwScAAg35AAIAXE1hY2ludG9zaCBIRDpVc2VyczoAYWRpbmE6AERvY3VtZW50czoAUGFwZXJzOgAyMDA2OgBOb2d1Y2hpOgBOdWNsZWljIEFjaWRzIFJlc2VhcmMjQjdBRTYucGRmAA4AUAAnAE4AdQBjAGwAZQBpAGMAIABBAGMAaQBkAHMAIABSAGUAcwBlAGEAcgBjAGgAIAAyADAAMAA2ACAATgBvAGcAdQBjAGgAaQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAUVVzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAwNi9Ob2d1Y2hpL051Y2xlaWMgQWNpZHMgUmVzZWFyY2ggMjAwNiBOb2d1Y2hpLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QSy4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAwNi9Ob2d1Y2hpL051Y2xlaWMgQWNpZHMgUmVzZWFyY2ggMjAwNiBOb2d1Y2hpLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAKyArQCuQLCAs0C0QLfAuYC7wM9A0IDRQNSA1cAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADaQ==},
Bdsk-Url-1 = {http://dx.doi.org/10.1093/nar/gkl723}}
@article{Qin:2010p189,
Abstract = {To understand the impact of gut microbes on human health and well-being it is crucial to assess their genetic potential. Here we describe the Illumina-based metagenomic sequencing, assembly and characterization of 3.3 million non-redundant microbial genes, derived from 576.7 gigabases of sequence, from faecal samples of 124 European individuals. The gene set, approximately 150 times larger than the human gene complement, contains an overwhelming majority of the prevalent (more frequent) microbial genes of the cohort and probably includes a large proportion of the prevalent human intestinal microbial genes. The genes are largely shared among individuals of the cohort. Over 99% of the genes are bacterial, indicating that the entire cohort harbours between 1,000 and 1,150 prevalent bacterial species and each individual at least 160 such species, which are also largely shared. We define and describe the minimal gut metagenome and the minimal gut bacterial genome in terms of functions present in all individuals and most bacteria, respectively.},
Affiliation = {BGI-Shenzhen, Shenzhen 518083, China.},
Author = {Qin, Junjie and Li, Ruiqiang and Raes, Jeroen and Arumugam, Manimozhiyan and Burgdorf, Kristoffer Solvsten and Manichanh, Chaysavanh and Nielsen, Trine and Pons, Nicolas and Levenez, Florence and Yamada, Takuji and Mende, Daniel R and Li, Junhua and Xu, Junming and Li, Shaochuan and Li, Dongfang and Cao, Jianjun and Wang, Bo and Liang, Huiqing and Zheng, Huisong and Xie, Yinlong and Tap, Julien and Lepage, Patricia and Bertalan, Marcelo and Batto, Jean-Michel and Hansen, Torben and Le Paslier, Denis and Linneberg, Allan and Nielsen, H Bj{\o}rn and Pelletier, Eric and Renault, Pierre and Sicheritz-Ponten, Thomas and Turner, Keith and Zhu, Hongmei and Yu, Chang and Li, Shengting and Jian, Min and Zhou, Yan and Li, Yingrui and Zhang, Xiuqing and Li, Songgang and Qin, Nan and Yang, Huanming and Wang, Jian and Brunak, S{\o}ren and Dor{\'e}, Joel and Guarner, Francisco and Kristiansen, Karsten and Pedersen, Oluf and Parkhill, Julian and Weissenbach, Jean and {MetaHIT Consortium} and Bork, Peer and Ehrlich, S Dusko and Wang, Jun},
Date-Added = {2010-03-29 15:40:26 -0400},
Date-Modified = {2012-03-05 23:35:03 -0500},
Doi = {10.1038/nature08821},
Journal = {Nature},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2010/Qin/Nature%202010%20Qin.pdf},
Month = mar,
Number = {7285},
Pages = {59--65},
Pii = {nature08821},
Pmid = {20203603},
Rating = {0},
Read = {Yes},
Title = {A human gut microbial gene catalogue established by metagenomic sequencing},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p189},
Volume = {464},
Year = {2010},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAboAAAAAAboAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtkvRNOYXR1cmUgMjAxMCBRaW4ucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3g4zJsvQAAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAADUWluAAAQAAgAAMwxEvIAAAARAAgAAMybdZAAAAABABgAC2S9AAtkbAALYsEABcEoAAXBJwACDfkAAgBMTWFjaW50b3NoIEhEOlVzZXJzOgBhZGluYToARG9jdW1lbnRzOgBQYXBlcnM6ADIwMTA6AFFpbjoATmF0dXJlIDIwMTAgUWluLnBkZgAOACgAEwBOAGEAdAB1AHIAZQAgADIAMAAxADAAIABRAGkAbgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOVVzZXJzL2FkaW5hL0RvY3VtZW50cy9QYXBlcnMvMjAxMC9RaW4vTmF0dXJlIDIwMTAgUWluLnBkZgAAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QMy4uLy4uL0RvY3VtZW50cy9QYXBlcnMvMjAxMC9RaW4vTmF0dXJlIDIwMTAgUWluLnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZABsAG8AcQBzAHUAeAB6AHwAhgCTAJgAoAJeAmACZQJuAnkCfQKLApICmwLRAtYC2QLmAusAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAAC/Q==},
Bdsk-Url-1 = {http://dx.doi.org/10.1038/nature08821}}
@article{Mende:2012p1262,
Abstract = {Due to the complexity of the protocols and a limited knowledge of the nature of microbial communities, simulating metagenomic sequences plays an important role in testing the performance of existing tools and data analysis methods with metagenomic data. We developed metagenomic read simulators with platform-specific (Sanger, pyrosequencing, Illumina) base-error models, and simulated metagenomes of differing community complexities. We first evaluated the effect of rigorous quality control on Illumina data. Although quality filtering removed a large proportion of the data, it greatly improved the accuracy and contig lengths of resulting assemblies. We then compared the quality-trimmed Illumina assemblies to those from Sanger and pyrosequencing. For the simple community (10 genomes) all sequencing technologies assembled a similar amount and accurately represented the expected functional composition. For the more complex community (100 genomes) Illumina produced the best assemblies and more correctly resembled the expected functional composition. For the most complex community (400 genomes) there was very little assembly of reads from any sequencing technology. However, due to the longer read length the Sanger reads still represented the overall functional composition reasonably well. We further examined the effect of scaffolding of contigs using paired-end Illumina reads. It dramatically increased contig lengths of the simple community and yielded minor improvements to the more complex communities. Although the increase in contig length was accompanied by increased chimericity, it resulted in more complete genes and a better characterization of the functional repertoire. The metagenomic simulators developed for this research are freely available.},
Affiliation = {European Molecular Biology Laboratory, Heidelberg, Germany.},
Author = {Mende, Daniel R and Waller, Alison S and Sunagawa, Shinichi and J{\"a}rvelin, Aino I and Chan, Michelle M and Arumugam, Manimozhiyan and Raes, Jeroen and Bork, Peer},
Date-Added = {2012-03-20 11:19:51 -0400},
Date-Modified = {2012-03-20 11:20:01 -0400},
Doi = {10.1371/journal.pone.0031386},
Journal = {PLoS ONE},
Language = {eng},
Local-Url = {file://localhost/Users/Adina/Documents/Papers/2012/Mende/PLoS%20ONE%202012%20Mende.pdf},
Month = feb,
Number = {2},
Pages = {e31386},
Pii = {PONE-D-11-20074},
Pmid = {22384016},
Rating = {0},
Read = {Yes},
Title = {Assessment of metagenomic assembly using simulated next generation sequencing data},
Uri = {papers://0BEFB9CC-B23E-4FA5-9CA8-3692A18C8C99/Paper/p1262},
Volume = {7},
Year = {2012},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAdAAAAAAAdAAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMwwzKJIKwAAAAtlXhdQTG9TIE9ORSAyMDEyIE1lbmRlLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC3uGzJsvQQAAAAAAAAAAAAIABQAACSAAAAAAAAAAAAAAAAAAAAAFTWVuZGUAABAACAAAzDES8gAAABEACAAAzJt1kQAAAAEAGAALZV4AC2VSAAtiwQAFwSgABcEnAAIN+QACAFJNYWNpbnRvc2ggSEQ6VXNlcnM6AGFkaW5hOgBEb2N1bWVudHM6AFBhcGVyczoAMjAxMjoATWVuZGU6AFBMb1MgT05FIDIwMTIgTWVuZGUucGRmAA4AMAAXAFAATABvAFMAIABPAE4ARQAgADIAMAAxADIAIABNAGUAbgBkAGUALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAD9Vc2Vycy9hZGluYS9Eb2N1bWVudHMvUGFwZXJzLzIwMTIvTWVuZGUvUExvUyBPTkUgMjAxMiBNZW5kZS5wZGYAABMAAS8AABUAAgAM//8AAIAF0hwdHh9YJGNsYXNzZXNaJGNsYXNzbmFtZaMfICFdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfEDkuLi8uLi9Eb2N1bWVudHMvUGFwZXJzLzIwMTIvTWVuZGUvUExvUyBPTkUgMjAxMiBNZW5kZS5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACdAJ2AnsChAKPApMCoQKoArEC7QLyAvUDAgMHAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAxk=},
Bdsk-Url-1 = {http://dx.doi.org/10.1371/journal.pone.0031386}}
@article{Meacham:2011,
Author = {Meacham, Frazer and Boffelli, Dario and Dhahbi, Joseph and Martin, David I K and Singer, Meromit and Pachter, Lior},
Doi = {10.1186/1471-2105-12-451},
Journal = {BMC Bioinformatics},
Language = {eng},
Month = nov,
Number = {1},
Pages = {451},
Title = {Identification and correction of systematic error in high-throughput sequence data},
Volume = {12},
Year = {2011}}