Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #56 from pbelmann/feature/verify-taxonomic-binning
feature verify taxonomic binning benchmark
- Loading branch information
Showing
7 changed files
with
2,869 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2,297 changes: 2,297 additions & 0 deletions
2,297
biobox_cli/verification/data/taxonomic_binning/contigs.fna
Large diffs are not rendered by default.
Oops, something went wrong.
97 changes: 97 additions & 0 deletions
97
biobox_cli/verification/data/taxonomic_binning/labels.binning
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#Bioboxes binning format at http://bioboxes.org/ | ||
@Version:0.9.1 | ||
@SampleID:bioboxes-validation-dataset | ||
@TaxonomyID:13e21ba783e8aabf730b783276858958 | ||
|
||
@@SEQUENCEID TAXID | ||
S|S1|C10029 370895 | ||
S|S1|C10001 370895 | ||
S|S1|C10020 370895 | ||
S|S1|C10026 370895 | ||
S|S1|C10030 370895 | ||
S|S1|C10077 1045854 | ||
S|S1|C1001 1009708 | ||
S|S1|C10035 1009708 | ||
S|S1|C10000 1009708 | ||
S|S1|C10 1009708 | ||
S|S1|C1005 1009708 | ||
S|S1|C0 1394711 | ||
S|S1|C10047 1394711 | ||
S|S1|C10010 1394711 | ||
S|S1|C10078 1263006 | ||
S|S1|C10056 1263006 | ||
S|S1|C10066 1263006 | ||
S|S1|C10040 1263006 | ||
S|S1|C10057 1263006 | ||
S|S1|C10041 1263006 | ||
S|S1|C10009 1263006 | ||
S|S1|C10061 1263006 | ||
S|S1|C10024 1263006 | ||
S|S1|C10028 1263006 | ||
S|S1|C10071 1263006 | ||
S|S1|C10012 1229484 | ||
S|S1|C10044 742723 | ||
S|S1|C10036 349101 | ||
S|S1|C10076 349101 | ||
S|S1|C10032 349101 | ||
S|S1|C10031 349101 | ||
S|S1|C10021 349101 | ||
S|S1|C10043 349101 | ||
S|S1|C10013 349101 | ||
S|S1|C10037 349101 | ||
S|S1|C10014 349101 | ||
S|S1|C10050 349101 | ||
S|S1|C10002 349101 | ||
S|S1|C1008 349101 | ||
S|S1|C1004 1230476 | ||
S|S1|C10019 1230476 | ||
S|S1|C1000 1230476 | ||
S|S1|C10064 1230476 | ||
S|S1|C10065 1230476 | ||
S|S1|C10018 1230476 | ||
S|S1|C1002 1230476 | ||
S|S1|C10005 1230476 | ||
S|S1|C10053 1230476 | ||
S|S1|C10004 1230476 | ||
S|S1|C10003 1230476 | ||
S|S1|C10048 1230476 | ||
S|S1|C10058 1230476 | ||
S|S1|C10017 1230476 | ||
S|S1|C10039 1230476 | ||
S|S1|C1 1230476 | ||
S|S1|C1006 1230476 | ||
S|S1|C10045 1230476 | ||
S|S1|C10079 1230476 | ||
S|S1|C10027 1230476 | ||
S|S1|C10074 1230476 | ||
S|S1|C10063 1230476 | ||
S|S1|C10051 1230476 | ||
S|S1|C10033 1230476 | ||
S|S1|C10052 434085 | ||
S|S1|C1007 434085 | ||
S|S1|C10072 434085 | ||
S|S1|C10073 1174684 | ||
S|S1|C10069 1174684 | ||
S|S1|C10008 1174684 | ||
S|S1|C10055 1174684 | ||
S|S1|C10067 1174684 | ||
S|S1|C10075 1174684 | ||
S|S1|C10070 1174684 | ||
S|S1|C10059 1174684 | ||
S|S1|C10006 1174684 | ||
S|S1|C10023 1412874 | ||
S|S1|C10062 939301 | ||
S|S1|C10016 939301 | ||
S|S1|C10046 939301 | ||
S|S1|C10060 939301 | ||
S|S1|C100 939301 | ||
S|S1|C10025 939301 | ||
S|S1|C10015 939301 | ||
S|S1|C10038 939301 | ||
S|S1|C10011 939301 | ||
S|S1|C10022 1235799 | ||
S|S1|C10007 245012 | ||
S|S1|C1003 245012 | ||
S|S1|C10080 245012 | ||
S|S1|C10068 245012 | ||
S|S1|C10034 245012 |
100 changes: 100 additions & 0 deletions
100
biobox_cli/verification/data/taxonomic_binning/prediction.binning
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
# This is the bioboxes.org binning output format at | ||
# https://github.com/bioboxes/rfc/tree/master/data-format | ||
|
||
@Version:0.9.1 | ||
@SampleID:bioboxes-validation-dataset | ||
@TaxonomyID:13e21ba783e8aabf730b783276858958 | ||
@_TaxatorTK_Version:1.3 | ||
|
||
@@SequenceID TaxID _TaxatorTK_Support _TaxatorTK_Length | ||
S|S1|C0 1 186 186 | ||
S|S1|C1 374 511 617 | ||
S|S1|C10 1 100 100 | ||
S|S1|C100 1 621 621 | ||
S|S1|C1000 1 174 180 | ||
S|S1|C10000 28211 949 949 | ||
S|S1|C10001 32008 3051 3051 | ||
S|S1|C10002 1060 229 229 | ||
S|S1|C10003 374 367 391 | ||
S|S1|C10004 374 269 302 | ||
S|S1|C10005 1 100 100 | ||
S|S1|C10006 165697 156 156 | ||
S|S1|C10007 186802 500 500 | ||
S|S1|C10008 1 239 239 | ||
S|S1|C10009 1 199 199 | ||
S|S1|C1001 28211 595 595 | ||
S|S1|C10010 1 334 334 | ||
S|S1|C10011 1 236 236 | ||
S|S1|C10012 28211 38741 38741 | ||
S|S1|C10013 1 605 605 | ||
S|S1|C10014 1060 100 100 | ||
S|S1|C10015 1 183 183 | ||
S|S1|C10016 1 456 456 | ||
S|S1|C10017 41294 328 369 | ||
S|S1|C10018 374 226 238 | ||
S|S1|C10019 374 651 705 | ||
S|S1|C1002 374 194 207 | ||
S|S1|C10020 13373 7443 7443 | ||
S|S1|C10021 1063 455 455 | ||
S|S1|C10022 186803 1380 1380 | ||
S|S1|C10023 2157 22875 22875 | ||
S|S1|C10024 1239 751 751 | ||
S|S1|C10025 1 201 201 | ||
S|S1|C10026 13373 9566 9566 | ||
S|S1|C10027 1 288 288 | ||
S|S1|C10028 1 168 168 | ||
S|S1|C10029 32008 1154 1154 | ||
S|S1|C1003 186802 707 707 | ||
S|S1|C10030 13373 5574 5574 | ||
S|S1|C10031 1063 625 625 | ||
S|S1|C10032 1063 280 280 | ||
S|S1|C10033 1 462 494 | ||
S|S1|C10034 186802 405 405 | ||
S|S1|C10035 1 100 100 | ||
S|S1|C10036 1063 184 184 | ||
S|S1|C10037 1063 271 271 | ||
S|S1|C10038 1 189 189 | ||
S|S1|C10039 374 174 182 | ||
S|S1|C1004 374 155 160 | ||
S|S1|C10040 1 259 259 | ||
S|S1|C10041 1 311 311 | ||
S|S1|C10043 1063 283 283 | ||
S|S1|C10044 1 238 238 | ||
S|S1|C10045 374 280 332 | ||
S|S1|C10046 1 163 163 | ||
S|S1|C10047 1 141 141 | ||
S|S1|C10048 374 444 508 | ||
S|S1|C1005 1 1217 1217 | ||
S|S1|C10050 1063 499 499 | ||
S|S1|C10051 374 741 822 | ||
S|S1|C10052 1236 4845 4845 | ||
S|S1|C10053 374 165 172 | ||
S|S1|C10055 165697 1542 1542 | ||
S|S1|C10056 1 150 150 | ||
S|S1|C10057 1 100 100 | ||
S|S1|C10058 1 440 541 | ||
S|S1|C10059 165697 931 931 | ||
S|S1|C1006 374 436 461 | ||
S|S1|C10060 1 171 171 | ||
S|S1|C10061 1 382 382 | ||
S|S1|C10062 1 309 309 | ||
S|S1|C10063 374 431 483 | ||
S|S1|C10064 374 522 572 | ||
S|S1|C10065 1 186 186 | ||
S|S1|C10066 1239 193 193 | ||
S|S1|C10067 165697 596 596 | ||
S|S1|C10068 186802 433 433 | ||
S|S1|C10069 165697 270 270 | ||
S|S1|C1007 1236 2621 2621 | ||
S|S1|C10070 165697 336 336 | ||
S|S1|C10071 1 328 328 | ||
S|S1|C10072 1236 1780 1780 | ||
S|S1|C10073 1 700 700 | ||
S|S1|C10074 1 99 161 | ||
S|S1|C10075 41297 962 962 | ||
S|S1|C10076 1060 196 196 | ||
S|S1|C10077 1 372 372 | ||
S|S1|C10078 1239 410 410 | ||
S|S1|C10079 1 248 248 | ||
S|S1|C1008 1063 233 233 | ||
S|S1|C10080 186802 188 188 |
58 changes: 58 additions & 0 deletions
58
biobox_cli/verification/data/taxonomic_binning/taxonomy/names.dmp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
1 | root | | scientific name | | ||
2 | Bacteria | Bacteria <prokaryote> | scientific name | | ||
356 | Rhizobiales | | scientific name | | ||
374 | Bradyrhizobium | | scientific name | | ||
1060 | Rhodobacter | | scientific name | | ||
1063 | Rhodobacter sphaeroides | | scientific name | | ||
1224 | Proteobacteria | | scientific name | | ||
1236 | Gammaproteobacteria | | scientific name | | ||
1239 | Firmicutes | | scientific name | | ||
2157 | Archaea | | scientific name | | ||
2323 | unclassified Bacteria | | scientific name | | ||
13373 | Burkholderia mallei | | scientific name | | ||
28211 | Alphaproteobacteria | | scientific name | | ||
28216 | Betaproteobacteria | | scientific name | | ||
31989 | Rhodobacteraceae | | scientific name | | ||
32008 | Burkholderia | | scientific name | | ||
33807 | unclassified Alphaproteobacteria (miscellaneous) | | scientific name | | ||
33811 | unclassified Gammaproteobacteria (miscellaneous) | | scientific name | | ||
39779 | unclassified Clostridiales (miscellaneous) | | scientific name | | ||
41294 | Bradyrhizobiaceae | | scientific name | | ||
41297 | Sphingomonadaceae | | scientific name | | ||
46255 | Weissella | | scientific name | | ||
47928 | environmental samples | environmental samples <clostridial firmicutes> | scientific name | | ||
48510 | environmental samples | environmental samples <Archaea> | scientific name | | ||
80840 | Burkholderiales | | scientific name | | ||
81850 | Leuconostocaceae | | scientific name | | ||
82117 | unclassified Alphaproteobacteria | | scientific name | | ||
91061 | Bacilli | | scientific name | | ||
95818 | Candidatus Saccharibacteria | | scientific name | | ||
111527 | pseudomallei group | | scientific name | | ||
118884 | unclassified Gammaproteobacteria | | scientific name | | ||
119060 | Burkholderiaceae | | scientific name | | ||
131567 | cellular organisms | | scientific name | | ||
165096 | Weissella koreensis | | scientific name | | ||
165697 | Sphingopyxis | | scientific name | | ||
186801 | Clostridia | | scientific name | | ||
186802 | Clostridiales | | scientific name | | ||
186803 | Lachnospiraceae | | scientific name | | ||
186813 | unclassified Clostridiales | | scientific name | | ||
186826 | Lactobacillales | | scientific name | | ||
186928 | unclassified Lachnospiraceae | | scientific name | | ||
204455 | Rhodobacterales | | scientific name | | ||
204457 | Sphingomonadales | | scientific name | | ||
245012 | butyrate-producing bacterium SM4/1 | | scientific name | | ||
349101 | Rhodobacter sphaeroides ATCC 17029 | | scientific name | | ||
370895 | Burkholderia mallei 2002721280 | | scientific name | | ||
434085 | gamma proteobacterium IMCC2047 | | scientific name | | ||
742723 | Lachnospiraceae bacterium 2_1_46FAA | | scientific name | | ||
939301 | alpha proteobacterium SCGC AAA015-O19 | | scientific name | | ||
1009708 | alpha proteobacterium SCGC AAA536-G10 | | scientific name | | ||
1045854 | Weissella koreensis KACC 15510 | | scientific name | | ||
1174684 | Sphingopyxis sp. MC1 | | scientific name | | ||
1229484 | alpha proteobacterium LLX12A | | scientific name | | ||
1230476 | Bradyrhizobium sp. DFCI-1 | | scientific name | | ||
1235799 | Lachnospiraceae bacterium 3-2 | | scientific name | | ||
1263006 | Firmicutes bacterium CAG:170 | | scientific name | | ||
1394711 | Candidatus Saccharibacteria bacterium RAAC3_TM7_1 | | scientific name | | ||
1412874 | uncultured archaeon A07HR60 | | scientific name | |
58 changes: 58 additions & 0 deletions
58
biobox_cli/verification/data/taxonomic_binning/taxonomy/nodes.dmp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | | ||
2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | | ||
356 | 28211 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
374 | 41294 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
1060 | 31989 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
1063 | 1060 | species | RS | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
1239 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
2157 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | | ||
2323 | 2 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
13373 | 111527 | species | BM | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
28211 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
28216 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
31989 | 204455 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
32008 | 119060 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
33807 | 82117 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
33811 | 118884 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
39779 | 186813 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
41294 | 356 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
41297 | 204457 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
46255 | 81850 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
47928 | 1239 | no rank | | 11 | 0 | 11 | 1 | 0 | 1 | 0 | 0 | uncultured | | ||
48510 | 2157 | no rank | | 11 | 0 | 11 | 1 | 0 | 1 | 0 | 0 | uncultured | | ||
80840 | 28216 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
81850 | 186826 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
82117 | 28211 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
91061 | 1239 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
95818 | 2323 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
111527 | 32008 | species group | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
118884 | 1236 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
119060 | 80840 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | | ||
165096 | 46255 | species | WK | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
165697 | 41297 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
186801 | 1239 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
186802 | 186801 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
186803 | 186802 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
186813 | 186802 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
186826 | 91061 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
186928 | 186803 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
204455 | 28211 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
204457 | 28211 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
245012 | 39779 | species | BB | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
349101 | 1063 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
370895 | 13373 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
434085 | 33811 | species | GP | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
742723 | 186928 | species | LB | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
939301 | 33807 | species | AP | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
1009708 | 33807 | species | AP | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
1045854 | 165096 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
1174684 | 165697 | species | SS | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
1229484 | 33807 | species | AP | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
1230476 | 374 | species | BS | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
1235799 | 186928 | species | LB | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
1263006 | 47928 | species | FB | 11 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | uncultured | | ||
1394711 | 95818 | species | CS | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | | ||
1412874 | 48510 | species | UA | 11 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | uncultured | |
Oops, something went wrong.