Skip to content
Permalink
Browse files
ANY23-520 Augment any23 extractor CLI to print all mimetypes for a given extractor (#212)

* ANY23-520 Augment any23 extractor CLI to print all mimetypes for a given extractor
  • Loading branch information
lewismc committed Oct 19, 2021
1 parent 85376fe commit 15126ef5a535ef64781be4d2a9a7909df94b293e
Showing 2 changed files with 38 additions and 32 deletions.
@@ -40,7 +40,8 @@
"extractor" }, commandDescription = "Utility for obtaining documentation about metadata extractors.")
public class ExtractorDocumentation extends BaseTool {

@Parameter(names = { "-l", "--list" }, description = "shows the names of all available extractors")
@Parameter(names = { "-l",
"--list" }, description = "shows the names, labels and supported mimetypes of all available extractors")
private boolean showList;

@Parameter(names = { "-i", "--input" }, description = "shows example input for the given extractor")
@@ -88,15 +89,15 @@ public void printError(String msg) {
}

/**
* Prints the list of all the available extractors.
* Prints the list of all the available extractor names, labels and supported mimetypes.
*
* @param registry
* the {@link org.apache.any23.extractor.ExtractorRegistry} containing all extractors
*/
public void printExtractorList(ExtractorRegistry registry) {
// Emits output for every factory returned by the registry's extractor group.
for (ExtractorFactory factory : registry.getExtractorGroup()) {
// NOTE(review): this two-column println and the three-column println below look like the
// removed and added sides of a single diff hunk rendered without +/- markers -- confirm
// which one is live before treating both as executed code.
// Name right-aligned to a minimum width of 25, label in square brackets (minimum width 15).
out.println(
String.format(Locale.ROOT, "%25s [%15s]", factory.getExtractorName(), factory.getExtractorLabel()));
// Same name/label columns, followed by the value of getSupportedMIMETypes() rendered via %s
// (minimum width 15). Locale.ROOT is passed so the formatting is locale-independent.
out.println(String.format(Locale.ROOT, "%25s [%15s] %15s", factory.getExtractorName(),
factory.getExtractorLabel(), factory.getSupportedMIMETypes()));
}
}

@@ -209,34 +209,39 @@ core$ any23 rover -t -f quad myfoaf.rdf

+--------------------------------------
cli$ any23 extractor --list
csv [org.apache.any23.extractor.csv.CSVExtractorFactory]
html-embedded-jsonld [org.apache.any23.extractor.html.EmbeddedJSONLDExtractorFactory]
html-head-icbm [org.apache.any23.extractor.html.ICBMExtractorFactory]
html-head-links [org.apache.any23.extractor.html.HeadLinkExtractorFactory]
html-head-meta [org.apache.any23.extractor.html.HTMLMetaExtractorFactory]
html-head-title [org.apache.any23.extractor.html.TitleExtractorFactory]
html-mf-adr [org.apache.any23.extractor.html.AdrExtractorFactory]
html-mf-geo [org.apache.any23.extractor.html.GeoExtractorFactory]
html-mf-hcalendar [org.apache.any23.extractor.html.HCalendarExtractorFactory]
html-mf-hcard [org.apache.any23.extractor.html.HCardExtractorFactory]
html-mf-hlisting [org.apache.any23.extractor.html.HListingExtractorFactory]
html-mf-hrecipe [org.apache.any23.extractor.html.HRecipeExtractorFactory]
html-mf-hresume [org.apache.any23.extractor.html.HResumeExtractorFactory]
html-mf-hreview [org.apache.any23.extractor.html.HReviewExtractorFactory]
html-mf-hreview-aggregate [org.apache.any23.extractor.html.HReviewAggregateExtractorFactory]
html-mf-license [org.apache.any23.extractor.html.LicenseExtractorFactory]
html-mf-species [org.apache.any23.extractor.html.SpeciesExtractorFactory]
html-mf-xfn [org.apache.any23.extractor.html.XFNExtractorFactory]
html-microdata [org.apache.any23.extractor.microdata.MicrodataExtractorFactory]
html-rdfa11 [org.apache.any23.extractor.rdfa.RDFa11ExtractorFactory]
html-xpath [org.apache.any23.extractor.xpath.XPathExtractorFactory]
rdf-jsonld [org.apache.any23.extractor.rdf.JSONLDExtractorFactory]
rdf-nq [org.apache.any23.extractor.rdf.NQuadsExtractorFactory]
rdf-nt [org.apache.any23.extractor.rdf.NTriplesExtractorFactory]
rdf-trix [org.apache.any23.extractor.rdf.TriXExtractorFactory]
rdf-turtle [org.apache.any23.extractor.rdf.TurtleExtractorFactory]
rdf-xml [org.apache.any23.extractor.rdf.RDFXMLExtractorFactory]
yaml [org.apache.any23.extractor.yaml.YAMLExtractorFactory]
csv [org.apache.any23.extractor.csv.CSVExtractorFactory] [text/csv;q=0.1]
html-embedded-jsonld [org.apache.any23.extractor.html.EmbeddedJSONLDExtractorFactory] [text/html;q=0.02, application/xhtml+xml;q=0.02]
html-head-icbm [org.apache.any23.extractor.html.ICBMExtractorFactory] [text/html;q=0.01, application/xhtml+xml;q=0.01]
html-head-links [org.apache.any23.extractor.html.HeadLinkExtractorFactory] [text/html;q=0.05, application/xhtml+xml;q=0.05]
html-head-meta [org.apache.any23.extractor.html.HTMLMetaExtractorFactory] [text/html;q=0.02, application/xhtml+xml;q=0.02]
html-head-title [org.apache.any23.extractor.html.TitleExtractorFactory] [text/html;q=0.02, application/xhtml+xml;q=0.02]
html-mf-adr [org.apache.any23.extractor.html.AdrExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-mf-geo [org.apache.any23.extractor.html.GeoExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-mf-hcalendar [org.apache.any23.extractor.html.HCalendarExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-mf-hcard [org.apache.any23.extractor.html.HCardExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-mf-hlisting [org.apache.any23.extractor.html.HListingExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-mf-hrecipe [org.apache.any23.extractor.html.HRecipeExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-mf-hresume [org.apache.any23.extractor.html.HResumeExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-mf-hreview [org.apache.any23.extractor.html.HReviewExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-mf-hreview-aggregate [org.apache.any23.extractor.html.HReviewAggregateExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-mf-license [org.apache.any23.extractor.html.LicenseExtractorFactory] [text/html;q=0.01, application/xhtml+xml;q=0.01]
html-mf-species [org.apache.any23.extractor.html.SpeciesExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-mf-xfn [org.apache.any23.extractor.html.XFNExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-microdata [org.apache.any23.extractor.microdata.MicrodataExtractorFactory] [text/html;q=0.1, application/xhtml+xml;q=0.1]
html-rdfa11 [org.apache.any23.extractor.rdfa.RDFa11ExtractorFactory] [application/xhtml+xml;q=0.3, application/html;q=0.3, text/html;q=0.3]
html-xpath [org.apache.any23.extractor.xpath.XPathExtractorFactory] [text/html;q=0.02, application/xhtml+xml;q=0.02]
ical [org.apache.any23.extractor.calendar.ICalExtractorFactory] [text/calendar]
jcal [org.apache.any23.extractor.calendar.JCalExtractorFactory] [application/calendar+json]
owl-functional [org.apache.any23.extractor.rdf.FunctionalSyntaxExtractorFactory] [text/owl-functional]
owl-manchester [org.apache.any23.extractor.rdf.ManchesterSyntaxExtractorFactory] [text/owl-manchester]
rdf-jsonld [org.apache.any23.extractor.rdf.JSONLDExtractorFactory] [application/ld+json;q=0.1]
rdf-nq [org.apache.any23.extractor.rdf.NQuadsExtractorFactory] [application/n-quads, text/x-nquads;q=0.1, text/rdf+nq;q=0.1, text/nq;q=0.1, text/nquads;q=0.1, text/n-quads;q=0.1]
rdf-nt [org.apache.any23.extractor.rdf.NTriplesExtractorFactory] [application/n-triples;q=0.1, text/nt;q=0.1, text/ntriples;q=0.1, text/plain;q=0.1]
rdf-trix [org.apache.any23.extractor.rdf.TriXExtractorFactory] [application/trix]
rdf-turtle [org.apache.any23.extractor.rdf.TurtleExtractorFactory] [text/turtle, text/rdf+n3, text/n3, application/n3, application/x-turtle, application/turtle]
rdf-xml [org.apache.any23.extractor.rdf.RDFXMLExtractorFactory] [application/rdf+xml, text/rdf, text/rdf+xml, application/rdf]
xcal [org.apache.any23.extractor.calendar.XCalExtractorFactory] [application/calendar+xml]
yaml [org.apache.any23.extractor.yaml.YAMLExtractorFactory] [text/x-yaml;q=0.5]
+--------------------------------------

** The MicrodataParser tool

0 comments on commit 15126ef

Please sign in to comment.