diff --git a/CHANGES.txt b/CHANGES.txt index 9953e46d31..f9ac540e69 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -16,6 +16,9 @@ Release 3.0.0-BETA - 12/01/2023 * Tika will look for "custom-mimetypes.xml" directly on the classpath, NOT under "/org/apache/tika/mime/". (TIKA-4147). + * Return media type "text/javascript" instead of "application/javascript" + to follow RFC-9239. (TIKA-4119). + Other Changes/Updates * Upgrade PDFBox to 3.0.1 (TIKA-3347) diff --git a/README.md b/README.md index 97d7096567..88fe59cb89 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Tika jars can be fetched from Maven Central or your favourite Maven mirror. **Tika 1.X reached End of Life (EOL) on September 30, 2022.** -Tika is based on **Java 8** and uses the [Maven 3](https://maven.apache.org) build system. +Tika is based on **Java 11** and uses the [Maven 3](https://maven.apache.org) build system. **N.B.** [Docker](https://www.docker.com/products/personal) is used for tests in tika-integration-tests. As of Tika 2.5.1, if Docker is not installed, those tests are skipped. Docker is required for a successful build on earlier 2.x versions. @@ -50,7 +50,7 @@ Maven Dependencies Apache Tika provides *Bill of Material* (BOM) artifact to align Tika module versions and simplify version management. To avoid convergence errors in your own project, import this -bom or Tika's parent pom.xml in your dependencey management section. +bom or Tika's parent pom.xml in your dependency management section. If you use Apache Maven: @@ -170,7 +170,7 @@ Notification on all code changes are sent to the following mailing list: The mailing lists are open to anyone and publicly archived. You can subscribe the mailing lists by sending a message to -[LIST]-subscribe@tika.apache.org (for example user-subscribe@...). +[LIST]-subscribe@tika.apache.org (for example, user-subscribe@...). To unsubscribe, send a message to [LIST]-unsubscribe@tika.apache.org. 
For more instructions, send a message to [LIST]-help@tika.apache.org. diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml index b76adebd1f..54f7cc6f60 100644 --- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml +++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml @@ -366,15 +366,18 @@ - + + - <_comment>JavaScript Source Code + + + - + diff --git a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java index 1cd0f40a27..2158658861 100644 --- a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java +++ b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java @@ -56,7 +56,8 @@ public void testHttpServerFileExtensions() { assertEquals("application/java-archive", tika.detect("x.jar")); assertEquals("application/java-serialized-object", tika.detect("x.ser")); assertEquals("application/java-vm", tika.detect("x.class")); - assertEquals("application/javascript", tika.detect("x.js")); + assertEquals("text/javascript", tika.detect("x.js")); + assertEquals("text/javascript", tika.detect("x.mjs")); assertEquals("application/json", tika.detect("x.json")); assertEquals("application/lost+xml", tika.detect("x.lostxml")); assertEquals("application/mac-binhex40", tika.detect("x.hqx")); diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java index 7d576d73a3..0d904f6dfb 100644 --- a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java +++ b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java @@ -31,6 +31,7 @@ import java.util.List; import java.util.Set; import java.util.concurrent.Executors; +import java.util.stream.Collectors; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -279,6 
+280,24 @@ public void testGetExtensionForPowerPoint() throws Exception { assertEquals(".ppt", mt.getExtensions().get(0)); } + @Test + public void testGetExtensionForJavaScript() throws Exception { + MimeType mt = this.mimeTypes.forName("text/javascript"); + assertEquals(".js", mt.getExtension()); + assertEquals(List.of(".js", ".mjs"), mt.getExtensions()); + } + + @Test + public void testGetAliasForJavaScript() throws Exception { + MimeType mt = this.mimeTypes.forName("text/javascript"); + Set<String> aliases = mimeTypes.getMediaTypeRegistry() + .getAliases(mt.getType()) + .stream() + .map(MediaType::toString) + .collect(Collectors.toSet()); + assertEquals(Set.of("application/javascript", "application/x-javascript"), aliases); + } + @Test public void testGetRegisteredMimesWithParameters() throws Exception { //TIKA-1692 @@ -351,40 +370,32 @@ public void testMinShouldMatch() throws Exception { } @Test - public void testBadMinShouldMatch1() throws Exception { + public void testBadMinShouldMatch1() { System.setProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP, "src/test/resources/org/apache/tika/mime/custom-mimetypes-badMinShouldMatch1.xml"); - assertThrows(IllegalArgumentException.class, () -> { - MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(new CustomClassLoader()); - }); + assertThrows(IllegalArgumentException.class, () -> MimeTypes.getDefaultMimeTypes(new CustomClassLoader())); } @Test - public void testBadMinShouldMatch2() throws Exception { + public void testBadMinShouldMatch2() { System.setProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP, "src/test/resources/org/apache/tika/mime/custom-mimetypes-badMinShouldMatch2.xml"); - assertThrows(IllegalArgumentException.class, () -> { - MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(new CustomClassLoader()); - }); + assertThrows(IllegalArgumentException.class, () -> MimeTypes.getDefaultMimeTypes(new CustomClassLoader())); } @Test - public void testBadMinShouldMatch3() throws Exception { + public void 
testBadMinShouldMatch3() { System.setProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP, "src/test/resources/org/apache/tika/mime/custom-mimetypes-badMinShouldMatch3.xml"); - assertThrows(IllegalArgumentException.class, () -> { - MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(new CustomClassLoader()); - }); + assertThrows(IllegalArgumentException.class, () -> MimeTypes.getDefaultMimeTypes(new CustomClassLoader())); } @Test - public void testBadMinShouldMatch4() throws Exception { + public void testBadMinShouldMatch4() { System.setProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP, "src/test/resources/org/apache/tika/mime/custom-mimetypes-badMinShouldMatch4.xml"); - assertThrows(IllegalArgumentException.class, () -> { - MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(new CustomClassLoader()); - }); + assertThrows(IllegalArgumentException.class, () -> MimeTypes.getDefaultMimeTypes(new CustomClassLoader())); } private static class CustomClassLoader extends ClassLoader { diff --git a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java index 437cefc6d2..d9a65bf1b0 100644 --- a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java +++ b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java @@ -246,6 +246,6 @@ public void testTIKA2237() throws IOException { "} catch (e) {\n" + " console.log(e);\n" + "}") .getBytes(StandardCharsets.UTF_8)); MediaType detect = new ProbabilisticMimeDetectionSelector().detect(input, metadata); - assertEquals(MediaType.application("javascript"), detect); + assertEquals(MediaType.text("javascript"), detect); } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java index 
3dad7d6aff..1b66a7efed 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java @@ -718,8 +718,8 @@ public void testTextBasedFormatsDetection() throws Exception { assertTypeByName("text/html", "testHTML.html"); assertType("text/html", "testHTML.html"); - assertTypeByName("application/javascript", "testJS.js"); - assertType("application/javascript", "testJS.js"); + assertTypeByName("text/javascript", "testJS.js"); + assertType("text/javascript", "testJS.js"); assertType("text/vnd.graphviz", "testGRAPHVIZd.dot"); assertType("text/vnd.graphviz", "testGRAPHVIZg.dot"); @@ -1148,10 +1148,10 @@ public void testCodeFormats() throws Exception { assertTypeByData("text/x-matlab", "testMATLAB_barcast.m"); // By name, or by name+data, gets it as JS - assertTypeByName("application/javascript", "testJS.js"); - assertTypeByName("application/javascript", "testJS_HTML.js"); - assertType("application/javascript", "testJS.js"); - assertType("application/javascript", "testJS_HTML.js"); + assertTypeByName("text/javascript", "testJS.js"); + assertTypeByName("text/javascript", "testJS_HTML.js"); + assertType("text/javascript", "testJS.js"); + assertType("text/javascript", "testJS_HTML.js"); // With data only, because we have no JS file magic, can't be // detected. One will come through as plain text, the other