Skip to content

Commit

Permalink
TIKA-3962 - set rfc822 parser to no recurse
Browse files Browse the repository at this point in the history
  • Loading branch information
tballison committed Jan 30, 2023
1 parent ac3adfc commit bff14f3
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
config.isStrictParsing(), extractAllAlternatives);
parser.setContentHandler(mch);
parser.setContentDecoding(true);
parser.setNoRecurse();
xhtml.startDocument();
TikaInputStream tstream = TikaInputStream.get(stream);
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -558,10 +558,20 @@ public void testSimpleBodyInlined() throws Exception {
/**
 * Checks the recursive metadata produced for {@code testGroupWiseEml.eml}.
 *
 * <p>Exactly three metadata entries are expected:
 * <ul>
 *   <li>index 0 - content containing {@code "test<"};</li>
 *   <li>index 1 - content containing {@code "test2"}, flagged as an attachment with the
 *       embedded resource path {@code /test.eml};</li>
 *   <li>index 2 - content containing {@code "ssssss"}, flagged as an attachment with the
 *       embedded resource path {@code /Neues Textdokument.txt}.</li>
 * </ul>
 *
 * @throws Exception if {@code getRecursiveMetadata} fails while processing the test file
 */
@Test
public void testGroupwise() throws Exception {
    List<Metadata> metadataList = getRecursiveMetadata("testGroupWiseEml.eml");

    // A single size expectation: the list is never modified here, so asserting two
    // different sizes against it could not succeed.
    assertEquals(3, metadataList.size());

    // Entry 0: presumably the outer message itself -- confirm against getRecursiveMetadata.
    assertContains("test<", metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT));

    // Entry 1: the attached message file, reported as an attachment at /test.eml.
    assertContains("test2", metadataList.get(1).get(TikaCoreProperties.TIKA_CONTENT));
    assertEquals(TikaCoreProperties.EmbeddedResourceType.ATTACHMENT.toString(),
            metadataList.get(1).get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
    assertEquals("/test.eml",
            metadataList.get(1).get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH));

    // Entry 2: the text attachment; its content check lives at index 2, not index 1.
    assertContains("ssssss", metadataList.get(2).get(TikaCoreProperties.TIKA_CONTENT));
    assertEquals(TikaCoreProperties.EmbeddedResourceType.ATTACHMENT.toString(),
            metadataList.get(2).get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
    assertEquals("/Neues Textdokument.txt",
            metadataList.get(2).get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,20 @@ Content-Type: message/rfc822
Content-Transfer-Encoding: base64
Content-Disposition: attachment; filename="test.eml"
TWltZS1WZXJzaW9uOiAxLjANClgtTWFpbGVyOiBHcm91cFdpc2UgMjAxMg0KU3ViamVjdDogdGVz
dA0KRGF0ZTogVGh1LCAyNyBKdW4gMjAxMyAxMzoyNzoxMiArMDIwMA0KTWVzc2FnZS1JRDogPDUx
Q0MzREIwMDIwMDAwMDAwMDAwMDAwM0AkJCQ+DQpGcm9tOiAiTm92ZWxsIEdyb3VwV2lzZSIgPCQk
JC4kJCQuJCQkPg0KQ29udGVudC1UeXBlOiBtdWx0aXBhcnQvYWx0ZXJuYXRpdmU7IGJvdW5kYXJ5
PSJfX19fTFBITVhMWk1YT01STEZLU0VKQ1dfX19fIg0KDQoNCi0tX19fX0xQSE1YTFpNWE9NUkxG
S1NFSkNXX19fXw0KQ29udGVudC1UeXBlOiB0ZXh0L3BsYWluOyBjaGFyc2V0PXV0Zi04DQpDb250
ZW50LVRyYW5zZmVyLUVuY29kaW5nOiBiYXNlNjQNCkNvbnRlbnQtRGlzcG9zaXRpb246IGlubGlu
ZQ0KDQpkR1Z6ZEE9PQ0KLS1fX19fTFBITVhMWk1YT01STEZLU0VKQ1dfX19fDQpDb250ZW50LVR5
cGU6IHRleHQvaHRtbDsgY2hhcnNldD11dGYtOA0KQ29udGVudC1UcmFuc2Zlci1FbmNvZGluZzog
cXVvdGVkLXByaW50YWJsZQ0KDQo8SFRNTD48SEVBRD4NCjxNRVRBIGNvbnRlbnQ9M0QidGV4dC9o
dG1sOyBjaGFyc2V0PTNEdXRmLTgiIGh0dHAtZXF1aXY9M0RDb250ZW50LVR5cGU+DQo8TUVUQSBu
YW1lPTNER0VORVJBVE9SIGNvbnRlbnQ9M0QiTVNIVE1MIDguMDAuNzYwMS4xNzY5OSI+PC9IRUFE
Pg0KPEJPRFkgc3R5bGU9M0QiTUFSR0lOOiA0cHggNHB4IDFweDsgRk9OVDogMTBwdCBTZWdvZSBV
SSI+dGVzdDwvQk9EWT48L0hUTUw+DQotLV9fX19MUEhNWExaTVhPTVJMRktTRUpDV19fX18tLQ0K
TWltZS1WZXJzaW9uOiAxLjAKWC1NYWlsZXI6IEdyb3VwV2lzZSAyMDEyClN1YmplY3Q6IHRlc3Qy
CkRhdGU6IFRodSwgMjcgSnVuIDIwMTMgMTM6Mjc6MTIgKzAyMDAKTWVzc2FnZS1JRDogPDUxQ0Mz
REIwMDIwMDAwMDAwMDAwMDAwM0AkJCQ+CkZyb206ICJOb3ZlbGwgR3JvdXBXaXNlIiA8JCQkLiQk
JC4kJCQ+CkNvbnRlbnQtVHlwZTogbXVsdGlwYXJ0L2FsdGVybmF0aXZlOyBib3VuZGFyeT0iX19f
X0xQSE1YTFpNWE9NUkxGS1NFSkNXX19fXyIKCgotLV9fX19MUEhNWExaTVhPTVJMRktTRUpDV19f
X18KQ29udGVudC1UeXBlOiB0ZXh0L3BsYWluOyBjaGFyc2V0PXV0Zi04CkNvbnRlbnQtVHJhbnNm
ZXItRW5jb2Rpbmc6IGJhc2U2NApDb250ZW50LURpc3Bvc2l0aW9uOiBpbmxpbmUKCmRHVnpkREk9
Ci0tX19fX0xQSE1YTFpNWE9NUkxGS1NFSkNXX19fXwpDb250ZW50LVR5cGU6IHRleHQvaHRtbDsg
Y2hhcnNldD11dGYtOApDb250ZW50LVRyYW5zZmVyLUVuY29kaW5nOiBxdW90ZWQtcHJpbnRhYmxl
Cgo8SFRNTD48SEVBRD4KPE1FVEEgY29udGVudD0zRCJ0ZXh0L2h0bWw7IGNoYXJzZXQ9M0R1dGYt
OCIgaHR0cC1lcXVpdj0zRENvbnRlbnQtVHlwZT4KPE1FVEEgbmFtZT0zREdFTkVSQVRPUiBjb250
ZW50PTNEIk1TSFRNTCA4LjAwLjc2MDEuMTc2OTkiPjwvSEVBRD4KPEJPRFkgc3R5bGU9M0QiTUFS
R0lOOiA0cHggNHB4IDFweDsgRk9OVDogMTBwdCBTZWdvZSBVSSI+dGVzdDI8L0JPRFk+PC9IVE1M
PgotLV9fX19MUEhNWExaTVhPTVJMRktTRUpDV19fX18tLQo=
--____LPHMXLZMXOMRLFKSEJCW____
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: quoted-printable
Expand Down

0 comments on commit bff14f3

Please sign in to comment.