@@ -255,20 +255,6 @@ private void checkLinks(Resource resource, String[][] expectedLinks) {
255255 }
256256 }
257257
258- private void checkExtractHtmlLangAttribute (Resource resource , String ... langAttributes )
259- throws JSONException {
260- assertNotNull (resource );
261- assertTrue ("Wrong instance type of Resource: " + resource .getClass (), resource instanceof HTMLResource );
262- JSONArray metas = resource .getMetaData ().getJSONObject ("Head" ).getJSONArray ("Metas" );
263- assertNotNull (metas );
264- JSONObject meta = metas .getJSONObject (0 );
265- for (int i = 0 ; i < langAttributes .length ; i += 2 ) {
266- String key = langAttributes [i ];
267- assertNotNull (meta .get (key ));
268- assertEquals (meta .get (key ), langAttributes [i +1 ]);
269- }
270- }
271-
272258 public void testLinkExtraction () throws ResourceParseException , IOException {
273259 String testFileName = "link-extraction-test.warc" ;
274260 ResourceProducer producer = ProducerUtils .getProducer (getClass ().getResource (testFileName ).getPath ());
@@ -448,11 +434,11 @@ public void testHtmlLanguageAttributeExtraction() throws ResourceParseException,
448434 ResourceProducer producer = ProducerUtils .getProducer (getClass ().getResource (testFileName ).getPath ());
449435 ResourceFactoryMapper mapper = new ExtractingResourceFactoryMapper ();
450436 ExtractingResourceProducer extractor = new ExtractingResourceProducer (producer , mapper );
451- checkExtractHtmlLangAttribute (extractor .getNext (), "name" , "HTML@/lang" , "content" , "en" );
452- checkExtractHtmlLangAttribute (extractor .getNext (), "name" , "HTML@/lang" , "content" , "zh-CN" );
453- checkExtractHtmlLangAttribute (extractor .getNext (), "name" , "HTML@/lang" , "content" , "cs-cz" );
454- checkExtractHtmlLangAttribute (extractor .getNext (), "name" , "HTML@/lang" , "content" , "en" );
455- checkExtractHtmlLangAttribute (extractor .getNext (), "name" , "HTML@/xml:lang" , "content" , "es-MX" );
437+ checkExtractedAttributes (extractor .getNext (), "name" , "HTML@/lang" , "content" , "en" );
438+ checkExtractedAttributes (extractor .getNext (), "name" , "HTML@/lang" , "content" , "zh-CN" );
439+ checkExtractedAttributes (extractor .getNext (), "name" , "HTML@/lang" , "content" , "cs-cz" );
440+ checkExtractedAttributes (extractor .getNext (), "name" , "HTML@/lang" , "content" , "en" );
441+ checkExtractedAttributes (extractor .getNext (), "name" , "HTML@/xml:lang" , "content" , "es-MX" );
456442 }
457443
458444 public void testBodyMetaElements () throws ResourceParseException , IOException {
0 commit comments