diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index 20b86915ac..405b99f506 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -548,15 +548,21 @@ - db.url.normalizers + crawldb.url.normalizers false - Normalize urls when updating crawldb + + !Temporary, can be overridden from the command line! + Normalize URLs when updating crawldb + - db.url.filters + crawldb.url.filters false - Filter urls when updating crawldb + + !Temporary, can be overridden from the command line! + Filter URLs when updating crawldb +