Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

NUTCH-1252 SegmentReader -get shows wrong data

git-svn-id: https://svn.apache.org/repos/asf/nutch/trunk@1397281 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
commit 68d638d596dae0b0a8b46f94477576c8dd169f53 (1 parent: f3e5a31)
Sebastian Nagel authored
CHANGES.txt (4 lines changed: 3 additions, 1 deletion)
View
@@ -2,7 +2,9 @@ Nutch Change Log
(trunk) Current Development:
-* NUTCH-1344 BasicURLNormalizer to normalize https same as http
+* NUTCH-1252 SegmentReader -get shows wrong data (snagel)
+
+* NUTCH-1344 BasicURLNormalizer to normalize https same as http (snagel)
* NUTCH-706 Url regex normalizer: pattern for session id removal not to match "newsId" (Meghna Kukreja via snagel)
src/java/org/apache/nutch/segment/SegmentReader.java (13 lines changed: 11 additions, 2 deletions)
View
@@ -386,8 +386,15 @@ public void run() {
Writable value = (Writable)valueClass.newInstance();
// we don't know the partitioning schema
for (int i = 0; i < readers.length; i++) {
- if (readers[i].get(key, value) != null)
+ if (readers[i].get(key, value) != null) {
res.add(value);
+ value = (Writable)valueClass.newInstance();
+ Text aKey = (Text) keyClass.newInstance();
+ while (readers[i].next(aKey, value) && aKey.equals(key)) {
+ res.add(value);
+ value = (Writable)valueClass.newInstance();
+ }
+ }
readers[i].close();
}
return res;
@@ -404,8 +411,10 @@ public void run() {
Writable value = (Writable)valueClass.newInstance();
for (int i = 0; i < readers.length; i++) {
while (readers[i].next(aKey, value)) {
- if (aKey.equals(key))
+ if (aKey.equals(key)) {
res.add(value);
+ value = (Writable)valueClass.newInstance();
+ }
}
readers[i].close();
}
Please sign in to comment.
Something went wrong with that request. Please try again.