From ca8f17cf6fb2ac33a2f8ca519885059af620b5a3 Mon Sep 17 00:00:00 2001 From: Pulkit Gupta Date: Fri, 5 Dec 2025 13:50:54 +0000 Subject: [PATCH 1/5] Add check for not adding same node again --- .../apache/lucene/util/hnsw/HnswGraphBuilder.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java index 70f607d3b5d0..afa05e3a4b8a 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java @@ -374,18 +374,23 @@ void addDiverseNeighbors( if (hnswLock != null) { Lock lock = hnswLock.write(level, nbr); try { - NeighborArray nbrsOfNbr = getGraph().getNeighbors(level, nbr); - nbrsOfNbr.addAndEnsureDiversity(node, candidates.getScores(i), nbr, scorer); + updateNeighbor(getGraph().getNeighbors(level, nbr), node, candidates.getScores(i), nbr, scorer); } finally { lock.unlock(); } } else { - NeighborArray nbrsOfNbr = hnsw.getNeighbors(level, nbr); - nbrsOfNbr.addAndEnsureDiversity(node, candidates.getScores(i), nbr, scorer); + updateNeighbor(hnsw.getNeighbors(level, nbr), node, candidates.getScores(i), nbr, scorer); } } } + private void updateNeighbor(NeighborArray nbrsOfNbr, int node, float score, int nbr, UpdateableRandomVectorScorer scorer) throws IOException{ + for (int j = 0; j < nbrsOfNbr.size(); j++) { + if (nbrsOfNbr.nodes()[j] == node) return; + } + nbrsOfNbr.addAndEnsureDiversity(node, score, nbr, scorer); + } + /** * This method will select neighbors to add and return a mask telling the caller which candidates * are selected From 5534e4fd113ae432a8ee8d88308e98b82f52d5a9 Mon Sep 17 00:00:00 2001 From: Pulkit Gupta Date: Fri, 5 Dec 2025 15:09:28 +0000 Subject: [PATCH 2/5] Added entry to Changes.txt --- lucene/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index abf067a8a2b8..448656c8c6e2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -255,6 +255,8 @@ Bug Fixes * GITHUB#15380: Fix potential EOF introduced by optimized filter iterations in MaxScoreBulkScorer. The approximation and the match verification could get out of sync, resulting in an EOFException. (Ben Trent) +* GITHUB#15478: Fix duplicate neighbor issue by checking node existence before making reverse link. (Pulkit Gupta) + Other --------------------- * GITHUB#15237: Fix SmartChinese to only deserialize dictionary data from classpath with a native array From 9e9b91db054cb6604d59e75b848b631717068d2f Mon Sep 17 00:00:00 2001 From: Pulkit Gupta Date: Fri, 5 Dec 2025 15:23:06 +0000 Subject: [PATCH 3/5] tidy --- .../java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java index afa05e3a4b8a..b42eb6b802f0 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java @@ -374,7 +374,8 @@ void addDiverseNeighbors( if (hnswLock != null) { Lock lock = hnswLock.write(level, nbr); try { - updateNeighbor(getGraph().getNeighbors(level, nbr), node, candidates.getScores(i), nbr, scorer); + updateNeighbor( + getGraph().getNeighbors(level, nbr), node, candidates.getScores(i), nbr, scorer); } finally { lock.unlock(); } @@ -384,7 +385,9 @@ void addDiverseNeighbors( } } - private void updateNeighbor(NeighborArray nbrsOfNbr, int node, float score, int nbr, UpdateableRandomVectorScorer scorer) throws IOException{ + private void updateNeighbor( + NeighborArray nbrsOfNbr, int node, float score, int nbr, UpdateableRandomVectorScorer scorer) + throws IOException { for (int j = 0; j < nbrsOfNbr.size(); j++) { if (nbrsOfNbr.nodes()[j] == node) return; } From f152fdc98fe53fe8ee61a8a87f0c7f3140aab72c Mon Sep 17 00:00:00 2001 From: Pulkit Gupta Date: Fri, 5 Dec 2025 18:13:49 +0000 Subject: [PATCH 4/5] Use flag while performing check --- .../lucene/util/hnsw/HnswGraphBuilder.java | 40 ++++++++++++++----- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java index b42eb6b802f0..27ec81a676cb 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java @@ -349,7 +349,7 @@ void addDiverseNeighbors( int node, NeighborArray candidates, UpdateableRandomVectorScorer scorer, - boolean outOfOrderInsertion) + boolean isLinkRepair) throws IOException { /* For each of the beamWidth nearest candidates (going from best to worst), select it only if it * is closer to target than it is to any of the already-selected neighbors (ie selected in this method, @@ -358,8 +358,7 @@ void addDiverseNeighbors( NeighborArray neighbors = hnsw.getNeighbors(level, node); int maxConnOnLevel = level == 0 ? M * 2 : M; boolean[] mask = - selectAndLinkDiverse( - node, neighbors, candidates, maxConnOnLevel, scorer, outOfOrderInsertion); + selectAndLinkDiverse(node, neighbors, candidates, maxConnOnLevel, scorer, isLinkRepair); // Link the selected nodes to the new node, and the new node to the selected nodes (again // applying diversity heuristic) @@ -374,22 +373,43 @@ void addDiverseNeighbors( if (hnswLock != null) { Lock lock = hnswLock.write(level, nbr); try { + // Check for duplicates only during link repair updateNeighbor( - getGraph().getNeighbors(level, nbr), node, candidates.getScores(i), nbr, scorer); + getGraph().getNeighbors(level, nbr), + node, + candidates.getScores(i), + nbr, + scorer, + isLinkRepair); } finally { lock.unlock(); } } else { - updateNeighbor(hnsw.getNeighbors(level, nbr), node, candidates.getScores(i), nbr, scorer); + updateNeighbor( + hnsw.getNeighbors(level, nbr), + node, + candidates.getScores(i), + nbr, + scorer, + isLinkRepair); } } } private void updateNeighbor( - NeighborArray nbrsOfNbr, int node, float score, int nbr, UpdateableRandomVectorScorer scorer) + NeighborArray nbrsOfNbr, + int node, + float score, + int nbr, + UpdateableRandomVectorScorer scorer, + boolean isLinkRepair) throws IOException { - for (int j = 0; j < nbrsOfNbr.size(); j++) { - if (nbrsOfNbr.nodes()[j] == node) return; + // Only check for duplicates during link repair to avoid performance overhead during normal + // construction + if (isLinkRepair) { + for (int j = 0; j < nbrsOfNbr.size(); j++) { + if (nbrsOfNbr.nodes()[j] == node) return; + } } nbrsOfNbr.addAndEnsureDiversity(node, score, nbr, scorer); } @@ -404,7 +424,7 @@ private boolean[] selectAndLinkDiverse( NeighborArray candidates, int maxConnOnLevel, UpdateableRandomVectorScorer scorer, - boolean outOfOrderInsertion) + boolean isLinkRepair) throws IOException { boolean[] mask = new boolean[candidates.size()]; // Select the best maxConnOnLevel neighbors of the new node, applying the diversity heuristic @@ -422,7 +442,7 @@ private boolean[] selectAndLinkDiverse( mask[i] = true; // here we don't need to lock, because there's no incoming link so no others is able to // discover this node such that no others will modify this neighbor array as well - if (outOfOrderInsertion) { + if (isLinkRepair) { neighbors.addOutOfOrder(cNode, cScore); } else { neighbors.addInOrder(cNode, cScore); From b854de0357650bccef263ab07fc08db9a6a1b424 Mon Sep 17 00:00:00 2001 From: Pulkit Gupta Date: Fri, 5 Dec 2025 18:21:26 +0000 Subject: [PATCH 5/5] Remove redundant comment --- .../src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java index 27ec81a676cb..2e55e3e06649 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java @@ -373,7 +373,6 @@ void addDiverseNeighbors( if (hnswLock != null) { Lock lock = hnswLock.write(level, nbr); try { - // Check for duplicates only during link repair updateNeighbor( getGraph().getNeighbors(level, nbr), node,