From aaa64d7015998f28aaffac031c4032abf73bebd6 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Thu, 6 Dec 2018 11:31:02 +0100
Subject: [PATCH] LUCENE-8594: DV updates are broken for updates on a new field

A segment written with Lucene70Codec fails if it tries to update a DV
field that didn't exist in the index before it was upgraded to
Lucene80Codec. We bake the DV format into the FieldInfo when it's used
the first time and therefore never go to the codec when we need to
update. Yet for a field that didn't exist before and was added during an
indexing operation, we have to consult the codec and get an exception.
This change fixes this issue and adds the relevant bwc tests.
---
 .../lucene/codecs/lucene70/Lucene70Codec.java |  5 +-
 .../index/TestBackwardsCompatibility.java     | 76 ++++++++++++++++++-
 2 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70Codec.java
index 6841345dc690..0f397b16f550 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70Codec.java
@@ -55,7 +55,8 @@ public class Lucene70Codec extends Codec {
   private final SegmentInfoFormat segmentInfosFormat = new Lucene70SegmentInfoFormat();
   private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
   private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
-  
+  private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene70");
+
   private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
     @Override
     public PostingsFormat getPostingsFormatForField(String field) {
@@ -66,7 +67,7 @@ public PostingsFormat getPostingsFormatForField(String field) {
   private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
     @Override
     public DocValuesFormat getDocValuesFormatForField(String field) {
-      throw new IllegalStateException("This codec should only be used for reading, not writing");
+      return defaultDVFormat;
     }
   };
 
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
index bc7b6d8f9e0c..6f2879786056 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@@ -1538,7 +1538,7 @@ private void assertNumericDocValues(LeafReader r, String f, String cf) throws IO
       assertEquals(ndvcf.longValue(), ndvf.longValue()*2);
     }
   }
-  
+
   private void assertBinaryDocValues(LeafReader r, String f, String cf) throws IOException {
     BinaryDocValues bdvf = r.getBinaryDocValues(f);
     BinaryDocValues bdvcf = r.getBinaryDocValues(cf);
@@ -1548,7 +1548,7 @@ private void assertBinaryDocValues(LeafReader r, String f, String cf) throws IOE
       assertEquals(getValue(bdvcf), getValue(bdvf)*2);
     }
   }
-  
+
   private void verifyDocValues(Directory dir) throws IOException {
     DirectoryReader reader = DirectoryReader.open(dir);
     for (LeafReaderContext context : reader.leaves()) {
@@ -1576,6 +1576,7 @@ public void testDocValuesUpdates() throws Exception {
     updateNumeric(writer, "1", "ndv2", "ndv2_c", 300L);
     updateBinary(writer, "1", "bdv1", "bdv1_c", 300L);
     updateBinary(writer, "1", "bdv2", "bdv2_c", 300L);
+    writer.commit();
 
     verifyDocValues(dir);
 
@@ -1587,6 +1588,77 @@ public void testDocValuesUpdates() throws Exception {
     writer.close();
     dir.close();
   }
+
+  public void testSoftDeletes() throws Exception {
+    Path oldIndexDir = createTempDir("dvupdates");
+    TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
+    Directory dir = newFSDirectory(oldIndexDir);
+    verifyUsesDefaultCodec(dir, dvUpdatesIndex);
+    IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())).setSoftDeletesField("__soft_delete");
+    IndexWriter writer = new IndexWriter(dir, conf);
+    int maxDoc = writer.maxDoc();
+    writer.updateDocValues(new Term("id", "1"), new NumericDocValuesField("__soft_delete", 1));
+
+    if (random().nextBoolean()) {
+      writer.commit();
+    }
+    writer.forceMerge(1);
+    writer.commit();
+    assertEquals(maxDoc-1, writer.maxDoc());
+    writer.close();
+    dir.close();
+  }
+
+  public void testDocValuesUpdatesWithNewField() throws Exception {
+    Path oldIndexDir = createTempDir("dvupdates");
+    TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
+    Directory dir = newFSDirectory(oldIndexDir);
+    verifyUsesDefaultCodec(dir, dvUpdatesIndex);
+
+    // update fields and verify index
+    IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
+    IndexWriter writer = new IndexWriter(dir, conf);
+    // introduce a new field that we later update
+    writer.addDocument(Arrays.asList(new StringField("id", "" + Integer.MAX_VALUE, Field.Store.NO),
+        new NumericDocValuesField("new_numeric", 1),
+        new BinaryDocValuesField("new_binary", toBytes(1))));
+    writer.updateNumericDocValue(new Term("id", "1"), "new_numeric", 1);
+    writer.updateBinaryDocValue(new Term("id", "1"), "new_binary", toBytes(1));
+
+    writer.commit();
+    Runnable assertDV = () -> {
+      boolean found = false;
+      try (DirectoryReader reader = DirectoryReader.open(dir)) {
+        for (LeafReaderContext ctx : reader.leaves()) {
+          LeafReader leafReader = ctx.reader();
+          TermsEnum id = leafReader.terms("id").iterator();
+          if (id.seekExact(new BytesRef("1"))) {
+            PostingsEnum postings = id.postings(null, PostingsEnum.NONE);
+            NumericDocValues numericDocValues = leafReader.getNumericDocValues("new_numeric");
+            BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("new_binary");
+            int doc;
+            while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+              found = true;
+              assertTrue(binaryDocValues.advanceExact(doc));
+              assertTrue(numericDocValues.advanceExact(doc));
+              assertEquals(1, numericDocValues.longValue());
+              assertEquals(toBytes(1), binaryDocValues.binaryValue());
+            }
+          }
+        }
+      } catch (IOException e) {
+        throw new AssertionError(e);
+      }
+      assertTrue(found);
+    };
+    assertDV.run();
+    // merge all segments
+    writer.forceMerge(1);
+    writer.commit();
+    assertDV.run();
+    writer.close();
+    dir.close();
+  }
 
   // LUCENE-5907
   public void testUpgradeWithNRTReader() throws Exception {
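
Note (not part of the patch): the following is a minimal, hypothetical sketch of the scenario the commit message describes, to make the fixed call path concrete. An index whose segments were written by Lucene70Codec is opened with a current IndexWriter, a doc-values field unknown to those old segments is introduced, and an update on that field must then ask the old segment's codec for a DocValuesFormat to write the update file. Before this change Lucene70Codec's PerFieldDocValuesFormat threw "This codec should only be used for reading, not writing" at exactly that point; with the change it returns the default Lucene70 format. The index path, field names, class name, and analyzer below are placeholders and do not come from the patch.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class DvUpdateOnNewFieldSketch {
  public static void main(String[] args) throws Exception {
    // Placeholder path: a directory holding segments originally written with Lucene70Codec.
    try (Directory dir = FSDirectory.open(Paths.get("/path/to/7x-index"));
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      // Introduce a doc-values field that none of the old segments have seen.
      Document doc = new Document();
      doc.add(new StringField("id", "new-doc", Field.Store.NO));
      doc.add(new NumericDocValuesField("new_numeric", 1));
      writer.addDocument(doc);
      // Updating that field on a document stored in an old Lucene70 segment forces
      // PerFieldDocValuesFormat to ask Lucene70Codec for a DocValuesFormat so the
      // update file can be written; this is the call that used to throw
      // IllegalStateException and now succeeds.
      writer.updateNumericDocValue(new Term("id", "1"), "new_numeric", 1L);
      writer.commit();
    }
  }
}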