2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
@@ -69,6 +69,8 @@ Improvements

* GITHUB#15124: Use RamUsageEstimator to calculate size for non-accountable queries. (Sagar Upadhyaya)

* GITHUB#15453: Avoid unnecessary sorting and instantiations in readMapOfStrings. (Benjamin Lerer)
Member:
Can we call out the parts of the code where this change actually means something? e.g. segment attributes & diagnostics are now not ordered. Also, this means two segment infos that were previously equivalent will now have different toStrings (which is likely OK).

Author:
In SegmentInfo the maps were not ordered before the change, because the use of Map.copyOf in the constructor was discarding the TreeMap ordering. A similar thing was happening for the TreeSet: its elements were converted and added to a HashSet. Only the FieldInfo attributes were somehow retaining the order, as long as no new attributes were added. And of course that was only the case when fewer than 10 elements were put in the maps, since larger maps were read into a HashMap to begin with.
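As a small illustration (not from the patch): Map.copyOf always builds the JDK's hash-ordered immutable map, so whatever ordering its argument had, including a TreeMap's key order, is discarded:

    import java.util.Map;
    import java.util.TreeMap;

    public class OrderingCheck {
        public static void main(String[] args) {
            Map<String, String> sorted = new TreeMap<>(Map.of("c", "3", "a", "1", "b", "2"));
            // TreeMap iterates in key order:
            System.out.println(sorted);              // {a=1, b=2, c=3}
            // Map.copyOf has unspecified iteration order, so the sorting is lost and two
            // equal maps may well print (toString) differently:
            System.out.println(Map.copyOf(sorted));
        }
    }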


Optimizations
---------------------
* GITHUB#14011: Reduce allocation rate in HNSW concurrent merge. (Viliam Durina)
@@ -185,7 +185,6 @@ private DocValuesConsumer getInstance(FieldInfo field, boolean ignoreCurrentForm
}
final String formatName = format.getName();

field.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
Integer suffix = null;

ConsumerAndSuffix consumer = formats.get(format);
@@ -229,8 +228,8 @@ private DocValuesConsumer getInstance(FieldInfo field, boolean ignoreCurrentForm
assert suffixes.containsKey(formatName);
suffix = consumer.suffix;
}

field.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(suffix));
field.putAttributes(
Map.of(PER_FIELD_FORMAT_KEY, formatName, PER_FIELD_SUFFIX_KEY, Integer.toString(suffix)));
// TODO: we should only provide the "slice" of FIS
// that this DVF actually sees ...
return consumer.consumer;
@@ -148,7 +148,6 @@ private KnnVectorsWriter getInstance(FieldInfo field) throws IOException {
}
final String formatName = format.getName();

field.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
Integer suffix;

WriterAndSuffix writerAndSuffix = formats.get(format);
@@ -176,7 +175,8 @@ private KnnVectorsWriter getInstance(FieldInfo field) throws IOException {
assert suffixes.containsKey(formatName);
suffix = writerAndSuffix.suffix;
}
field.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(suffix));
field.putAttributes(
Map.of(PER_FIELD_FORMAT_KEY, formatName, PER_FIELD_SUFFIX_KEY, Integer.toString(suffix)));
return writerAndSuffix.writer;
}

@@ -245,8 +245,12 @@ private Map<PostingsFormat, FieldsGroup> buildFieldsGroupMapping(

groupBuilder.addField(field);

fieldInfo.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
fieldInfo.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(groupBuilder.suffix));
fieldInfo.putAttributes(
Map.of(
PER_FIELD_FORMAT_KEY,
formatName,
PER_FIELD_SUFFIX_KEY,
Integer.toString(groupBuilder.suffix)));
}

Map<PostingsFormat, FieldsGroup> formatToGroups =
16 changes: 15 additions & 1 deletion lucene/core/src/java/org/apache/lucene/index/FieldInfo.java
@@ -113,7 +113,7 @@ public FieldInfo(
this.omitNorms = false;
}
this.dvGen = dvGen;
this.attributes = Objects.requireNonNull(attributes);
this.attributes = Map.copyOf(Objects.requireNonNull(attributes));
this.pointDimensionCount = pointDimensionCount;
this.pointIndexDimensionCount = pointIndexDimensionCount;
this.pointNumBytes = pointNumBytes;
@@ -679,6 +679,20 @@ public synchronized String putAttribute(String key, String value) {
return oldValue;
}

/**
* Puts some codec attribute values.
*
* <p>If multiple attributes need to be added, {@code putAttributes} is more efficient than
* calling {@code putAttribute} multiple times, as it avoids unnecessary copies and synchronization.
*/
public synchronized void putAttributes(Map<String, String> map) {
HashMap<String, String> newMap = new HashMap<>(attributes);
newMap.putAll(map);
// This needs to be thread-safe as multiple threads may be updating (different) attributes
// concurrently due to concurrent merging.
attributes = Collections.unmodifiableMap(newMap);
}

/** Returns internal codec attributes map. */
public synchronized Map<String, String> attributes() {
return attributes;
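For context, a minimal sketch (not part of the diff) of the call-site change the per-field format classes above make, where two synchronized copies of the attributes map collapse into one:

    // Before: each putAttribute call takes the lock and copies the attributes map once.
    field.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
    field.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(suffix));

    // After: a single lock acquisition and a single copy for the whole batch.
    field.putAttributes(
        Map.of(PER_FIELD_FORMAT_KEY, formatName, PER_FIELD_SUFFIX_KEY, Integer.toString(suffix)));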
@@ -775,7 +775,7 @@ FieldInfo add(FieldInfo fi, long dvGen) {
if (curFi != null) {
curFi.verifySameSchema(fi);
if (fi.attributes() != null) {
fi.attributes().forEach((k, v) -> curFi.putAttribute(k, v));
curFi.putAttributes(fi.attributes());
}
if (fi.hasPayloads()) {
curFi.setStorePayloads();
Expand Down
@@ -362,6 +362,7 @@ public synchronized String putAttribute(String key, String value) {
// This needs to be thread-safe because multiple threads may be updating (different) attributes
// at the same time due to concurrent merging, plus some threads may be calling toString() on
// segment info while other threads are updating attributes.
// We use unmodifiableMap instead of Map.copyOf to avoid an unnecessary copy.
attributes = Collections.unmodifiableMap(newMap);
return oldValue;
}
29 changes: 12 additions & 17 deletions lucene/core/src/java/org/apache/lucene/store/DataInput.java
@@ -18,14 +18,9 @@

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.util.BitUtil;

/**
@@ -248,17 +243,17 @@ public DataInput clone() {
public Map<String, String> readMapOfStrings() throws IOException {
int count = readVInt();
if (count == 0) {
return Collections.emptyMap();
return Map.of();
} else if (count == 1) {
return Collections.singletonMap(readString(), readString());
return Map.of(readString(), readString());
Contributor:
These changes only work if the input strings are non-null, which I'm not sure we ever enforced or documented, so it would be an incompatible change. This applies to both sets and maps.

import java.util.*;

public class Test {
	public static void main(String[] args) {
		String s = null;
		System.out.println("This works");
		Collections.singleton(s);
		System.out.println("This does not");
		Set.of(s);
	}
}

To play it safe, I'd limit the change to just removing the TreeSet (and leaving the HashSet). Then this can be backported with no problems. If you'd like to move to non-null arguments, then this needs to be documented in the migration notes, method javadocs, etc.
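For reference, a minimal sketch of that narrower variant (drop only the TreeSet, keep the unmodifiable HashSet so null handling and backporting stay safe):

    public Set<String> readSetOfStrings() throws IOException {
        int count = readVInt();
        if (count == 0) {
            return Collections.emptySet();
        } else if (count == 1) {
            return Collections.singleton(readString());
        } else {
            // Always a HashSet: the sorting work goes away, but the collection still
            // tolerates nulls and the result stays Collections.unmodifiableSet as before.
            Set<String> set = new HashSet<>();
            for (int i = 0; i < count; i++) {
                set.add(readString());
            }
            return Collections.unmodifiableSet(set);
        }
    }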

Author:
Currently the readString method cannot return a null String, and SegmentInfo converts the maps through Map.copyOf, which enforces the same check, just later on. Also, writeString does not support null values. Therefore, it does not look like null Strings are supported by the code. That being said, I understand the concern. The choice of using immutable maps (created through Map.of and Map.copyOf) and sets was to align with what is done within SegmentInfo and to avoid an extra instantiation, as Map.copyOf of an immutable map is a no-op.
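A quick check of that last point on a recent JDK (the identity shortcut is an implementation detail, but Map.copyOf is documented to generally not create a copy of an already-unmodifiable map):

    import java.util.Map;

    public class CopyOfCheck {
        public static void main(String[] args) {
            Map<String, String> m = Map.of("k", "v");
            // Copying a map that is already one of the JDK's immutable maps returns the
            // same instance, so SegmentInfo's Map.copyOf of the readMapOfStrings result
            // does not allocate anything new.
            System.out.println(Map.copyOf(m) == m);  // prints true on current JDKs
        }
    }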

} else {
Map<String, String> map = count > 10 ? new HashMap<>() : new TreeMap<>();
@SuppressWarnings("unchecked")
Map.Entry<String, String>[] entries =
(Map.Entry<String, String>[]) new Map.Entry<?, ?>[count];
for (int i = 0; i < count; i++) {
final String key = readString();
final String val = readString();
map.put(key, val);
entries[i] = Map.entry(readString(), readString());
}
return Collections.unmodifiableMap(map);
return Map.ofEntries(entries);
}
}

@@ -270,15 +265,15 @@ public Map<String, String> readMapOfStrings() throws IOException {
public Set<String> readSetOfStrings() throws IOException {
int count = readVInt();
if (count == 0) {
return Collections.emptySet();
return Set.of();
} else if (count == 1) {
return Collections.singleton(readString());
return Set.of(readString());
} else {
Set<String> set = count > 10 ? new HashSet<>() : new TreeSet<>();
String[] set = new String[count];
for (int i = 0; i < count; i++) {
set.add(readString());
set[i] = readString();
}
return Collections.unmodifiableSet(set);
return Set.of(set);
}
}
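A rough round-trip sketch of the new behavior, assuming ByteBuffersDataOutput and its toDataInput() helper (neither is touched by this patch); the collections read back are now the JDK's immutable implementations:

    import java.util.Set;
    import org.apache.lucene.store.ByteBuffersDataOutput;
    import org.apache.lucene.store.DataInput;

    public class RoundTrip {
        public static void main(String[] args) throws Exception {
            ByteBuffersDataOutput out = new ByteBuffersDataOutput();
            out.writeSetOfStrings(Set.of("a", "b", "c"));

            DataInput in = out.toDataInput();
            Set<String> back = in.readSetOfStrings();
            System.out.println(back);  // same three elements, unspecified iteration order
            // back.add("d");          // throws UnsupportedOperationException, as before
        }
    }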
