apache · jihoonson · Jan 27, 2020 · Mar 19, 2019 · Mar 20, 2019 · Mar 20, 2019
diff --git a/.idea/inspectionProfiles/Druid.xml b/.idea/inspectionProfiles/Druid.xml
diff --git a/.../src/main/java/org/apache/druid/server/coordinator/NewestSegmentFirstPolicyBenchmark.java b/.../src/main/java/org/apache/druid/server/coordinator/NewestSegmentFirstPolicyBenchmark.java
@@ -24,9 +24,9 @@
 import org.apache.druid.client.DataSourcesSnapshot;
 import org.apache.druid.jackson.DefaultObjectMapper;
 import org.apache.druid.java.util.common.DateTimes;
-import org.apache.druid.server.coordinator.helper.CompactionSegmentIterator;
-import org.apache.druid.server.coordinator.helper.CompactionSegmentSearchPolicy;
-import org.apache.druid.server.coordinator.helper.NewestSegmentFirstPolicy;
+import org.apache.druid.server.coordinator.duty.CompactionSegmentIterator;
+import org.apache.druid.server.coordinator.duty.CompactionSegmentSearchPolicy;
+import org.apache.druid.server.coordinator.duty.NewestSegmentFirstPolicy;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.apache.druid.timeline.partition.NumberedShardSpec;

diff --git a/core/src/main/java/org/apache/druid/java/util/metrics/AllocationMetricCollectors.java b/core/src/main/java/org/apache/druid/java/util/metrics/AllocationMetricCollectors.java
@@ -22,7 +22,6 @@
 import org.apache.druid.java.util.common.logger.Logger;
 
 import javax.annotation.Nullable;
-
 import java.lang.management.ManagementFactory;
 import java.lang.management.ThreadMXBean;
 import java.lang.reflect.Method;

diff --git a/core/src/main/java/org/apache/druid/metadata/MetadataStorageConnector.java b/core/src/main/java/org/apache/druid/metadata/MetadataStorageConnector.java
@@ -19,6 +19,7 @@
 
 package org.apache.druid.metadata;
 
+import javax.annotation.Nullable;
 import java.util.List;
 
 /**
@@ -36,7 +37,11 @@ Void insertOrUpdate(
       byte[] value
   );
 
-  byte[] lookup(
+  /**
+   * Returns the value of the valueColumn when there is only one row matched to the given key.
+   * This method returns null if there is no such row and throws an error if more than one row matches.
+   */
+  @Nullable byte[] lookup(
       String tableName,
       String keyColumn,
       String valueColumn,

diff --git a/core/src/main/java/org/apache/druid/metadata/MetadataStorageConnectorConfig.java b/core/src/main/java/org/apache/druid/metadata/MetadataStorageConnectorConfig.java
@@ -21,6 +21,7 @@
 
 import com.fasterxml.jackson.annotation.JsonProperty;
 import org.apache.druid.java.util.common.StringUtils;
+
 import java.util.Properties;
 
 /**

diff --git a/core/src/main/java/org/apache/druid/segment/SegmentUtils.java b/core/src/main/java/org/apache/druid/segment/SegmentUtils.java
@@ -19,6 +19,7 @@
 
 package org.apache.druid.segment;
 
+import com.google.common.collect.Collections2;
 import com.google.common.hash.HashFunction;
 import com.google.common.hash.Hasher;
 import com.google.common.hash.Hashing;
@@ -37,7 +38,6 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
-import java.util.stream.Collectors;
 
 /**
  * Utility methods useful for implementing deep storage extensions.
@@ -78,16 +78,14 @@ public static int getVersionFromDir(File inDir) throws IOException
   }
 
   /**
-   * Returns a String with identifiers of "segments" comma-separated. Useful for log messages. Not useful for anything
-   * else, because this doesn't take special effort to escape commas that occur in identifiers (not common, but could
-   * potentially occur in a datasource name).
+   * Returns an object whose toString() returns a String with identifiers of the given segments, comma-separated. Useful
+   * for log messages. Not useful for anything else, because this doesn't take special effort to escape commas that
+   * occur in identifiers (not common, but could potentially occur in a datasource name).
    */
-  public static String commaSeparateIdentifiers(final Collection<DataSegment> segments)
+  public static Object commaSeparatedIdentifiers(final Collection<DataSegment> segments)
   {
-    return segments
-        .stream()
-        .map(segment -> segment.getId().toString())
-        .collect(Collectors.joining(", "));
+    // Lazy, to avoid premature string creation when the logging level is turned off
+    return Collections2.transform(segments, DataSegment::getId);
   }
 
   private SegmentUtils()

diff --git a/core/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java b/core/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java
@@ -38,7 +38,6 @@
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.IdentityHashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -56,25 +55,38 @@
 /**
  * VersionedIntervalTimeline is a data structure that manages objects on a specific timeline.
  *
- * It associates a jodatime Interval and a generically-typed version with the object that is being stored.
+ * It associates an {@link Interval} and a generically-typed version with the object that is being stored.
  *
  * In the event of overlapping timeline entries, timeline intervals may be chunked. The underlying data associated
  * with a timeline entry remains unchanged when chunking occurs.
  *
- * After loading objects via the add() method, the lookup(Interval) method can be used to get the list of the most
- * recent objects (according to the version) that match the given interval.  The intent is that objects represent
- * a certain time period and when you do a lookup(), you are asking for all of the objects that you need to look
- * at in order to get a correct answer about that time period.
+ * After loading objects via the {@link #add} method, the {@link #lookup(Interval)} method can be used to get the list
+ * of the most recent objects (according to the version) that match the given interval. The intent is that objects
+ * represent a certain time period and when you do a {@link #lookup(Interval)}, you are asking for all of the objects
+ * that you need to look at in order to get a correct answer about that time period.
  *
- * The {@link #findFullyOvershadowed} method returns a list of objects that will never be returned by a call to lookup()
- * because they are overshadowed by some other object.  This can be used in conjunction with the add() and remove()
- * methods to achieve "atomic" updates.  First add new items, then check if those items caused anything to be
- * overshadowed, if so, remove the overshadowed elements and you have effectively updated your data set without any user
- * impact.
+ * The {@link #findFullyOvershadowed} method returns a list of objects that will never be returned by a call to {@link
+ * #lookup} because they are overshadowed by some other object. This can be used in conjunction with the {@link #add}
+ * and {@link #remove} methods to achieve "atomic" updates. First add new items, then check if those items caused
+ * anything to be overshadowed, if so, remove the overshadowed elements and you have effectively updated your data set
+ * without any user impact.
  */
 public class VersionedIntervalTimeline<VersionType, ObjectType extends Overshadowable<ObjectType>>
     implements TimelineLookup<VersionType, ObjectType>
 {
+  public static VersionedIntervalTimeline<String, DataSegment> forSegments(Iterable<DataSegment> segments)
+  {
+    return forSegments(segments.iterator());
+  }
+
+  public static VersionedIntervalTimeline<String, DataSegment> forSegments(Iterator<DataSegment> segments)
+  {
+    final VersionedIntervalTimeline<String, DataSegment> timeline =
+        new VersionedIntervalTimeline<>(Comparator.naturalOrder());
+    addSegments(timeline, segments);
+    return timeline;
+  }
+
   private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true);
 
   // Below timelines stores only *visible* timelineEntries
@@ -99,19 +111,6 @@ public VersionedIntervalTimeline(Comparator<? super VersionType> versionComparat
     this.versionComparator = versionComparator;
   }
 
-  public static VersionedIntervalTimeline<String, DataSegment> forSegments(Iterable<DataSegment> segments)
-  {
-    return forSegments(segments.iterator());
-  }
-
-  public static VersionedIntervalTimeline<String, DataSegment> forSegments(Iterator<DataSegment> segments)
-  {
-    final VersionedIntervalTimeline<String, DataSegment> timeline =
-        new VersionedIntervalTimeline<>(Comparator.naturalOrder());
-    addSegments(timeline, segments);
-    return timeline;
-  }
-
   public static void addSegments(
       VersionedIntervalTimeline<String, DataSegment> timeline,
       Iterator<DataSegment> segments
@@ -151,6 +150,11 @@ public Collection<ObjectType> iterateAllObjects()
     );
   }
 
+  public int getNumObjects()
+  {
+    return numObjects.get();
+  }
+
   /**
    * Computes a set with all objects falling within the specified interval which are at least partially "visible" in
    * this interval (that is, are not fully overshadowed within this interval).
@@ -371,62 +375,69 @@ public Set<TimelineObjectHolder<VersionType, ObjectType>> findFullyOvershadowed(
     lock.readLock().lock();
     try {
       // 1. Put all timelineEntries and remove all visible entries to find out only non-visible timelineEntries.
-      final Map<Interval, Map<VersionType, TimelineEntry>> overShadowed = new HashMap<>();
-      for (Map.Entry<Interval, TreeMap<VersionType, TimelineEntry>> versionEntry : allTimelineEntries.entrySet()) {
-        @SuppressWarnings("unchecked")
-        Map<VersionType, TimelineEntry> versionCopy = (TreeMap) versionEntry.getValue().clone();
-        overShadowed.put(versionEntry.getKey(), versionCopy);
-      }
-
-      for (Entry<Interval, TimelineEntry> entry : completePartitionsTimeline.entrySet()) {
-        Map<VersionType, TimelineEntry> versionEntry = overShadowed.get(entry.getValue().getTrueInterval());
-        if (versionEntry != null) {
-          versionEntry.remove(entry.getValue().getVersion());
-          if (versionEntry.isEmpty()) {
-            overShadowed.remove(entry.getValue().getTrueInterval());
-          }
-        }
-      }
-
-      for (Entry<Interval, TimelineEntry> entry : incompletePartitionsTimeline.entrySet()) {
-        Map<VersionType, TimelineEntry> versionEntry = overShadowed.get(entry.getValue().getTrueInterval());
-        if (versionEntry != null) {
-          versionEntry.remove(entry.getValue().getVersion());
-          if (versionEntry.isEmpty()) {
-            overShadowed.remove(entry.getValue().getTrueInterval());
-          }
-        }
-      }
-
-      final Set<TimelineObjectHolder<VersionType, ObjectType>> retVal = new HashSet<>();
-      for (Entry<Interval, Map<VersionType, TimelineEntry>> versionEntry : overShadowed.entrySet()) {
-        for (Entry<VersionType, TimelineEntry> entry : versionEntry.getValue().entrySet()) {
-          final TimelineEntry timelineEntry = entry.getValue();
-          retVal.add(timelineEntryToObjectHolder(timelineEntry));
-        }
-      }
+      final Map<Interval, Map<VersionType, TimelineEntry>> overshadowedPartitionsTimeline =
+          computeOvershadowedPartitionsTimeline();
+
+      final Set<TimelineObjectHolder<VersionType, ObjectType>> overshadowedObjects = overshadowedPartitionsTimeline
+          .values()
+          .stream()
+          .flatMap(
+              (Map<VersionType, TimelineEntry> entry) -> entry.values().stream().map(this::timelineEntryToObjectHolder)
+          )
+          .collect(Collectors.toSet());
 
-      // 2. Visible timelineEntries can also have overshadowed segments. Add them to the result too.
+      // 2. Visible timelineEntries can also have overshadowed objects. Add them to the result too.
       for (TimelineEntry entry : incompletePartitionsTimeline.values()) {
-        final List<PartitionChunk<ObjectType>> entryOvershadowed = entry.partitionHolder.getOvershadowed();
-        if (!entryOvershadowed.isEmpty()) {
-          retVal.add(
+        final List<PartitionChunk<ObjectType>> overshadowedEntries = entry.partitionHolder.getOvershadowed();
+        if (!overshadowedEntries.isEmpty()) {
+          overshadowedObjects.add(
               new TimelineObjectHolder<>(
                   entry.trueInterval,
                   entry.version,
-                  new PartitionHolder<>(entryOvershadowed)
+                  new PartitionHolder<>(overshadowedEntries)
               )
           );
         }
       }
 
-      return retVal;
+      return overshadowedObjects;
     }
     finally {
       lock.readLock().unlock();
     }
   }
 
+  /**
+   * Returns a copy of {@code allTimelineEntries} without the entries, looked up by true interval and version, that are
+   * held by {@code completePartitionsTimeline} or {@code incompletePartitionsTimeline}.
+   */
+  private Map<Interval, Map<VersionType, TimelineEntry>> computeOvershadowedPartitionsTimeline()
+  {
+    final Map<Interval, Map<VersionType, TimelineEntry>> overshadowedPartitionsTimeline = new HashMap<>();
+    // Copy every per-interval version map so that the removals below never modify allTimelineEntries itself.
+    allTimelineEntries.forEach(
+        (Interval interval, TreeMap<VersionType, TimelineEntry> versionEntry) ->
+            overshadowedPartitionsTimeline.put(interval, new TreeMap<>(versionEntry))
+    );
+    removeVisibleEntries(overshadowedPartitionsTimeline, completePartitionsTimeline.values());
+    removeVisibleEntries(overshadowedPartitionsTimeline, incompletePartitionsTimeline.values());
+    return overshadowedPartitionsTimeline;
+  }
+
+  /** Removes the given entries from the candidates; an interval whose version map becomes empty is dropped. */
+  private void removeVisibleEntries(
+      Map<Interval, Map<VersionType, TimelineEntry>> candidates,
+      Collection<TimelineEntry> visibleEntries
+  )
+  {
+    for (TimelineEntry entry : visibleEntries) {
+      candidates.computeIfPresent(entry.getTrueInterval(), (interval, versions) -> {
+        versions.remove(entry.getVersion());
+        return versions.isEmpty() ? null : versions;
+      });
+    }
+  }
+
   public boolean isOvershadowed(Interval interval, VersionType version, ObjectType object)
   {
     lock.readLock().lock();