Reconcile terminology and method naming to 'used/unused segments'; Rename MetadataSegmentManager to MetadataSegmentsManager #7306

Merged
merged 67 commits on Jan 27, 2020
Commits
87556bc
Reconcile terminology and method naming to 'used/unused segments'; Do…
leventov Mar 19, 2019
e44e199
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Mar 20, 2019
25de70d
Fix brace
leventov Mar 20, 2019
fe13d9c
Import order
leventov Mar 21, 2019
050bd3b
Rename withKillDataSourceWhitelist to withSpecificDataSourcesToKill
leventov Mar 21, 2019
562f5b4
Fix tests
leventov Mar 21, 2019
a6138da
Fix tests by adding proper methods without interval parameters to Ind…
leventov Mar 25, 2019
cb6fe7f
More aligned names of DruidCoordinatorHelpers, rename several Coordin…
leventov Mar 28, 2019
164f783
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Mar 28, 2019
d959bb0
Rename ClientCompactTaskQuery to ClientCompactionTaskQuery for consis…
leventov Mar 28, 2019
01c4494
More variable and method renames
leventov Mar 28, 2019
06ec389
Rename MetadataSegments to SegmentsMetadata
leventov Apr 1, 2019
09fab95
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Apr 1, 2019
f4774e1
Javadoc update
leventov Apr 1, 2019
8d5200d
Simplify SegmentsMetadata.getUnusedSegmentIntervals(), more javadocs
leventov Apr 1, 2019
255f320
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Apr 9, 2019
f34eef0
Update Javadoc of VersionedIntervalTimeline.iterateAllObjects()
leventov Apr 9, 2019
2ebb23c
Reorder imports
leventov Apr 9, 2019
ba8ed62
Rename SegmentsMetadata.tryMark... methods to mark... and make them t…
leventov Apr 19, 2019
64b0404
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Apr 19, 2019
231a529
Complete merge
leventov Apr 19, 2019
a113231
Add CollectionUtils.newTreeSet(); Refactor DruidCoordinatorRuntimePar…
leventov Apr 22, 2019
abad3f3
Remove MetadataSegmentManager
leventov Apr 22, 2019
cb1f3b0
Rename millisLagSinceCoordinatorBecomesLeaderBeforeCanMarkAsUnusedOve…
leventov Apr 22, 2019
a5840b8
Fix tests, refactor DruidCluster creation in tests into DruidClusterB…
leventov Apr 26, 2019
f4283d2
Merge with master
leventov May 3, 2019
559f2c8
Merge remote-tracking branch 'upstream/master' into used-segments
leventov May 8, 2019
93c12cd
Merge remote-tracking branch 'upstream/master' into used-segments
leventov May 9, 2019
dfc0dbb
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Jul 19, 2019
553b804
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Jul 22, 2019
886e580
Fix inspections
leventov Jul 22, 2019
77fc5e8
Fix SQLMetadataSegmentManagerEmptyTest and rename it to SqlSegmentsMe…
leventov Jul 22, 2019
1258fa0
Rename SegmentsAndMetadata to SegmentsAndCommitMetadata to reduce the…
leventov Jul 23, 2019
15cb9ae
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Jul 24, 2019
c2c7547
Rename DruidCoordinatorHelper to CoordinatorDuty, refactor DruidCoord…
leventov Jul 25, 2019
9cd239e
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Jul 26, 2019
a723553
Unused import
leventov Jul 26, 2019
26a8381
Optimize imports
leventov Jul 29, 2019
afcd301
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Jul 29, 2019
1557acc
Rename IndexerSQLMetadataStorageCoordinator.getDataSourceMetadata() t…
leventov Jul 29, 2019
96bcab7
Unused import
leventov Jul 29, 2019
d934853
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Jul 31, 2019
c3e488e
Update terminology in datasource-view.tsx
leventov Jul 31, 2019
f65e654
Fix label in datasource-view.spec.tsx.snap
leventov Jul 31, 2019
5e8f3e4
Fix lint errors in datasource-view.tsx
leventov Aug 1, 2019
697f0d5
Doc improvements
leventov Aug 1, 2019
66a23f9
Another attempt to please TSLint
leventov Aug 2, 2019
d3882c4
Another attempt to please TSLint
leventov Aug 2, 2019
4b9d992
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Aug 2, 2019
b77a327
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Aug 8, 2019
d355aa6
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Aug 12, 2019
718bb23
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Aug 28, 2019
19b7f3a
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Aug 28, 2019
2a6bcff
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Sep 11, 2019
22e71d1
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Nov 14, 2019
c575e6e
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Nov 14, 2019
c5d22a0
Style fixes
leventov Nov 14, 2019
285ebc9
Fix IndexerSQLMetadataStorageCoordinator.createUsedSegmentsSqlQueryFo…
leventov Nov 14, 2019
53f572a
Try to fix docs build issue
leventov Nov 14, 2019
da7667c
Javadoc and spelling fixes
leventov Nov 15, 2019
91a5c8c
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Nov 20, 2019
a566b6d
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Nov 21, 2019
6da0b9d
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Dec 8, 2019
d0bf20a
Rename SegmentsMetadata to SegmentsMetadataManager, address other com…
leventov Jan 22, 2020
10587c1
Merge remote-tracking branch 'upstream/master' into used-segments
leventov Jan 22, 2020
2ad6dd5
Address more comments
leventov Jan 23, 2020
6745f4a
Fix a bug in DataSourceOptimizer
leventov Jan 26, 2020
@@ -30,9 +30,12 @@
import org.apache.druid.timeline.partition.ImmutablePartitionHolder;
import org.apache.druid.timeline.partition.PartitionChunk;
import org.apache.druid.timeline.partition.PartitionHolder;
import org.apache.druid.utils.CollectionUtils;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
@@ -44,28 +47,43 @@
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.StreamSupport;

/**
* VersionedIntervalTimeline is a data structure that manages objects on a specific timeline.
*
* It associates a jodatime Interval and a generically-typed version with the object that is being stored.
* It associates an {@link Interval} and a generically-typed version with the object that is being stored.
*
* In the event of overlapping timeline entries, timeline intervals may be chunked. The underlying data associated
* with a timeline entry remains unchanged when chunking occurs.
*
* After loading objects via the add() method, the lookup(Interval) method can be used to get the list of the most
* recent objects (according to the version) that match the given interval. The intent is that objects represent
* a certain time period and when you do a lookup(), you are asking for all of the objects that you need to look
* at in order to get a correct answer about that time period.
* After loading objects via the {@link #add} method, the {@link #lookup(Interval)} method can be used to get the list
* of the most recent objects (according to the version) that match the given interval. The intent is that objects
* represent a certain time period and when you do a {@link #lookup(Interval)}, you are asking for all of the objects
* that you need to look at in order to get a correct answer about that time period.
*
* The findOvershadowed() method returns a list of objects that will never be returned by a call to lookup() because
* they are overshadowed by some other object. This can be used in conjunction with the add() and remove() methods
* to achieve "atomic" updates. First add new items, then check if those items caused anything to be overshadowed, if
* so, remove the overshadowed elements and you have effectively updated your data set without any user impact.
* The {@link #findOvershadowed} method returns a list of objects that will never be returned by a call to {@link
* #lookup} because they are overshadowed by some other object. This can be used in conjunction with the {@link #add}
* and {@link #remove} methods to achieve "atomic" updates. First add new items, then check if those items caused
* anything to be overshadowed, if so, remove the overshadowed elements and you have effectively updated your data set
* without any user impact.
*/
public class VersionedIntervalTimeline<VersionType, ObjectType> implements TimelineLookup<VersionType, ObjectType>
{
public static VersionedIntervalTimeline<String, DataSegment> forSegments(Iterable<DataSegment> segments)
{
return forSegments(segments.iterator());
}

public static VersionedIntervalTimeline<String, DataSegment> forSegments(Iterator<DataSegment> segments)
{
final VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
addSegments(timeline, segments);
return timeline;
}

private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true);

final NavigableMap<Interval, TimelineEntry> completePartitionsTimeline = new TreeMap<Interval, TimelineEntry>(
@@ -75,6 +93,7 @@ public class VersionedIntervalTimeline<VersionType, ObjectType> implements Timel
Comparators.intervalsByStartThenEnd()
);
private final Map<Interval, TreeMap<VersionType, TimelineEntry>> allTimelineEntries = new HashMap<>();
private final AtomicInteger numObjects = new AtomicInteger();

private final Comparator<? super VersionType> versionComparator;

@@ -85,18 +104,6 @@ public VersionedIntervalTimeline(
this.versionComparator = versionComparator;
}

public static VersionedIntervalTimeline<String, DataSegment> forSegments(Iterable<DataSegment> segments)
{
return forSegments(segments.iterator());
}

public static VersionedIntervalTimeline<String, DataSegment> forSegments(Iterator<DataSegment> segments)
{
final VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
addSegments(timeline, segments);
return timeline;
}

public static void addSegments(
VersionedIntervalTimeline<String, DataSegment> timeline,
Iterator<DataSegment> segments
@@ -115,6 +122,32 @@ public Map<Interval, TreeMap<VersionType, TimelineEntry>> getAllTimelineEntries(
return allTimelineEntries;
}

/**
* Returns a lazy collection with all objects in this VersionedIntervalTimeline to be used for iteration or {@link
* Collection#stream()} transformation. The order of objects in this collection is unspecified.
*
* Note: iteration over the returned collection may not be as trivially cheap as, for example, iteration over an
* ArrayList. Try (to some reasonable extent) to organize the code so that it iterates the returned collection only
* once rather than several times.
*/
public Collection<ObjectType> iterateAllObjects()
{
return CollectionUtils.createLazyCollectionFromStream(
() -> allTimelineEntries
[Review comment]
Contributor: "allTimelineEntries" makes me think there's another collection holding a subset of "all", but there isn't one.
Member Author: It follows the naming of "allTimelineEntries". I think the logic behind this naming is that it includes overshadowed objects. I've extended the Javadoc of iterateAllObjects() to make it clearer.

.values()
.stream()
.flatMap((TreeMap<VersionType, TimelineEntry> entryMap) -> entryMap.values().stream())
.flatMap((TimelineEntry entry) -> StreamSupport.stream(entry.getPartitionHolder().spliterator(), false))
.map(PartitionChunk::getObject),
numObjects.get()
);
egor-ryashin marked this conversation as resolved.
}
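The Javadoc of iterateAllObjects() above warns that iterating the returned collection re-runs work, because the collection is only a view over a stream pipeline. As a rough, self-contained sketch of what a helper like CollectionUtils.createLazyCollectionFromStream() might look like (the class name LazyCollectionSketch is hypothetical, not Druid's actual implementation):

```java
import java.util.AbstractCollection;
import java.util.Iterator;
import java.util.function.Supplier;
import java.util.stream.Stream;

// A Collection view over a Supplier<Stream>: the stream pipeline is re-created and
// re-executed on every iteration, while size() stays O(1) via a precomputed count.
// This is why the Javadoc advises iterating the returned collection only once.
class LazyCollectionSketch<E> extends AbstractCollection<E> {
    private final Supplier<Stream<E>> streamSupplier;
    private final int size;

    LazyCollectionSketch(Supplier<Stream<E>> streamSupplier, int size) {
        this.streamSupplier = streamSupplier;
        this.size = size;
    }

    @Override
    public Iterator<E> iterator() {
        // each call re-runs the whole stream pipeline
        return streamSupplier.get().iterator();
    }

    @Override
    public int size() {
        return size;
    }
}
```

In iterateAllObjects() the precomputed count would come from the numObjects counter, which the VersionedIntervalTimeline mutators maintain.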

public int getNumObjects()
{
return numObjects.get();
}

public void add(final Interval interval, VersionType version, PartitionChunk<ObjectType> object)
{
addAll(Iterators.singletonIterator(object), o -> interval, o -> version);
@@ -143,15 +176,19 @@ private void addAll(
TreeMap<VersionType, TimelineEntry> versionEntry = new TreeMap<>(versionComparator);
versionEntry.put(version, entry);
allTimelineEntries.put(interval, versionEntry);
numObjects.incrementAndGet();
} else {
entry = exists.get(version);

if (entry == null) {
entry = new TimelineEntry(interval, version, new PartitionHolder<>(object));
exists.put(version, entry);
numObjects.incrementAndGet();
} else {
PartitionHolder<ObjectType> partitionHolder = entry.getPartitionHolder();
partitionHolder.add(object);
if (partitionHolder.add(object)) {
numObjects.incrementAndGet();
}
}
}

@@ -174,6 +211,7 @@ private void addAll(
}
}

@Nullable
public PartitionChunk<ObjectType> remove(Interval interval, VersionType version, PartitionChunk<ObjectType> chunk)
{
try {
@@ -189,7 +227,11 @@ public PartitionChunk<ObjectType> remove(Interval interval, VersionType version,
return null;
}

PartitionChunk<ObjectType> retVal = entry.getPartitionHolder().remove(chunk);
PartitionChunk<ObjectType> removedChunk = entry.getPartitionHolder().remove(chunk);
if (removedChunk == null) {
return null;
}
numObjects.decrementAndGet();
if (entry.getPartitionHolder().isEmpty()) {
versionEntries.remove(version);
if (versionEntries.isEmpty()) {
Expand All @@ -201,7 +243,7 @@ public PartitionChunk<ObjectType> remove(Interval interval, VersionType version,

remove(completePartitionsTimeline, interval, entry, false);

return retVal;
return removedChunk;
}
finally {
lock.writeLock().unlock();
@@ -217,9 +259,7 @@ public PartitionHolder<ObjectType> findEntry(Interval interval, VersionType vers
if (entry.getKey().equals(interval) || entry.getKey().contains(interval)) {
TimelineEntry foundEntry = entry.getValue().get(version);
if (foundEntry != null) {
return new ImmutablePartitionHolder<ObjectType>(
foundEntry.getPartitionHolder()
);
return new ImmutablePartitionHolder<>(foundEntry.getPartitionHolder());
}
}
}
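The class Javadoc above describes the add()/findOvershadowed()/remove() atomic-update pattern. As a toy model of the overshadowing idea only (OvershadowSketch is hypothetical, covers a single interval with string versions, and is not Druid's real data structure):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;

// Within one interval, only the entry with the highest version is visible to
// lookup(); everything below it is "overshadowed" and can be removed after an
// add() without any user-visible change, which is the "atomic" update pattern.
class OvershadowSketch {
    private final TreeMap<String, String> entriesByVersion = new TreeMap<>();

    void add(String version, String object) {
        entriesByVersion.put(version, object);
    }

    // the most recent object according to version ordering
    String lookup() {
        return entriesByVersion.isEmpty() ? null : entriesByVersion.lastEntry().getValue();
    }

    // all objects that lookup() can never return
    List<String> findOvershadowed() {
        if (entriesByVersion.isEmpty()) {
            return new ArrayList<>();
        }
        return new ArrayList<>(entriesByVersion.headMap(entriesByVersion.lastKey()).values());
    }

    void remove(String version) {
        entriesByVersion.remove(version);
    }
}
```

First add the new item, then query findOvershadowed() and remove the results; lookup() callers never observe an intermediate state in which the data set is missing.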
@@ -23,7 +23,7 @@
*/
public class ImmutablePartitionHolder<T> extends PartitionHolder<T>
{
public ImmutablePartitionHolder(PartitionHolder partitionHolder)
public ImmutablePartitionHolder(PartitionHolder<T> partitionHolder)
{
super(partitionHolder);
}
@@ -35,7 +35,7 @@ public PartitionChunk<T> remove(PartitionChunk<T> tPartitionChunk)
}

@Override
public void add(PartitionChunk<T> tPartitionChunk)
public boolean add(PartitionChunk<T> tPartitionChunk)
{
throw new UnsupportedOperationException();
}
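The ImmutablePartitionHolder diff above only generifies the constructor and adapts add() to the new boolean signature; the class itself follows the common read-only-wrapper idiom of extending the mutable type and overriding its mutators to throw. A minimal sketch with a plain JDK list (ImmutableListSketch is hypothetical and deliberately overrides only two mutators, so it is not a complete immutable wrapper):

```java
import java.util.ArrayList;
import java.util.List;

// Read-only wrapper idiom: copy the source during construction, then reject
// mutation attempts at runtime by throwing UnsupportedOperationException.
class ImmutableListSketch<T> extends ArrayList<T> {
    ImmutableListSketch(List<T> source) {
        super(source); // copy while still "mutable", before handing out references
    }

    @Override
    public boolean add(T element) {
        throw new UnsupportedOperationException();
    }

    @Override
    public T remove(int index) {
        throw new UnsupportedOperationException();
    }
}
```

Unlike compile-time immutability (an interface with no mutators), this idiom fails only at runtime, which is a known trade-off of the approach.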
@@ -22,72 +22,62 @@
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;

import javax.annotation.Nullable;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;
import java.util.Spliterator;
import java.util.TreeSet;
import java.util.TreeMap;

/**
* An object that clumps together multiple other objects which each represent a shard of some space.
*/
public class PartitionHolder<T> implements Iterable<PartitionChunk<T>>
{
private final TreeSet<PartitionChunk<T>> holderSet;
private final TreeMap<PartitionChunk<T>, PartitionChunk<T>> holderMap;

public PartitionHolder(PartitionChunk<T> initialChunk)
{
this.holderSet = new TreeSet<>();
this.holderMap = new TreeMap<>();
add(initialChunk);
}

public PartitionHolder(List<PartitionChunk<T>> initialChunks)
{
this.holderSet = new TreeSet<>();
this.holderMap = new TreeMap<>();
for (PartitionChunk<T> chunk : initialChunks) {
add(chunk);
}
}

public PartitionHolder(PartitionHolder partitionHolder)
public PartitionHolder(PartitionHolder<T> partitionHolder)
{
this.holderSet = new TreeSet<>();
this.holderSet.addAll(partitionHolder.holderSet);
this.holderMap = new TreeMap<>();
this.holderMap.putAll(partitionHolder.holderMap);
}

public void add(PartitionChunk<T> chunk)
public boolean add(PartitionChunk<T> chunk)
{
holderSet.add(chunk);
return holderMap.putIfAbsent(chunk, chunk) == null;
[Review comment]
Contributor: I suppose TreeMap was chosen because of the putIfAbsent() method? Anyway, I suspect we don't call add(chunk) with the same chunk twice, or am I missing something?
Member Author: I just replaced TreeSet with TreeMap; I'm not sure how it relates to putIfAbsent(). As for calling add(chunk) with the same chunk twice: I don't know. This is unprovable from the point of view of the VersionedIntervalTimeline abstraction, which may contain any objects, and anybody may call its add() method with anything. So VersionedIntervalTimeline assumes that objects can be repeated and increments the numObjects counter only when PartitionHolder.add() returns true.

}

@Nullable
public PartitionChunk<T> remove(PartitionChunk<T> chunk)
{
if (!holderSet.isEmpty()) {
// Somewhat funky implementation in order to return the removed object as it exists in the set
SortedSet<PartitionChunk<T>> tailSet = holderSet.tailSet(chunk, true);
if (!tailSet.isEmpty()) {
PartitionChunk<T> element = tailSet.first();
if (chunk.equals(element)) {
holderSet.remove(element);
return element;
}
}
}
return null;
return holderMap.remove(chunk);
}

public boolean isEmpty()
{
return holderSet.isEmpty();
return holderMap.isEmpty();
}

public boolean isComplete()
{
if (holderSet.isEmpty()) {
if (holderMap.isEmpty()) {
return false;
}

Iterator<PartitionChunk<T>> iter = holderSet.iterator();
Iterator<PartitionChunk<T>> iter = holderMap.keySet().iterator();

PartitionChunk<T> curr = iter.next();

@@ -117,7 +107,7 @@ public boolean isComplete()
public PartitionChunk<T> getChunk(final int partitionNum)
{
final Iterator<PartitionChunk<T>> retVal = Iterators.filter(
holderSet.iterator(),
holderMap.keySet().iterator(),
input -> input.getChunkNumber() == partitionNum
);

@@ -127,13 +117,13 @@ public PartitionChunk<T> getChunk(final int partitionNum)
@Override
public Iterator<PartitionChunk<T>> iterator()
{
return holderSet.iterator();
return holderMap.keySet().iterator();
}

@Override
public Spliterator<PartitionChunk<T>> spliterator()
{
return holderSet.spliterator();
return holderMap.keySet().spliterator();
}

public Iterable<T> payloads()
@@ -153,7 +143,7 @@ public boolean equals(Object o)

PartitionHolder that = (PartitionHolder) o;

if (!holderSet.equals(that.holderSet)) {
if (!holderMap.equals(that.holderMap)) {
return false;
}

@@ -163,14 +153,14 @@
@Override
public int hashCode()
{
return holderSet.hashCode();
return holderMap.hashCode();
}

@Override
public String toString()
{
return "PartitionHolder{" +
"holderSet=" + holderSet +
"holderMap=" + holderMap +
'}';
}
}
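The switch from TreeSet to TreeMap above lets add() report via putIfAbsent() whether a chunk was newly inserted, and lets remove() hand back the stored instance directly instead of the old tailSet() search. Together with the earlier VersionedIntervalTimeline changes, this is what makes an exact numObjects counter possible. A self-contained sketch of that bookkeeping (HolderSketch is a hypothetical stand-in for PartitionHolder, keyed by chunk number rather than by the chunk itself):

```java
import java.util.TreeMap;

// Sketch of the add/remove bookkeeping: the counter changes only when the map
// actually changes, so duplicate add() calls and misses on remove() are no-ops.
class HolderSketch {
    private final TreeMap<Integer, String> holderMap = new TreeMap<>();
    private int numObjects = 0;

    // true only when the chunk was not present before
    boolean add(int chunkNumber, String chunk) {
        boolean added = holderMap.putIfAbsent(chunkNumber, chunk) == null;
        if (added) {
            numObjects++;
        }
        return added;
    }

    // the stored instance, or null if absent
    String remove(int chunkNumber) {
        String removed = holderMap.remove(chunkNumber);
        if (removed != null) {
            numObjects--;
        }
        return removed;
    }

    int getNumObjects() {
        return numObjects;
    }
}
```

This mirrors how the PR's VersionedIntervalTimeline.addAll() increments its AtomicInteger only when PartitionHolder.add() returns true, and remove() decrements it only when a chunk was actually removed.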