
Merge branch 'reduce_autotuner' of https://github.com/configurator/ra…

2 parents 55c21da + 9f8a7dd commit 0527165b39b76345921e706d3b63d60781f5400c @ayende committed May 9, 2012
@@ -19,6 +19,8 @@ public class DatabaseStatistics
public string[] StaleIndexes { get; set; }
public int CurrentNumberOfItemsToIndexInSingleBatch { get; set; }
+
+ public int CurrentNumberOfItemsToReduceInSingleBatch { get; set; }
public IndexStats[] Indexes { get; set; }
@@ -37,8 +37,10 @@ public InMemoryRavenConfiguration()
Settings = new NameValueCollection(StringComparer.InvariantCultureIgnoreCase);
BackgroundTasksPriority = ThreadPriority.Normal;
- MaxNumberOfItemsToIndexInSingleBatch = Environment.Is64BitProcess ? 128*1024 : 64*1024;
+ MaxNumberOfItemsToIndexInSingleBatch = Environment.Is64BitProcess ? 128 * 1024 : 64 * 1024;
+ MaxNumberOfItemsToReduceInSingleBatch = MaxNumberOfItemsToIndexInSingleBatch / 2;
InitialNumberOfItemsToIndexInSingleBatch = Environment.Is64BitProcess ? 512 : 256;
+ InitialNumberOfItemsToReduceInSingleBatch = InitialNumberOfItemsToIndexInSingleBatch / 2;
AvailableMemoryForRaisingIndexBatchSizeLimit = Math.Min(768, MemoryStatistics.TotalPhysicalMemory/2);
MaxNumberOfParallelIndexTasks = 8;
@@ -110,6 +112,19 @@ public void Initialize()
InitialNumberOfItemsToIndexInSingleBatch = Math.Min(int.Parse(initialNumberOfItemsToIndexInSingleBatch),
MaxNumberOfItemsToIndexInSingleBatch);
}
+ var maxNumberOfItemsToReduceInSingleBatch = Settings["Raven/MaxNumberOfItemsToReduceInSingleBatch"];
+ if (maxNumberOfItemsToReduceInSingleBatch != null)
+ {
+ MaxNumberOfItemsToReduceInSingleBatch = Math.Max(int.Parse(maxNumberOfItemsToReduceInSingleBatch), 128);
+ InitialNumberOfItemsToReduceInSingleBatch = Math.Min(MaxNumberOfItemsToReduceInSingleBatch,
+ InitialNumberOfItemsToReduceInSingleBatch);
+ }
+ var initialNumberOfItemsToReduceInSingleBatch = Settings["Raven/InitialNumberOfItemsToReduceInSingleBatch"];
+ if (initialNumberOfItemsToReduceInSingleBatch != null)
+ {
+ InitialNumberOfItemsToReduceInSingleBatch = Math.Min(int.Parse(initialNumberOfItemsToReduceInSingleBatch),
+ MaxNumberOfItemsToReduceInSingleBatch);
+ }
var maxNumberOfParallelIndexTasks = Settings["Raven/MaxNumberOfParallelIndexTasks"];
MaxNumberOfParallelIndexTasks = maxNumberOfParallelIndexTasks != null ? int.Parse(maxNumberOfParallelIndexTasks) : Environment.ProcessorCount;
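
For context: both new reduce settings are read from standard Raven/* configuration keys, just like their indexing counterparts. Below is a minimal sketch of how the clamping above behaves when the keys are supplied programmatically; the values are made up purely for illustration, and in a real deployment the keys would normally come from appSettings.

    var configuration = new InMemoryRavenConfiguration();
    // hypothetical values, chosen only to show the clamping behaviour
    configuration.Settings["Raven/MaxNumberOfItemsToReduceInSingleBatch"] = "32768";
    configuration.Settings["Raven/InitialNumberOfItemsToReduceInSingleBatch"] = "512";
    configuration.Initialize();
    // MaxNumberOfItemsToReduceInSingleBatch is forced to at least 128, and
    // InitialNumberOfItemsToReduceInSingleBatch is capped at that maximum.
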
@@ -347,11 +362,23 @@ public string ServerUrl
/// <summary>
/// The initial number of items to take when indexing a batch
- /// Default: 5,000
+ /// Default: 512 or 256 depending on CPU architecture
/// </summary>
public int InitialNumberOfItemsToIndexInSingleBatch { get; set; }
/// <summary>
+ /// Max number of items to take for reducing in a batch
+ /// Minimum: 128
+ /// </summary>
+ public int MaxNumberOfItemsToReduceInSingleBatch { get; set; }
+
+ /// <summary>
+ /// The initial number of items to take when reducing a batch
+ /// Default: 256 or 128 depending on CPU architecture
+ /// </summary>
+ public int InitialNumberOfItemsToReduceInSingleBatch { get; set; }
+
+ /// <summary>
/// The maximum number of indexing tasks allowed to run in parallel
/// Default: The number of processors in the current machine
/// </summary>
@@ -246,6 +246,7 @@ public DatabaseStatistics Statistics
var result = new DatabaseStatistics
{
CurrentNumberOfItemsToIndexInSingleBatch = workContext.CurrentNumberOfItemsToIndexInSingleBatch,
+ CurrentNumberOfItemsToReduceInSingleBatch = workContext.CurrentNumberOfItemsToReduceInSingleBatch,
CountOfIndexes = IndexStorage.Indexes.Length,
Errors = workContext.Errors,
Triggers = PutTriggers.Select(x => new DatabaseStatistics.TriggerInfo {Name = x.ToString(), Type = "Put"})
@@ -19,15 +19,13 @@ public abstract class AbstractIndexingExecuter
protected ITransactionalStorage transactionalStorage;
protected int workCounter;
protected int lastFlushedWorkCounter;
- protected IndexBatchSizeAutoTuner autoTuner;
+ protected BaseBatchSizeAutoTuner autoTuner;
protected AbstractIndexingExecuter(ITransactionalStorage transactionalStorage, WorkContext context, TaskScheduler scheduler)
{
this.transactionalStorage = transactionalStorage;
this.context = context;
this.scheduler = scheduler;
-
- autoTuner = new IndexBatchSizeAutoTuner(context);
}
public void Execute()
@@ -0,0 +1,185 @@
+using System;
+using Raven.Database.Config;
+using System.Linq;
+using System.Collections.Generic;
+
+namespace Raven.Database.Indexing
+{
+ public abstract class BaseBatchSizeAutoTuner
+ {
+ protected readonly WorkContext context;
+
+ private int currentNumber;
+
+ public BaseBatchSizeAutoTuner(WorkContext context)
+ {
+ this.context = context;
+ this.NumberOfItemsToIndexInSingleBatch = InitialNumberOfItems;
+ }
+
+ public int NumberOfItemsToIndexInSingleBatch
+ {
+ get { return currentNumber; }
+ set
+ {
+ CurrentNumberOfItems = currentNumber = value;
+ }
+ }
+
+ public void AutoThrottleBatchSize(int amountOfItemsToIndex, int size)
+ {
+ try
+ {
+ if (ReduceBatchSizeIfCloseToMemoryCeiling())
+ return;
+ if (ConsiderDecreasingBatchSize(amountOfItemsToIndex))
+ return;
+ ConsiderIncreasingBatchSize(amountOfItemsToIndex, size);
+ }
+ finally
+ {
+ RecordAmountOfItems(amountOfItemsToIndex);
+ }
+ }
+
+ private void ConsiderIncreasingBatchSize(int amountOfItemsToIndex, int size)
+ {
+ if (amountOfItemsToIndex < NumberOfItemsToIndexInSingleBatch)
+ {
+ return;
+ }
+
+ if (GetLastAmountOfItems().Any(x => x < NumberOfItemsToIndexInSingleBatch))
+ {
+ // this is the first time we have hit the limit; give it another go before increasing
+ // the batch size
+ return;
+ }
+
+ // in the previous run we also hit the current limit, so we need to check whether we can increase the max batch size
+
+ // here we assume that the average size of documents stays the same. We check whether, if we doubled the amount of memory
+ // that we used for the last batch (note that this is only an estimate, but it should be close enough), we would still be
+ // within the limits that govern us
+
+ var sizeInMegabytes = size / 1024 / 1024;
+
+ // we don't actually *know* the actual cost of indexing, because that depends on many factors (how the index
+ // is structured, whether it is analyzed/default/not analyzed, etc.). We just assume for now that it takes 25% of the actual
+ // on-disk structure for each active index. That should give us a good guesstimate of the value.
+ // Because of the way we execute indexes, only N are running at once, where N is the parallel level, so we take
+ // that into account: you may have 10 indexes but only 2 CPUs, so we only consider the cost of executing 2 indexes,
+ // not all 10
+ var sizedPlusIndexingCost = sizeInMegabytes * (1 + (0.25 * Math.Min(context.IndexDefinitionStorage.IndexesCount, context.Configuration.MaxNumberOfParallelIndexTasks)));
+
+ var remainingMemoryAfterBatchSizeIncrease = MemoryStatistics.AvailableMemory - sizedPlusIndexingCost;
+
+ if (remainingMemoryAfterBatchSizeIncrease >= context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
+ {
+ NumberOfItemsToIndexInSingleBatch = Math.Min(MaxNumberOfItems,
+ NumberOfItemsToIndexInSingleBatch * 2);
+ return;
+ }
+
+ }
+
+ private bool ReduceBatchSizeIfCloseToMemoryCeiling()
+ {
+ if (MemoryStatistics.AvailableMemory >= context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
+ {
+ // there is enough memory available for the next indexing run
+ return false;
+ }
+
+ // we are using too much memory, let us use a bit less next time...
+ // maybe it is us? we generate a lot of garbage when doing indexing, so we ask the GC if it would kindly try to
+ // do something about it.
+ // Note that in order for this to happen we need:
+ // * We had two full runs where we were doing nothing but indexing at full throttle
+ // * The system is over the configured limit, and there is a strong likelihood that this is us causing this
+ // * By forcing a GC, we ensure that we use less memory, and it is not frequent enough to cause perf problems
+
+ GC.Collect(1, GCCollectionMode.Optimized);
+
+ // let us check again after the GC call: do we still need to reduce the batch size?
+
+ if (MemoryStatistics.AvailableMemory > context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
+ {
+ // we don't want to try increasing things, we just hit the ceiling, maybe on the next try
+ return true;
+ }
+
+ // we are still too high, let us reduce the size and see what is going on.
+
+ NumberOfItemsToIndexInSingleBatch = Math.Max(InitialNumberOfItems,
+ NumberOfItemsToIndexInSingleBatch / 2);
+
+ return true;
+ }
+
+ private bool ConsiderDecreasingBatchSize(int amountOfItemsToIndex)
+ {
+ if (amountOfItemsToIndex >= NumberOfItemsToIndexInSingleBatch)
+ {
+ // we had as much work to do as we are currently capable of handling,
+ // so there isn't anything we need to do here...
+ return false;
+ }
+
+ // we didn't have a lot of work to do, so let us see if we can reduce the batch size
+
+ // we are at the configured minimum, nothing to do
+ if (NumberOfItemsToIndexInSingleBatch == InitialNumberOfItems)
+ return true;
+
+ // we were above the current max in the last runs, so we can't reduce the workload now
+ if (GetLastAmountOfItems().Any(x => x > NumberOfItemsToIndexInSingleBatch))
+ return true;
+
+ var old = NumberOfItemsToIndexInSingleBatch;
+ // we have had a couple of runs where we didn't reach the current max, so we can probably
+ // reduce the max again now; this will eventually reduce memory consumption, and will cause
+ // faster indexing times in case we get a big batch again
+ NumberOfItemsToIndexInSingleBatch = Math.Max(InitialNumberOfItems,
+ NumberOfItemsToIndexInSingleBatch / 2);
+
+ // we just reduced the batch size because we had two consecutive runs where we had
+ // less to do than the previous runs. That indicates that the busy period is over; maybe we
+ // ran out of data? Or the rate of data entry into the system was just reduced?
+ // At any rate, there is a strong likelihood of having a lot of garbage in the system,
+ // so let us ask the GC nicely to clean it up
+
+ // but we only want to do it if the change was significant
+ if (old - NumberOfItemsToIndexInSingleBatch > 4096)
+ {
+ GC.Collect(1, GCCollectionMode.Optimized);
+ }
+
+ return true;
+ }
+
+ /// <summary>
+ /// This lets us know that an OOME has happened, and we need to be much more
+ /// conservative with regards to how fast we can grow memory.
+ /// </summary>
+ public void OutOfMemoryExceptionHappened()
+ {
+ // first thing to do, reset the number of items per batch
+ NumberOfItemsToIndexInSingleBatch = InitialNumberOfItems;
+
+ // now, we need to be more conservative about how we are increasing memory usage, so instead of increasing
+ // every time we hit the limit twice, we will increase every time we hit it three times, then 5, 9, etc
+
+ LastAmountOfItemsToRemember *= 2;
+ }
+
+ // The following methods and properties are wrappers around members of the context which are different for the different indexes
+ protected abstract int InitialNumberOfItems { get; }
+ protected abstract int MaxNumberOfItems { get; }
+ protected abstract int CurrentNumberOfItems { get; set; }
+ protected abstract int LastAmountOfItemsToRemember { get; set; }
+ protected abstract void RecordAmountOfItems(int numberOfItems);
+ protected abstract IEnumerable<int> GetLastAmountOfItems();
+ }
+}
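
To give a sense of how this abstract tuner is meant to be consumed, here is a hypothetical concrete subclass for the reduce side. It is only a sketch: the class name and the exact WorkContext members it binds to (in particular an IndexingScheduler property exposing the scheduler shown further down, and a settable CurrentNumberOfItemsToReduceInSingleBatch) are assumptions for illustration and are not part of the diff shown on this page. It assumes the same usings as the file above.

    // Illustrative only - maps the abstract knobs onto the reduce-specific
    // configuration values and scheduler bookkeeping added in this commit.
    public class ReduceBatchSizeAutoTuner : BaseBatchSizeAutoTuner
    {
        public ReduceBatchSizeAutoTuner(WorkContext context) : base(context)
        {
        }

        protected override int InitialNumberOfItems
        {
            get { return context.Configuration.InitialNumberOfItemsToReduceInSingleBatch; }
        }

        protected override int MaxNumberOfItems
        {
            get { return context.Configuration.MaxNumberOfItemsToReduceInSingleBatch; }
        }

        protected override int CurrentNumberOfItems
        {
            // assumes WorkContext exposes a settable property for the current reduce batch size
            get { return context.CurrentNumberOfItemsToReduceInSingleBatch; }
            set { context.CurrentNumberOfItemsToReduceInSingleBatch = value; }
        }

        protected override int LastAmountOfItemsToRemember
        {
            // assumes WorkContext exposes the indexing scheduler; the property name is a guess
            get { return context.IndexingScheduler.LastAmountOfItemsToReduceToRemember; }
            set { context.IndexingScheduler.LastAmountOfItemsToReduceToRemember = value; }
        }

        protected override void RecordAmountOfItems(int numberOfItems)
        {
            context.IndexingScheduler.RecordAmountOfItemsToReduce(numberOfItems);
        }

        protected override IEnumerable<int> GetLastAmountOfItems()
        {
            return context.IndexingScheduler.GetLastAmountOfItemsToReduce();
        }
    }
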
@@ -15,17 +15,21 @@ public class FairIndexingSchedulerWithNewIndexesBias : IIndexingScheduler
private int currentRepeated;
private bool activeFiltering;
private List<int> lastAmountOfItemsToIndex = new List<int>();
+ private List<int> lastAmountOfItemsToReduce = new List<int>();
public FairIndexingSchedulerWithNewIndexesBias()
{
LastAmountOfItemsToIndexToRemember = 1;
+ LastAmountOfItemsToReduceToRemember = 1;
}
public int LastAmountOfItemsToIndexToRemember { get; set; }
+ public int LastAmountOfItemsToReduceToRemember { get; set; }
+
public IList<IndexToWorkOn> FilterMapIndexes(IList<IndexToWorkOn> indexes)
{
- if(indexes.Count == 0)
+ if (indexes.Count == 0)
return indexes;
var indexesByIndexedEtag = indexes
@@ -40,7 +44,7 @@ public IList<IndexToWorkOn> FilterMapIndexes(IList<IndexToWorkOn> indexes)
activeFiltering = false;
return indexes; // they all have the same one, so there aren't any delayed / new indexes
}
-
+
activeFiltering = true;
// we have indexes that haven't all caught up with up yet, so we need to start cycling through the
@@ -66,16 +70,16 @@ public void RecordAmountOfItemsToIndex(int value)
{
var currentLastAmountOfItemsToIndex = lastAmountOfItemsToIndex;
var amountOfItemsToIndex = activeFiltering && currentLastAmountOfItemsToIndex.Count > 0
- ? // if we are actively filtering, we have multiple levels, so we have to assume
- // that the max amount is still the current one, this prevent the different levels of indexing batch
- // size from "fighting" over the batch size.
- Math.Max(currentLastAmountOfItemsToIndex.Max(), value)
- : value;
+ // if we are actively filtering, we have multiple levels, so we have to assume
+ // that the max amount is still the current one; this prevents the different levels of indexing batch
+ // size from "fighting" over the batch size.
+ ? Math.Max(currentLastAmountOfItemsToIndex.Max(), value)
+ : value;
var amountToTake = currentLastAmountOfItemsToIndex.Count;
if (amountToTake + 1 >= LastAmountOfItemsToIndexToRemember)
{
- amountToTake = currentLastAmountOfItemsToIndex.Count-1;
+ amountToTake = currentLastAmountOfItemsToIndex.Count - 1;
}
lastAmountOfItemsToIndex = new List<int>(currentLastAmountOfItemsToIndex.Take(amountToTake))
{
@@ -84,11 +88,38 @@ public void RecordAmountOfItemsToIndex(int value)
}
+ public void RecordAmountOfItemsToReduce(int value)
+ {
+ var currentLastAmountOfItemsToReduce = lastAmountOfItemsToReduce;
+ var amountOfItemsToReduce = activeFiltering && currentLastAmountOfItemsToReduce.Count > 0
+ // if we are actively filtering, we have multiple levels, so we have to assume
+ // that the max amount is still the current one; this prevents the different levels of indexing batch
+ // size from "fighting" over the batch size.
+ ? Math.Max(currentLastAmountOfItemsToReduce.Max(), value)
+ : value;
+
+ var amountToTake = currentLastAmountOfItemsToReduce.Count;
+ if (amountToTake + 1 >= LastAmountOfItemsToReduceToRemember)
+ {
+ amountToTake = currentLastAmountOfItemsToReduce.Count - 1;
+ }
+ lastAmountOfItemsToReduce = new List<int>(currentLastAmountOfItemsToReduce.Take(amountToTake))
+ {
+ amountOfItemsToReduce
+ };
+
+ }
+
public IEnumerable<int> GetLastAmountOfItemsToIndex()
{
return lastAmountOfItemsToIndex;
}
+ public IEnumerable<int> GetLastAmountOfItemsToReduce()
+ {
+ return lastAmountOfItemsToReduce;
+ }
+
// here we compare, but only up to the last 116 bits, not the full 128 bits
// this means that we can gather documents that are within 4K docs from one another, because
// at that point, it doesn't matter much, it would be gone within one or two indexing cycles
@@ -105,10 +136,10 @@ public int GetHashCode(Guid obj)
var bytes = obj.ToByteArray();
for (var i = 0; i < 14; i++)
{
- start = (start*397) ^ bytes[i];
+ start = (start * 397) ^ bytes[i];
}
var last4Bits = bytes[15] >> 4;
- start = (start*397) ^ last4Bits;
+ start = (start * 397) ^ last4Bits;
return start;
}
@@ -6,7 +6,10 @@ public interface IIndexingScheduler
{
IList<IndexToWorkOn> FilterMapIndexes(IList<IndexToWorkOn> indexes);
void RecordAmountOfItemsToIndex(int value);
+ void RecordAmountOfItemsToReduce(int value);
IEnumerable<int> GetLastAmountOfItemsToIndex();
+ IEnumerable<int> GetLastAmountOfItemsToReduce();
int LastAmountOfItemsToIndexToRemember { get; set; }
+ int LastAmountOfItemsToReduceToRemember { get; set; }
}
}