forked from ravendb/ravendb
-
Notifications
You must be signed in to change notification settings - Fork 1
/
IndexBatchSizeAutoTuner.cs
160 lines (130 loc) · 6.2 KB
/
IndexBatchSizeAutoTuner.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
using System;
using Raven.Database.Config;
namespace Raven.Database.Indexing
{
public class IndexBatchSizeAutoTuner
{
    // Auto-tunes the number of items indexed per batch: the batch grows when we
    // saturate it on consecutive runs and memory permits, and shrinks when work
    // dries up or available physical memory falls below the configured limit.
    private readonly WorkContext context;
    private int numberOfItemsToIndexInSingleBatch;
    // Items processed in the previous run; lets us require TWO consecutive runs
    // at (or below) the limit before growing (or shrinking) the batch size.
    protected int lastAmountOfItemsToIndex;

    public IndexBatchSizeAutoTuner(WorkContext context)
    {
        this.context = context;
        numberOfItemsToIndexInSingleBatch = context.Configuration.InitialNumberOfItemsToIndexInSingleBatch;
    }

    // Current batch-size limit. The setter also publishes the new value on the
    // work context so other components can observe it.
    public int NumberOfItemsToIndexInSingleBatch
    {
        get { return numberOfItemsToIndexInSingleBatch; }
        set
        {
            context.CurrentNumberOfItemsToIndexInSingleBatch = numberOfItemsToIndexInSingleBatch = value;
        }
    }

    // Called after each indexing run with the number of items actually indexed
    // and the (approximate) size in bytes of that batch. Exactly one of the
    // reduce / decrease / increase strategies is applied per call; the last
    // batch count is always recorded for the next call's decision.
    public void AutoThrottleBatchSize(int amountOfItemsToIndex, int size)
    {
        try
        {
            if (ReduceBatchSizeIfCloseToMemoryCeiling())
                return;
            if (ConsiderDecreasingBatchSize(amountOfItemsToIndex))
                return;
            ConsiderIncreasingBatchSize(amountOfItemsToIndex, size);
        }
        finally
        {
            lastAmountOfItemsToIndex = amountOfItemsToIndex;
        }
    }

    private void ConsiderIncreasingBatchSize(int amountOfItemsToIndex, int size)
    {
        if (amountOfItemsToIndex < NumberOfItemsToIndexInSingleBatch)
        {
            return;
        }
        if (lastAmountOfItemsToIndex < NumberOfItemsToIndexInSingleBatch)
        {
            // this is the first time we hit the limit, we will give it another go
            // before we increase the batch size
            return;
        }
        // in the previous run, we also hit the current limit, we need to check if we can increase the max batch size
        // here we make the assumption that the average size of documents is the same. We check whether, if we doubled
        // the amount of memory that we used for the last batch (note that this is only an estimate, but should be
        // close enough), we would still be within the limits that govern us
        var sizeInMegabytes = size / 1024 / 1024;
        // we don't actually *know* the real cost of indexing, because that depends on many factors (how the index
        // is structured, is it analyzed/default/not analyzed, etc). We just assume for now that it takes 25% of the
        // actual on-disk structure per each active index. That should give us a good guesstimate about the value.
        // Because of the way we are executing indexes, only N are running at once, where N is the parallel level,
        // so we take that into account: you may have 10 indexes but only 2 CPUs, so we only consider the cost of
        // executing 2 indexes, not all 10
        var sizedPlusIndexingCost = sizeInMegabytes * (1 + (0.25 * Math.Min(context.IndexDefinitionStorage.IndexesCount, context.Configuration.MaxNumberOfParallelIndexTasks)));
        var remainingMemoryAfterBatchSizeIncrease = MemoryStatistics.AvailablePhysicalMemory - sizedPlusIndexingCost;
        if (remainingMemoryAfterBatchSizeIncrease >= context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
        {
            NumberOfItemsToIndexInSingleBatch = Math.Min(context.Configuration.MaxNumberOfItemsToIndexInSingleBatch,
                                                         NumberOfItemsToIndexInSingleBatch * 2);
        }
    }

    // Returns true (and halves the batch size) when available physical memory has
    // dropped below the configured limit; a GC is attempted first in case the
    // pressure is our own indexing garbage.
    private bool ReduceBatchSizeIfCloseToMemoryCeiling()
    {
        if (MemoryStatistics.AvailablePhysicalMemory >= context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
        {
            // there is enough memory available for the next indexing run
            return false;
        }
        // we are using too much memory, let us use less next time...
        // maybe it is us? we generate a lot of garbage when doing indexing, so we ask the GC if it would kindly try to
        // do something about it.
        // Note that in order for this to happen we need:
        // * We had two full runs when we were doing nothing but indexing at full throttle
        // * The system is over the configured limit, and there is a strong likelihood that this is us causing this
        // * By forcing a GC, we ensure that we use less memory, and it is not frequent enough to cause perf problems
        GC.Collect(1, GCCollectionMode.Optimized);
        // let us check again after the GC call, do we still need to reduce the batch size?
        if (MemoryStatistics.AvailablePhysicalMemory > context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
        {
            // we don't want to try increasing things, we just hit the ceiling, maybe on the next try
            return true;
        }
        // we are still too high, let us reduce the size and see what is going on.
        NumberOfItemsToIndexInSingleBatch = Math.Max(context.Configuration.InitialNumberOfItemsToIndexInSingleBatch,
                                                     NumberOfItemsToIndexInSingleBatch / 2);
        return true;
    }

    // Returns true when the batch size was (or stays) decreased because recent
    // runs did not fill the current batch; returns false when the batch was full
    // and growth should be considered instead.
    private bool ConsiderDecreasingBatchSize(int amountOfItemsToIndex)
    {
        if (amountOfItemsToIndex >= NumberOfItemsToIndexInSingleBatch)
        {
            // we had as much work to do as we are currently capable of handling,
            // there isn't anything that we need to do here...
            return false;
        }
        // we didn't have a lot of work to do, so let us see if we can reduce the batch size

        // we are at the configured minimum, nothing to do
        if (NumberOfItemsToIndexInSingleBatch == context.Configuration.InitialNumberOfItemsToIndexInSingleBatch)
            return true;

        // we were above the max the last time, we can't reduce the work load now
        if (lastAmountOfItemsToIndex > NumberOfItemsToIndexInSingleBatch)
            return true;

        var old = NumberOfItemsToIndexInSingleBatch;
        // we have had a couple of times where we didn't get to the current max, so we can probably
        // reduce the max again now, this will reduce the memory consumption eventually, and will cause
        // faster indexing times in case we get a big batch again
        NumberOfItemsToIndexInSingleBatch = Math.Max(context.Configuration.InitialNumberOfItemsToIndexInSingleBatch,
                                                     NumberOfItemsToIndexInSingleBatch / 2);
        // we just reduced the batch size because we had two concurrent runs where we had
        // less to do than the previous runs. That indicates that the busy period is over, maybe we
        // ran out of data? Or the rate of data entry into the system was just reduced?
        // At any rate, there is a strong likelihood of having a lot of garbage in the system,
        // let us ask the GC nicely to clean it — but only if the change was significant.
        // BUG FIX: the original tested (NumberOfItemsToIndexInSingleBatch - old > 4096), which is
        // never positive after halving, so the GC call below was unreachable. The operands
        // must be the other way around to measure how much the batch size shrank.
        if (old - NumberOfItemsToIndexInSingleBatch > 4096)
        {
            GC.Collect(1, GCCollectionMode.Optimized);
        }
        return true;
    }
}
}