24
24
import org .apache .flink .runtime .io .network .buffer .BufferBuilder ;
25
25
import org .apache .flink .runtime .io .network .buffer .BufferConsumer ;
26
26
import org .apache .flink .runtime .io .network .buffer .BufferPool ;
27
+ import org .apache .flink .runtime .io .network .buffer .FreeingBufferRecycler ;
28
+ import org .apache .flink .runtime .io .network .buffer .NetworkBuffer ;
27
29
28
30
import javax .annotation .Nullable ;
29
31
30
32
import java .io .IOException ;
31
33
import java .nio .ByteBuffer ;
32
34
import java .util .ArrayDeque ;
33
- import java .util .Queue ;
34
35
35
36
import static org .apache .flink .util .Preconditions .checkArgument ;
36
37
import static org .apache .flink .util .Preconditions .checkNotNull ;
37
38
import static org .apache .flink .util .Preconditions .checkState ;
38
39
39
- /** Placeholder. */
40
- public class SparsePartitionSortedBuffer implements SortBuffer {
40
+ /**
41
+ * * A {@link SortBuffer} implementation which sorts all appended records only by subpartition
42
+ * index. Records of the same subpartition keep the appended order.
43
+ *
44
+ * <p>Different from the {@link SortBasedPartitionSortedBuffer}, in this {@link SortBuffer}
45
+ * implementation, memory segment boundary serves as the nature data boundary of different
46
+ * subpartitions, which means that one memory segment can never contain data from different
47
+ * subpartitions.
48
+ */
49
+ public class HashBasedPartitionSortedBuffer implements SortBuffer {
41
50
42
51
/** A buffer pool to request memory segments from. */
43
52
private final BufferPool bufferPool ;
@@ -46,40 +55,51 @@ public class SparsePartitionSortedBuffer implements SortBuffer {
46
55
private final int numGuaranteedBuffers ;
47
56
48
57
/** Buffers containing data for all subpartitions. */
49
- private final ArrayDeque <BufferBuilder >[] buffers ;
58
+ private final ArrayDeque <BufferConsumer >[] buffers ;
50
59
51
60
// ---------------------------------------------------------------------------------------------
52
61
// Statistics and states
53
62
// ---------------------------------------------------------------------------------------------
54
63
55
- private int numTotalBuffers ;
56
-
57
64
/** Total number of bytes already appended to this sort buffer. */
58
65
private long numTotalBytes ;
59
66
60
67
/** Total number of records already appended to this sort buffer. */
61
68
private long numTotalRecords ;
62
69
70
+ /** Whether this sort buffer is full and ready to read data from. */
71
+ private boolean isFull ;
72
+
63
73
/** Whether this sort buffer is finished. One can only read a finished sort buffer. */
64
74
private boolean isFinished ;
65
75
66
76
/** Whether this sort buffer is released. A released sort buffer can not be used. */
67
77
private boolean isReleased ;
68
78
79
+ // ---------------------------------------------------------------------------------------------
80
+ // For writing
81
+ // ---------------------------------------------------------------------------------------------
82
+
83
+ /** Partial buffers to be appended data for each channel. */
84
+ private final BufferBuilder [] builders ;
85
+
86
+ /** Total number of network buffers already occupied currently by this sort buffer. */
87
+ private int numBuffersOccupied ;
88
+
69
89
// ---------------------------------------------------------------------------------------------
70
90
// For reading
71
91
// ---------------------------------------------------------------------------------------------
72
92
93
+ /** Used to index the current available channel to read data from. */
94
+ private int readOrderIndex ;
95
+
73
96
/** Data of different subpartitions in this sort buffer will be read in this order. */
74
97
private final int [] subpartitionReadOrder ;
75
98
76
99
/** Total number of bytes already read from this sort buffer. */
77
100
private long numTotalBytesRead ;
78
101
79
- /** Used to index the current available channel to read data from. */
80
- private int readOrderIndex ;
81
-
82
- public SparsePartitionSortedBuffer (
102
+ public HashBasedPartitionSortedBuffer (
83
103
BufferPool bufferPool ,
84
104
int numSubpartitions ,
85
105
int numGuaranteedBuffers ,
@@ -89,6 +109,7 @@ public SparsePartitionSortedBuffer(
89
109
this .bufferPool = checkNotNull (bufferPool );
90
110
this .numGuaranteedBuffers = numGuaranteedBuffers ;
91
111
112
+ this .builders = new BufferBuilder [numSubpartitions ];
92
113
this .buffers = new ArrayDeque [numSubpartitions ];
93
114
for (int channel = 0 ; channel < numSubpartitions ; ++channel ) {
94
115
this .buffers [channel ] = new ArrayDeque <>();
@@ -109,133 +130,114 @@ public SparsePartitionSortedBuffer(
109
130
public boolean append (ByteBuffer source , int targetChannel , Buffer .DataType dataType )
110
131
throws IOException {
111
132
checkArgument (source .hasRemaining (), "Cannot append empty data." );
133
+ checkState (!isFull , "Sort buffer is already full." );
112
134
checkState (!isFinished , "Sort buffer is already finished." );
113
135
checkState (!isReleased , "Sort buffer is already released." );
114
136
115
137
int totalBytes = source .remaining ();
116
138
if (dataType .isBuffer ()) {
117
- // return false directly if it can not allocate enough buffers for the given record
118
- Queue <BufferBuilder > freeSegments = allocateBuffersForRecord (totalBytes , targetChannel );
119
- if (freeSegments == null ) {
120
- return false ;
121
- }
122
- writeRecord (source , targetChannel , freeSegments );
139
+ writeRecord (source , targetChannel );
123
140
} else {
124
141
writeEvent (source , targetChannel , dataType );
125
142
}
126
143
127
- ++numTotalRecords ;
128
- numTotalBytes += totalBytes ;
129
- return true ;
144
+ isFull = source .hasRemaining ();
145
+ if (!isFull ) {
146
+ ++numTotalRecords ;
147
+ }
148
+ numTotalBytes += totalBytes - source .remaining ();
149
+ return isFull ;
130
150
}
131
151
132
152
private void writeEvent (ByteBuffer source , int targetChannel , Buffer .DataType dataType ) {
133
- ArrayDeque <BufferBuilder > channelBuffers = buffers [targetChannel ];
134
- if (!channelBuffers .isEmpty ()) {
135
- channelBuffers .peekLast ().finish ();
153
+ BufferBuilder builder = builders [targetChannel ];
154
+ if (builder != null ) {
155
+ builder .finish ();
156
+ buffers [targetChannel ].add (builder .createBufferConsumerFromBeginning ());
157
+ builder .close ();
158
+ builders [targetChannel ] = null ;
136
159
}
137
160
138
- MemorySegment segment = MemorySegmentFactory .allocateUnpooledSegment (source .remaining ());
139
- BufferBuilder builder = new BufferBuilder (segment , dataType , ignored -> {});
140
- builder .append (source );
141
- channelBuffers .add (builder );
161
+ MemorySegment segment =
162
+ MemorySegmentFactory .allocateUnpooledOffHeapMemory (source .remaining ());
163
+ segment .put (0 , source , segment .size ());
164
+ BufferConsumer consumer =
165
+ new BufferConsumer (
166
+ new NetworkBuffer (segment , FreeingBufferRecycler .INSTANCE , dataType ),
167
+ segment .size ());
168
+ buffers [targetChannel ].add (consumer );
142
169
}
143
170
144
- private void writeRecord (
145
- ByteBuffer source , int targetChannel , Queue <BufferBuilder > freeSegments ) {
146
- int remainingFreeSpace = 0 ;
147
- ArrayDeque <BufferBuilder > channelBuffers = buffers [targetChannel ];
148
-
149
- BufferBuilder prevBuffer = null ;
150
- if (!channelBuffers .isEmpty ()) {
151
- prevBuffer = channelBuffers .peekLast ();
152
- remainingFreeSpace = prevBuffer .getWritableBytes ();
153
- }
154
-
155
- if (remainingFreeSpace > 0 ) {
156
- prevBuffer .append (source );
157
- }
158
-
159
- while (source .hasRemaining ()) {
160
- BufferBuilder targetBuffer = freeSegments .poll ();
161
- targetBuffer .append (source );
162
- if (prevBuffer != null ) {
163
- prevBuffer .finish ();
171
+ private void writeRecord (ByteBuffer source , int targetChannel ) throws IOException {
172
+ do {
173
+ BufferBuilder builder = builders [targetChannel ];
174
+ if (builder == null ) {
175
+ builder = requestBufferFromPool ();
176
+ if (builder == null ) {
177
+ break ;
178
+ }
179
+ ++numBuffersOccupied ;
180
+ builders [targetChannel ] = builder ;
164
181
}
165
- channelBuffers .add (targetBuffer );
166
- prevBuffer = targetBuffer ;
167
- }
168
- }
169
-
170
- private ArrayDeque <BufferBuilder > allocateBuffersForRecord (
171
- int numRecordBytes , int targetChannel ) throws IOException {
172
- int remainingFreeSpace = 0 ;
173
- ArrayDeque <BufferBuilder > channelBuffers = buffers [targetChannel ];
174
- if (!channelBuffers .isEmpty ()) {
175
- remainingFreeSpace = channelBuffers .peekLast ().getWritableBytes ();
176
- }
177
182
178
- ArrayDeque <BufferBuilder > freeBuffers = new ArrayDeque <>();
179
- while (remainingFreeSpace < numRecordBytes ) {
180
- BufferBuilder buffer = requestBufferFromPool ();
181
- if (buffer == null ) {
182
- // return null if we can not allocate enough buffers for the appended record
183
- numTotalBuffers -= freeBuffers .size ();
184
- freeBuffers .forEach (BufferBuilder ::close );
185
- freeBuffers .clear ();
186
- return null ;
183
+ builder .append (source );
184
+ if (builder .isFull ()) {
185
+ builder .finish ();
186
+ buffers [targetChannel ].add (builder .createBufferConsumerFromBeginning ());
187
+ builder .close ();
188
+ builders [targetChannel ] = null ;
187
189
}
188
-
189
- ++numTotalBuffers ;
190
- freeBuffers .add (buffer );
191
- remainingFreeSpace += buffer .getMaxCapacity ();
192
- }
193
- return freeBuffers ;
190
+ } while (source .hasRemaining ());
194
191
}
195
192
196
193
private BufferBuilder requestBufferFromPool () throws IOException {
197
194
try {
198
195
// blocking request buffers if there is still guaranteed memory
199
- if (numTotalBuffers < numGuaranteedBuffers ) {
196
+ if (numBuffersOccupied < numGuaranteedBuffers ) {
200
197
return bufferPool .requestBufferBuilderBlocking ();
201
198
}
202
199
} catch (InterruptedException e ) {
203
- throw new IOException ("Interrupted while requesting buffer." );
200
+ throw new IOException ("Interrupted while requesting buffer." , e );
204
201
}
205
202
206
203
return bufferPool .requestBufferBuilder ();
207
204
}
208
205
209
206
@ Override
210
207
public BufferWithChannel copyIntoSegment (MemorySegment target ) {
211
- checkState (hasRemaining (), "No data remaining." );
212
- checkState (isFinished , "Should finish the sort buffer first before coping any data." );
208
+ checkState (isFull , "Sort buffer is not ready to be read." );
213
209
checkState (!isReleased , "Sort buffer is already released." );
214
210
215
211
BufferWithChannel buffer = null ;
212
+ if (!hasRemaining () || readOrderIndex >= subpartitionReadOrder .length ) {
213
+ return null ;
214
+ }
215
+
216
216
int targetChannel = subpartitionReadOrder [readOrderIndex ];
217
217
while (buffer == null ) {
218
- try (BufferBuilder builder = buffers [targetChannel ].poll ()) {
219
- if (builder != null ) {
220
- try (BufferConsumer consumer = builder .createBufferConsumerFromBeginning ()) {
221
- buffer = new BufferWithChannel (consumer .build (), targetChannel );
222
- numTotalBytesRead += buffer .getBuffer ().readableBytes ();
223
- }
224
- } else {
225
- targetChannel = subpartitionReadOrder [++readOrderIndex ];
218
+ BufferConsumer consumer = buffers [targetChannel ].poll ();
219
+ if (consumer != null ) {
220
+ buffer = new BufferWithChannel (consumer .build (), targetChannel );
221
+ numBuffersOccupied -= buffer .getBuffer ().isBuffer () ? 1 : 0 ;
222
+ numTotalBytesRead += buffer .getBuffer ().readableBytes ();
223
+ consumer .close ();
224
+ } else {
225
+ if (++readOrderIndex >= subpartitionReadOrder .length ) {
226
+ break ;
226
227
}
228
+ targetChannel = subpartitionReadOrder [readOrderIndex ];
227
229
}
228
230
}
229
231
return buffer ;
230
232
}
231
233
232
234
@ Override
233
- public long numRecords () {
235
+ public long numTotalRecords () {
234
236
return numTotalRecords ;
235
237
}
236
238
237
239
@ Override
238
- public long numBytes () {
240
+ public long numTotalBytes () {
239
241
return numTotalBytes ;
240
242
}
241
243
@@ -246,12 +248,18 @@ public boolean hasRemaining() {
246
248
247
249
@ Override
248
250
public void finish () {
251
+ checkState (!isFull , "SortBuffer must not be full." );
249
252
checkState (!isFinished , "SortBuffer is already finished." );
250
253
254
+ isFull = true ;
251
255
isFinished = true ;
252
- for (ArrayDeque <BufferBuilder > channelBuffers : buffers ) {
253
- if (!channelBuffers .isEmpty ()) {
254
- channelBuffers .peekLast ().finish ();
256
+ for (int channel = 0 ; channel < builders .length ; ++channel ) {
257
+ BufferBuilder builder = builders [channel ];
258
+ if (builder != null ) {
259
+ builder .finish ();
260
+ buffers [channel ].add (builder .createBufferConsumerFromBeginning ());
261
+ builder .close ();
262
+ builders [channel ] = null ;
255
263
}
256
264
}
257
265
}
@@ -263,13 +271,26 @@ public boolean isFinished() {
263
271
264
272
@ Override
265
273
public void release () {
266
- // the sort buffer can be released by other threads
267
274
if (isReleased ) {
268
275
return ;
269
276
}
270
277
isReleased = true ;
271
278
272
- clear ();
279
+ for (int channel = 0 ; channel < builders .length ; ++channel ) {
280
+ BufferBuilder builder = builders [channel ];
281
+ if (builder != null ) {
282
+ builder .close ();
283
+ builders [channel ] = null ;
284
+ }
285
+ }
286
+
287
+ for (ArrayDeque <BufferConsumer > buffer : buffers ) {
288
+ BufferConsumer consumer = buffer .poll ();
289
+ while (consumer != null ) {
290
+ consumer .close ();
291
+ consumer = buffer .poll ();
292
+ }
293
+ }
273
294
}
274
295
275
296
@ Override
@@ -278,18 +299,16 @@ public boolean isReleased() {
278
299
}
279
300
280
301
@ Override
281
- public void clear () {
282
- for (ArrayDeque <BufferBuilder > channelBuffers : buffers ) {
283
- while (!channelBuffers .isEmpty ()) {
284
- channelBuffers .poll ().close ();
285
- }
286
- }
302
+ public void reset () {
303
+ checkState (!isFinished , "Sort buffer has been finished." );
304
+ checkState (!isReleased , "Sort buffer has been released." );
287
305
288
- numTotalBuffers = 0 ;
289
- numTotalBytes = 0 ;
290
- numTotalRecords = 0 ;
291
- isFinished = false ;
292
- numTotalBytesRead = 0 ;
306
+ isFull = false ;
293
307
readOrderIndex = 0 ;
294
308
}
309
+
310
+ @ Override
311
+ public BufferPool getBufferPool () {
312
+ return bufferPool ;
313
+ }
295
314
}
0 commit comments