/
ByteArrayChunkOutputStream.scala
94 lines (82 loc) · 3.12 KB
/
ByteArrayChunkOutputStream.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.util.io
import java.io.OutputStream
import scala.collection.mutable.ArrayBuffer
/**
 * An OutputStream that writes to fixed-size chunks of byte arrays.
 *
 * Bytes are appended to the current chunk until it fills up, at which point a new chunk of
 * `chunkSize` bytes is allocated. [[toArrays]] then exposes the accumulated chunks, trimming
 * only the last (possibly partial) one.
 *
 * @param chunkSize size of each chunk, in bytes. Must be positive.
 */
private[spark]
class ByteArrayChunkOutputStream(chunkSize: Int) extends OutputStream {

  // A non-positive chunkSize would make write(bytes, off, len) loop forever (thisBatch would
  // always be 0) and write(b) throw ArrayIndexOutOfBoundsException on a zero-length chunk,
  // so fail fast at construction time instead.
  require(chunkSize > 0, s"chunkSize must be positive (got $chunkSize)")

  /** All chunks allocated so far, in write order. */
  private val chunks = new ArrayBuffer[Array[Byte]]

  /** Index of the last chunk. Starting with -1 when the chunks array is empty. */
  private var lastChunkIndex = -1

  /**
   * Next position to write in the last chunk.
   *
   * If this equals chunkSize, it means for next write we need to allocate a new chunk.
   * This can also never be 0 between calls: every write advances it past 0 immediately
   * after a new chunk is allocated.
   */
  private var position = chunkSize

  /** Writes a single byte (the low 8 bits of `b`), per the OutputStream contract. */
  override def write(b: Int): Unit = {
    allocateNewChunkIfNeeded()
    chunks(lastChunkIndex)(position) = b.toByte
    position += 1
  }

  /** Writes `len` bytes of `bytes` starting at `off`, spilling across chunks as needed. */
  override def write(bytes: Array[Byte], off: Int, len: Int): Unit = {
    var written = 0
    while (written < len) {
      allocateNewChunkIfNeeded()
      // Copy as much as fits in the current chunk with a single arraycopy call.
      val thisBatch = math.min(chunkSize - position, len - written)
      System.arraycopy(bytes, written + off, chunks(lastChunkIndex), position, thisBatch)
      written += thisBatch
      position += thisBatch
    }
  }

  /** Allocates a fresh chunk when the current one is full (or none has been allocated yet). */
  @inline
  private def allocateNewChunkIfNeeded(): Unit = {
    if (position == chunkSize) {
      chunks += new Array[Byte](chunkSize)
      lastChunkIndex += 1
      position = 0
    }
  }

  /**
   * Returns all written data as an array of byte arrays. All arrays except the last have
   * length `chunkSize`; the last is sized to exactly the bytes written into it.
   *
   * Note: all full chunks are returned by reference (no copy), so callers that keep the
   * result should not continue writing to this stream.
   */
  def toArrays: Array[Array[Byte]] = {
    if (lastChunkIndex == -1) {
      // Nothing was ever written.
      new Array[Array[Byte]](0)
    } else {
      // Copy the first n-1 chunks to the output, and then create an array that fits the last chunk.
      // An alternative would have been returning an array of ByteBuffers, with the last buffer
      // bounded to only the last chunk's position. However, given our use case in Spark (to put
      // the chunks in block manager), only limiting the view bound of the buffer would still
      // require the block manager to store the whole chunk.
      val ret = new Array[Array[Byte]](chunks.size)
      for (i <- 0 until chunks.size - 1) {
        ret(i) = chunks(i)
      }
      if (position == chunkSize) {
        // The last chunk is exactly full; hand it back as-is without copying.
        ret(lastChunkIndex) = chunks(lastChunkIndex)
      } else {
        // Trim the last chunk down to the bytes actually written.
        ret(lastChunkIndex) = new Array[Byte](position)
        System.arraycopy(chunks(lastChunkIndex), 0, ret(lastChunkIndex), 0, position)
      }
      ret
    }
  }
}