Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/network-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@
<scope>test</scope>
</dependency>

<!-- Spark common-utils module, resolved at this project's own version.
     NOTE(review): presumably required for org.apache.spark.SparkRuntimeException and
     org.apache.spark.QueryContext used by the streaming shuffle message classes; confirm. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-common-utils_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-common-utils-java_${scala.binary.version}</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.network.shuffle.streaming;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.CompositeByteBuf;

/**
 * Credit grant sent from the client to the server: it tells the server how many
 * messages the client is ready to receive from it.
 */
public final class CreditControlMessage extends StreamingShuffleMessage {
  /** Bytes appended after the base header: writer ID, reader ID and message count (one int each). */
  private static final int CREDIT_FIELDS_LENGTH = 3 * Integer.BYTES;

  public int shuffleWriterId;
  public int shuffleReaderId;

  public int numMessages;

  /**
   * @param shuffleWriterId ID of the shuffle writer
   * @param shuffleReaderId ID of the shuffle reader
   * @param numMessages number of messages the client is ready to receive
   */
  public CreditControlMessage(int shuffleWriterId, int shuffleReaderId, int numMessages) {
    this.shuffleWriterId = shuffleWriterId;
    this.shuffleReaderId = shuffleReaderId;
    this.numMessages = numMessages;
  }

  /** Always {@link StreamingShuffleMessageType#CREDIT_CONTROL_MESSAGE}. */
  @Override
  public StreamingShuffleMessageType messageType() {
    return StreamingShuffleMessageType.CREDIT_CONTROL_MESSAGE;
  }

  /** Base header length plus the three int fields written by {@link #encode}. */
  @Override
  public int headerLength() {
    return super.headerLength() + CREDIT_FIELDS_LENGTH;
  }

  /**
   * Writes the base header followed by the writer ID, the reader ID and the number of
   * messages, in that order.
   */
  @Override
  public void encode(CompositeByteBuf buf) {
    super.encode(buf);
    buf.writeInt(shuffleWriterId)
        .writeInt(shuffleReaderId)
        .writeInt(numMessages);
  }

  /**
   * Rebuilds a message from {@code buf}. Only the three int fields are read here, in the
   * same order {@link #encode} writes them; the base header is not read by this method.
   */
  public static CreditControlMessage decode(ByteBuf buf) {
    // Arguments are evaluated left to right: writer ID, reader ID, then message count.
    return new CreditControlMessage(buf.readInt(), buf.readInt(), buf.readInt());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.network.shuffle.streaming;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.CompositeByteBuf;

/**
 * Streaming shuffle message that carries a data buffer together with the shuffle writer
 * ID, the shuffle reader ID, the declared data size and a checksum value.
 *
 * The constructor retains the supplied buffer and stores it as {@code ownedBuf}.
 */
public final class DataMessage extends StreamingShuffleMessage {
  /** Bytes appended after the base header: three ints (IDs and size) plus one long (checksum). */
  private static final int DATA_FIELDS_LENGTH = 3 * Integer.BYTES + Long.BYTES;

  public ByteBuf data;
  public int shuffleWriterId;
  public int shuffleReaderId;
  public int dataSize;
  public long checksum;

  /**
   * @param shuffleWriterId ID of the shuffle writer
   * @param shuffleReaderId ID of the shuffle reader
   * @param dataSize number of bytes of record data, as used by {@link #getRecordData()}
   * @param data buffer holding the data; it is retained once by this constructor
   * @param checksum checksum value carried with the message
   */
  public DataMessage(int shuffleWriterId, int shuffleReaderId, int dataSize, ByteBuf data,
      long checksum) {
    this.shuffleWriterId = shuffleWriterId;
    this.shuffleReaderId = shuffleReaderId;
    this.dataSize = dataSize;
    this.checksum = checksum;
    this.data = data;
    // Take our own reference on the buffer and remember it as the buffer this message owns.
    this.ownedBuf = data.retain();
  }

  /** Always {@link StreamingShuffleMessageType#DATA_MESSAGE_UNSAFE_ROW}. */
  @Override
  public StreamingShuffleMessageType messageType() {
    return StreamingShuffleMessageType.DATA_MESSAGE_UNSAFE_ROW;
  }

  /** Base header length plus the int and long fields written by {@link #encode}. */
  @Override
  public int headerLength() {
    return super.headerLength() + DATA_FIELDS_LENGTH;
  }

  /**
   * Writes the base header, then the writer ID, reader ID, data size and checksum, and
   * finally appends {@link #data} to {@code buf} as a component.
   */
  @Override
  public void encode(CompositeByteBuf buf) {
    super.encode(buf);
    buf.writeInt(shuffleWriterId)
        .writeInt(shuffleReaderId)
        .writeInt(dataSize)
        .writeLong(checksum);

    // addComponent hands ownership of the component over to buf, yet this DataMessage
    // keeps referring to data, so an extra reference is taken before handing it over.
    ByteBuf retainedData = data.retain();
    buf.addComponent(true, retainedData);
  }

  /**
   * Rebuilds a message from {@code message}. Reads the writer ID, reader ID, data size and
   * checksum in the order {@link #encode} writes them, then passes {@code message} itself
   * to the constructor as the data buffer.
   */
  public static DataMessage decode(ByteBuf message) {
    int writerId = message.readInt();
    int readerId = message.readInt();
    int size = message.readInt();
    long crc = message.readLong();
    return new DataMessage(writerId, readerId, size, message, crc);
  }

  /**
   * Returns a slice of {@link #data} that starts at its current reader index and spans
   * {@code dataSize} bytes, i.e. exactly the serialized records.
   */
  public ByteBuf getRecordData() {
    int start = data.readerIndex();
    return data.slice(start, dataSize);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.network.shuffle.streaming;

import io.netty.buffer.ByteBuf;
import java.util.zip.CRC32C;
import javax.annotation.concurrent.NotThreadSafe;

/**
 * Accumulates a CRC32C checksum over portions of ByteBuf messages for streaming shuffle.
 */
@NotThreadSafe
public final class ShuffleChecksum {
  private final CRC32C crc = new CRC32C();

  /**
   * Feeds a portion of a ByteBuf message into the running checksum.
   *
   * With assertions enabled, this checks that both {@code startIndex} and
   * {@code dataLength} are non-negative and that {@code dataLength} does not exceed the
   * number of readable bytes in {@code message}.
   *
   * @param message The ByteBuf whose bytes are added to the checksum
   * @param startIndex The index of the first byte to include
   * @param dataLength The number of bytes to include
   */
  public void updateChecksum(ByteBuf message, int startIndex, int dataLength) {
    assert startIndex >= 0;
    assert dataLength >= 0;
    assert message.readableBytes() >= dataLength;

    if (!message.hasArray()) {
      // No accessible backing array (e.g. an off-heap ByteBuf): checksum through an NIO view.
      crc.update(message.nioBuffer(startIndex, dataLength));
      return;
    }

    // Heap-based ByteBuf: checksum straight from the backing array.
    byte[] backingArray = message.array();
    int firstByte = message.arrayOffset() + startIndex;
    crc.update(backingArray, firstByte, dataLength);
  }

  /** Returns the checksum accumulated so far. */
  public long getValue() {
    return crc.getValue();
  }

  /** Resets the accumulated checksum to its initial state. */
  public void reset() {
    crc.reset();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.network.shuffle.streaming;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.CompositeByteBuf;

import org.apache.spark.QueryContext;
import org.apache.spark.SparkRuntimeException;
import scala.collection.immutable.Map;
import scala.collection.immutable.Map$;

/**
 * Base class for messages sent between the streaming shuffle writers (usually mappers) and
 * readers (usually reducers).
 *
 * To prevent memory leaks, streaming shuffle programmers should always abide by the following
 * principles:
 *
 * 1. If you create a buffer via ByteBufAllocator, you must explicitly release it.
 * 2. If you create a new StreamingShuffleMessage, you must call .release() on it.
 *
 * To make these rules work out, implementations of StreamingShuffleMessage should abide
 * by the following rules:
 *
 * 1. StreamingShuffleMessages should *not* change the net refcount of ByteBufs passed to
 *    them during encoding. For message implementations without ByteBufs, this isn't a
 *    concern. But for messages that have ByteBufs (e.g. DataMessage), the encoding method
 *    will likely call compositeByteBuf.addComponent(), which transfers ownership of the
 *    ByteBuf to the CompositeByteBuf: the reference handed over is dropped when the
 *    composite itself is released. So that the caller can *always* follow principle 1
 *    above, the ByteBuf should be retained before being passed to the CompositeByteBuf;
 *    if this is not done, the caller's own release would be one release too many on a
 *    buffer whose refcount has already reached 0. See DataMessage for an example of how
 *    to do this properly.
 * 2. If StreamingShuffleMessages keep a reference to the ByteBufs passed to them during
 *    decoding, they should increment the refcount of that ByteBuf, and assign it to
 *    ownedBuf. This is so that resources get cleaned up when callers follow principle 2
 *    above, i.e. call .release() on the StreamingShuffleMessage. See DataMessage for an
 *    example of how to do this properly.
 */
public abstract sealed class StreamingShuffleMessage
    permits CreditControlMessage, DataMessage, TerminationAckMessage, TerminationControlMessage {
  /** Buffer whose reference this message holds; released (once) by {@link #release()}. */
  protected ByteBuf ownedBuf = null;
  /** Optional hook that {@link #release()} runs (once) after releasing {@link #ownedBuf}. */
  private Runnable releaseCallback = null;

  // To prevent any duplicate/out of order/missing messages, each writer will track the current
  // max sequence number that has been sent to each reader. Similarly, each reader will track
  // the latest sequence number it has received from each writer. Upon receiving a new message
  // from any writer, the reader will check if the sequence number is expected. When all finish,
  // the reader will send a TerminationAckMessage to the writer with the max sequence number
  // that has been received, and the writer will check if the latest sequence recorded matches.
  //
  // Thus the sequence number is valid for the following message types:
  // 1. All message types from a writer to a reader, to make sure that the reader receives
  //    all the messages sent by the writer in order, without missing or duplicating any.
  // 2. TerminationAckMessage from a reader to a writer, to make sure that at the end of the
  //    shuffle the reader has received the same number of messages that the writer has sent.
  // Essentially, other message types from reader to writer won't have a valid sequence number.
  private long seqNum;

  /** Sets the sequence number that {@link #encode} writes into the base header. */
  public void setSeqNum(long seqNum) {
    this.seqNum = seqNum;
  }

  /** Returns the sequence number of this message. */
  public long getSeqNum() {
    return seqNum;
  }

  /** Returns the type of this message. */
  public abstract StreamingShuffleMessageType messageType();

  /**
   * Encodes the current message into the provided ByteBuf. This base implementation writes
   * the base header: the message type ID (int) followed by the sequence number (long).
   */
  public void encode(CompositeByteBuf buf) {
    buf.writeInt(messageType().id());
    buf.writeLong(seqNum);
  }

  /** Returns the length in bytes of the base header written by {@link #encode}. */
  public int headerLength() {
    // 4 bytes for message type, 8 bytes for the sequence number
    return 12;
  }

  /** Registers a callback to be run by {@link #release()}; replaces any previous callback. */
  public void setReleaseCallback(Runnable releaseCallback) {
    this.releaseCallback = releaseCallback;
  }

  /**
   * Releases any resources associated with this message.
   * In VERY RARE cases when the task fails unexpectedly, this method may be called twice.
   * Implementations should not panic in such a case.
   *
   * The owned buffer and the release callback are each used at most once: both fields are
   * cleared before anything is released, and the callback still runs if releasing the
   * buffer throws. Any exception from releasing the buffer or from the callback propagates
   * to the caller.
   */
  public void release() {
    // Detach both resources up front so that a repeated (or re-entrant) call is a no-op
    // even when the release or the callback below fails part-way through.
    ByteBuf bufToRelease = ownedBuf;
    Runnable callbackToRun = releaseCallback;
    ownedBuf = null;
    releaseCallback = null;

    try {
      if (bufToRelease != null) {
        bufToRelease.release();
      }
    } finally {
      // Run the callback even if releasing the buffer threw, so that whatever it cleans up
      // is not leaked because of a refcount error on the buffer.
      if (callbackToRun != null) {
        callbackToRun.run();
      }
    }
  }

  /**
   * Decodes a message from {@code message}. Reads the base header (message type ID and
   * sequence number), delegates the rest to the matching subclass decoder, and sets the
   * sequence number on the result.
   *
   * @param message buffer positioned at the start of an encoded message
   * @return the decoded message, with its sequence number set
   * @throws SparkRuntimeException with error class STREAMING_SHUFFLE_UNEXPECTED_MESSAGE_TYPE
   *         if the decoded message type has no decoder here
   */
  public static StreamingShuffleMessage decode(ByteBuf message) {
    StreamingShuffleMessageType messageType =
        StreamingShuffleMessageType.decode(message.readInt());
    long seqNum = message.readLong();

    StreamingShuffleMessage shuffleMessage = switch (messageType) {
      case DATA_MESSAGE_UNSAFE_ROW -> DataMessage.decode(message);
      case CREDIT_CONTROL_MESSAGE -> CreditControlMessage.decode(message);
      case TERMINATION_CONTROL_MESSAGE -> TerminationControlMessage.decode(message);
      case TERMINATION_ACK_MESSAGE -> TerminationAckMessage.decode(message);
      default -> {
        // Should not reach here since StreamingShuffleMessageType.decode
        // always returns a valid message type
        @SuppressWarnings("unchecked")
        Map<String, String> parameters = Map$.MODULE$.<String, String>empty()
            .updated("messageType", messageType.name());
        throw new SparkRuntimeException("STREAMING_SHUFFLE_UNEXPECTED_MESSAGE_TYPE",
            parameters, null, new QueryContext[]{}, "");
      }
    };

    // Every non-throwing branch above yields a decoded message, so this is never null.
    shuffleMessage.setSeqNum(seqNum);

    return shuffleMessage;
  }

}
Loading