svn merge -c 1491611 FIXES: MAPREDUCE-5308. Shuffling to memory can get out-of-sync when fetching multiple compressed map outputs. Contributed by Nathan Roberts

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2.1-beta@1491617 13f79535-47bb-0310-9956-ffa450edef68
jlowe committed Jun 10, 2013
1 parent f577358 commit bb7186b
Showing 3 changed files with 95 additions and 2 deletions.
6 changes: 6 additions & 0 deletions hadoop-mapreduce-project/CHANGES.txt
@@ -365,6 +365,9 @@ Release 2.1.0-beta - UNRELEASED
MAPREDUCE-5301. Updated MR code to work with YARN-635 changes of renaming
YarnRemoteException to YarnException. (Siddharth Seth via vinodkv)

MAPREDUCE-5308. Shuffling to memory can get out-of-sync when fetching
multiple compressed map outputs (Nathan Roberts via jlowe)

BREAKDOWN OF HADOOP-8562 SUBTASKS

MAPREDUCE-4739. Some MapReduce tests fail to find winutils.
@@ -980,6 +983,9 @@ Release 0.23.9 - UNRELEASED

BUG FIXES

MAPREDUCE-5308. Shuffling to memory can get out-of-sync when fetching
multiple compressed map outputs (Nathan Roberts via jlowe)

Release 0.23.8 - 2013-06-05

INCOMPATIBLE CHANGES
@@ -99,6 +99,19 @@ public void shuffle(MapHost host, InputStream input,
reporter.progress();
LOG.info("Read " + memory.length + " bytes from map-output for " +
getMapId());

/**
* We've gotten the amount of data we were expecting. Verify the
* decompressor has nothing more to offer. This action also forces the
* decompressor to read any trailing bytes that weren't critical
* for decompression, which is necessary to keep the stream
* in sync.
*/
if (input.read() >= 0) {
throw new IOException("Unexpected extra bytes from input stream for " +
getMapId());
}

} catch (IOException ioe) {
// Close the streams
IOUtils.cleanup(LOG, input);
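
This guard is the heart of the fix: readFully() has already consumed exactly memory.length bytes, so one more read() must return -1. The probe also forces the decompressor to drain any trailing bytes it did not need (such as a checksum), leaving the shared stream positioned at the start of the next map output. Below is a minimal, self-contained sketch of the same pattern; the class and method names are illustrative, not part of the Hadoop code.

import java.io.IOException;
import java.io.InputStream;

public class ExactLengthReader {
  // Reads exactly 'expected' bytes, then probes for one more byte to
  // verify the stream is positioned at end-of-data. The probe is what
  // forces a decompressor to consume trailing bytes it did not need,
  // keeping a shared stream in sync for the next consumer.
  public static byte[] readExactlyThenExpectEof(InputStream in, int expected)
      throws IOException {
    byte[] buf = new byte[expected];
    int off = 0;
    while (off < expected) {
      int n = in.read(buf, off, expected - off);
      if (n < 0) {
        throw new IOException("Premature EOF after " + off + " of "
            + expected + " bytes");
      }
      off += n;
    }
    if (in.read() >= 0) {
      throw new IOException("Unexpected extra bytes after " + expected
          + " expected bytes");
    }
    return buf;
  }
}
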
@@ -37,6 +37,7 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.IFileOutputStream;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.TaskAttemptID;
@@ -233,6 +234,80 @@ public void testCopyFromHostWait() throws Exception {
verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID));
verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
}

@SuppressWarnings("unchecked")
@Test
public void testCopyFromHostExtraBytes() throws Exception {
LOG.info("testCopyFromHostWaitExtraBytes");
JobConf job = new JobConf();
TaskAttemptID id = TaskAttemptID.forName("attempt_0_1_r_1_1");
ShuffleScheduler<Text, Text> ss = mock(ShuffleScheduler.class);
MergeManagerImpl<Text, Text> mm = mock(MergeManagerImpl.class);
InMemoryMapOutput<Text, Text> immo = mock(InMemoryMapOutput.class);

Reporter r = mock(Reporter.class);
ShuffleClientMetrics metrics = mock(ShuffleClientMetrics.class);
ExceptionReporter except = mock(ExceptionReporter.class);
SecretKey key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0});
HttpURLConnection connection = mock(HttpURLConnection.class);

Counters.Counter allErrs = mock(Counters.Counter.class);
when(r.getCounter(anyString(), anyString()))
.thenReturn(allErrs);

Fetcher<Text,Text> underTest = new FakeFetcher<Text,Text>(job, id, ss, mm,
r, metrics, except, key, connection);

MapHost host = new MapHost("localhost", "http://localhost:8080/");

ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1);
TaskAttemptID map1ID = TaskAttemptID.forName("attempt_0_1_m_1_1");
maps.add(map1ID);
TaskAttemptID map2ID = TaskAttemptID.forName("attempt_0_1_m_2_1");
maps.add(map2ID);
when(ss.getMapsForHost(host)).thenReturn(maps);

String encHash = "vFE234EIFCiBgYs2tCXY/SjT8Kg=";
String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);

when(connection.getResponseCode()).thenReturn(200);
when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH))
.thenReturn(replyHash);
ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 14, 10, 1);

ByteArrayOutputStream bout = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(bout);
IFileOutputStream ios = new IFileOutputStream(dos);
header.write(dos);
ios.write("MAPDATA123".getBytes());
ios.finish();

ShuffleHeader header2 = new ShuffleHeader(map2ID.toString(), 14, 10, 1);
IFileOutputStream ios2 = new IFileOutputStream(dos);
header2.write(dos);
ios2.write("MAPDATA456".getBytes());
ios2.finish();

ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
when(connection.getInputStream()).thenReturn(in);
// 8 < 10 therefore there appear to be extra bytes in the IFileInputStream
InMemoryMapOutput<Text, Text> mapOut = new InMemoryMapOutput<Text, Text>(job, map1ID, mm, 8, null, true );
InMemoryMapOutput<Text, Text> mapOut2 = new InMemoryMapOutput<Text, Text>(job, map2ID, mm, 10, null, true );

when(mm.reserve(eq(map1ID), anyLong(), anyInt())).thenReturn(mapOut);
when(mm.reserve(eq(map2ID), anyLong(), anyInt())).thenReturn(mapOut2);

underTest.copyFromHost(host);

verify(allErrs).increment(1);
verify(ss).copyFailed(map1ID, host, true, false);
verify(ss, never()).copyFailed(map2ID, host, true, false);

verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID));
verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
}
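
The test wires two IFile-framed map outputs into a single connection stream and deliberately reserves only 8 of map1's 10 bytes, so shuffle() trips the extra-bytes check, map1's copy fails, and both outputs are put back for a retry. For intuition on why an unconsumed trailer corrupts every later output on the stream, here is a small standalone demonstration; the record framing (length prefix plus 2-byte trailer) is hypothetical, not Hadoop's actual IFile layout.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class DesyncDemo {
  public static void main(String[] args) throws IOException {
    // Two records, each framed as [int length][payload][2-byte trailer].
    ByteArrayOutputStream bout = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(bout);
    for (String payload : new String[] {"MAPDATA123", "MAPDATA456"}) {
      byte[] p = payload.getBytes("UTF-8");
      dos.writeInt(p.length);
      dos.write(p);
      dos.writeShort(0xABCD); // trailer, standing in for a checksum
    }
    DataInputStream in =
        new DataInputStream(new ByteArrayInputStream(bout.toByteArray()));
    // First record: read the payload but "forget" the 2-byte trailer,
    // as a decompressor can when the trailer is not needed to produce
    // the expected number of decompressed bytes.
    byte[] first = new byte[in.readInt()];
    in.readFully(first);
    // Second record: the length field is now parsed two bytes early and
    // yields garbage -- the stream is out of sync, which is exactly what
    // the extra-byte probe in shuffle() is designed to catch.
    int bogusLength = in.readInt();
    System.out.println("second record length parsed as " + bogusLength);
  }
}
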

@SuppressWarnings("unchecked")
@Test(timeout=10000)
@@ -265,7 +340,6 @@ public void testCopyFromHostCompressFailure() throws Exception {
TaskAttemptID map2ID = TaskAttemptID.forName("attempt_0_1_m_2_1");
maps.add(map2ID);
when(ss.getMapsForHost(host)).thenReturn(maps);

String encHash = "vFE234EIFCiBgYs2tCXY/SjT8Kg=";
String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);

@@ -292,4 +366,4 @@ public void testCopyFromHostCompressFailure() throws Exception {
encHash);
verify(ss, times(1)).copyFailed(map1ID, host, true, false);
}
}
}
